1
0
Fork 0
mirror of https://github.com/sbrl/Pepperminty-Wiki.git synced 2024-11-25 17:23:00 +00:00

More bugfixes & optimisations, but I'm still not sure what's up with the correction system

This commit is contained in:
Starbeamrainbowlabs 2020-03-15 21:28:56 +00:00
parent 99f920f11d
commit 5232bb3060
Signed by: sbrl
GPG key ID: 1BE5172E637709C2
3 changed files with 23 additions and 10 deletions

View file

@ -348,18 +348,22 @@ class BkTree {
* @return array<string> Similar resultant strings from the BK-Tree. * @return array<string> Similar resultant strings from the BK-Tree.
*/ */
public function lookup(string $string, int $max_distance = 1, int $count = 0) : array { public function lookup(string $string, int $max_distance = 1, int $count = 0) : array {
error_log("[BkTree/lookup]".var_export($string, true).", dist ".var_export($max_distance, true).", count:".var_export($count, true));
if($this->get_node_count() == 0) return null; if($this->get_node_count() == 0) return null;
$result = []; $result_count = 0; $result = []; $result_count = 0;
$stack = [ $this->box->get("node|0") ]; $stack = [ $this->box->get("node|0") ];
$stack_top = 0; $stack_top = 0;
$nodes = 0;
// https://softwareengineering.stackexchange.com/a/226162/58491 // https://softwareengineering.stackexchange.com/a/226162/58491
while($stack_top >= 0) { while($stack_top >= 0) {
// Take the topmost node off the stack // Take the topmost node off the stack
$node_current = $stack[$stack_top]; $node_current = $stack[$stack_top];
unset($stack[$stack_top]); unset($stack[$stack_top]);
$stack_top--; $stack_top--;
$nodes++;
$distance = levenshtein($string, $node_current->value, $this->cost_insert, $this->cost_replace, $this->cost_delete); $distance = levenshtein($string, $node_current->value, $this->cost_insert, $this->cost_replace, $this->cost_delete);
@ -379,6 +383,8 @@ class BkTree {
} }
} }
error_log("Nodes traversed: $nodes\n");
return $result; return $result;
} }

View file

@ -308,7 +308,8 @@ register_module([
search::invindex_load($paths->searchindex); search::invindex_load($paths->searchindex);
$env->perfdata->searchindex_decode_time = (microtime(true) - $env->perfdata->searchindex_decode_start) * 1000; $env->perfdata->searchindex_decode_time = (microtime(true) - $env->perfdata->searchindex_decode_start) * 1000;
$env->perfdata->searchindex_query_start = microtime(true); $env->perfdata->searchindex_query_start = microtime(true);
$searchResults = search::invindex_query($_GET["query"]); $query_stas = null;
$searchResults = search::invindex_query($_GET["query"], $query_stas);
$env->perfdata->searchindex_query_time = (microtime(true) - $env->perfdata->searchindex_query_start) * 1000; $env->perfdata->searchindex_query_time = (microtime(true) - $env->perfdata->searchindex_query_start) * 1000;
header("content-type: application/json"); header("content-type: application/json");
@ -319,7 +320,8 @@ register_module([
if(isset($env->perfdata->didyoumean_correction)) if(isset($env->perfdata->didyoumean_correction))
$result->didyoumean_correction_time = $env->perfdata->didyoumean_correction; $result->didyoumean_correction_time = $env->perfdata->didyoumean_correction;
$result->total_time = $result->decode_time + $result->query_time; $result->total_time = $result->decode_time + $result->query_time;
$result->stas = search::stas_parse(search::stas_split($_GET["query"])); // $result->stas = search::stas_parse(search::stas_split($_GET["query"]));
$result->stas = $query_stas;
$result->search_results = $searchResults; $result->search_results = $searchResults;
exit(json_encode($result, JSON_PRETTY_PRINT)); exit(json_encode($result, JSON_PRETTY_PRINT));
}); });

View file

@ -171,7 +171,6 @@ class search
*/ */
public static function didyoumean_correct(string $term) : ?string { public static function didyoumean_correct(string $term) : ?string {
global $settings, $paths, $env; global $settings, $paths, $env;
$start_time = microtime(true); $start_time = microtime(true);
// Load the didyoumean index, but only if it's enabled etc // Load the didyoumean index, but only if it's enabled etc
@ -186,15 +185,16 @@ class search
$term, $term,
$settings->search_didyoumean_editdistance $settings->search_didyoumean_editdistance
); );
if(empty($results)) return null; if(!empty($results)) {
usort($results, function($a, $b) : int { usort($results, function($a, $b) : int {
return self::compare($a, $b); return self::compare($a, $b);
}); });
}
if(!isset($env->perfdata->didyoumean_correction)) if(!isset($env->perfdata->didyoumean_correction))
$env->perfdata->didyoumean_correction = 0; $env->perfdata->didyoumean_correction = 0;
$env->perfdata->didyoumean_correction += (microtime(true) - $start_time) * 1000; $env->perfdata->didyoumean_correction += (microtime(true) - $start_time) * 1000;
return $results[0]; return $results[0] ?? null;
} }
public static function didyoumean_rebuild(bool $output = true) : void { public static function didyoumean_rebuild(bool $output = true) : void {
@ -674,6 +674,7 @@ class search
if(module_exists("feature-search-didyoumean") && $settings->search_didyoumean_enabled) { if(module_exists("feature-search-didyoumean") && $settings->search_didyoumean_enabled) {
$terms_count = count($result["terms"]); $terms_count = count($result["terms"]);
for($i = 0; $i < $terms_count; $i++) { for($i = 0; $i < $terms_count; $i++) {
error_log("[stas_parse/didyoumean] Now looking at #$i: ".var_export($result["terms"][$i], true)."(total count: $terms_count)");
if($result["terms"][$i]["exact"] || // Skip exact-only if($result["terms"][$i]["exact"] || // Skip exact-only
$result["terms"][$i]["weight"] < 1 || // Skip stop & irrelevant words $result["terms"][$i]["weight"] < 1 || // Skip stop & irrelevant words
self::invindex_term_exists($result["terms"][$i]["term"])) continue; self::invindex_term_exists($result["terms"][$i]["term"])) continue;
@ -712,10 +713,14 @@ class search
/** /**
* Searches the given inverted index for the specified search terms. * Searches the given inverted index for the specified search terms.
* @param string $query The search query. * Note that this automatically pushes the query string through STAS which
* can be a fairly expensive operation, so pass the 2nd argument if you need
* to inspect the STAS parsing result instead of parsing the query again.
* @param string $query The search query.
* @param &stdClass $query_stas An object to fill with the result of the STAS parsing.
* @return array An array of matching pages. * @return array An array of matching pages.
*/ */
public static function invindex_query($query) public static function invindex_query($query, &$query_stas = null)
{ {
global $settings, $pageindex; global $settings, $pageindex;