From 5232bb30608b9577b8b1209cc620cd166b4f64fa Mon Sep 17 00:00:00 2001 From: Starbeamrainbowlabs Date: Sun, 15 Mar 2020 21:28:56 +0000 Subject: [PATCH] More bugfixes & optimisations, but I'm still not sure what's up with the correction system --- modules/feature-search-didyoumean.php | 6 ++++++ modules/feature-search.php | 6 ++++-- modules/lib-search-engine.php | 21 +++++++++++++-------- 3 files changed, 23 insertions(+), 10 deletions(-) diff --git a/modules/feature-search-didyoumean.php b/modules/feature-search-didyoumean.php index 02f68fb..2e7dcbd 100644 --- a/modules/feature-search-didyoumean.php +++ b/modules/feature-search-didyoumean.php @@ -348,18 +348,22 @@ class BkTree { * @return array Similar resultant strings from the BK-Tree. */ public function lookup(string $string, int $max_distance = 1, int $count = 0) : array { + error_log("[BkTree/lookup]".var_export($string, true).", dist ".var_export($max_distance, true).", count:".var_export($count, true)); if($this->get_node_count() == 0) return null; $result = []; $result_count = 0; $stack = [ $this->box->get("node|0") ]; $stack_top = 0; + $nodes = 0; + // https://softwareengineering.stackexchange.com/a/226162/58491 while($stack_top >= 0) { // Take the topmost node off the stack $node_current = $stack[$stack_top]; unset($stack[$stack_top]); $stack_top--; + $nodes++; $distance = levenshtein($string, $node_current->value, $this->cost_insert, $this->cost_replace, $this->cost_delete); @@ -379,6 +383,8 @@ class BkTree { } } + error_log("Nodes traversed: $nodes\n"); + return $result; } diff --git a/modules/feature-search.php b/modules/feature-search.php index eb8eb4b..790976c 100644 --- a/modules/feature-search.php +++ b/modules/feature-search.php @@ -308,7 +308,8 @@ register_module([ search::invindex_load($paths->searchindex); $env->perfdata->searchindex_decode_time = (microtime(true) - $env->perfdata->searchindex_decode_start) * 1000; $env->perfdata->searchindex_query_start = microtime(true); - $searchResults = search::invindex_query($_GET["query"]); + $query_stas = null; + $searchResults = search::invindex_query($_GET["query"], $query_stas); $env->perfdata->searchindex_query_time = (microtime(true) - $env->perfdata->searchindex_query_start) * 1000; header("content-type: application/json"); @@ -319,7 +320,8 @@ register_module([ if(isset($env->perfdata->didyoumean_correction)) $result->didyoumean_correction_time = $env->perfdata->didyoumean_correction; $result->total_time = $result->decode_time + $result->query_time; - $result->stas = search::stas_parse(search::stas_split($_GET["query"])); + // $result->stas = search::stas_parse(search::stas_split($_GET["query"])); + $result->stas = $query_stas; $result->search_results = $searchResults; exit(json_encode($result, JSON_PRETTY_PRINT)); }); diff --git a/modules/lib-search-engine.php b/modules/lib-search-engine.php index c500b29..7a730d7 100644 --- a/modules/lib-search-engine.php +++ b/modules/lib-search-engine.php @@ -171,7 +171,6 @@ class search */ public static function didyoumean_correct(string $term) : ?string { global $settings, $paths, $env; - $start_time = microtime(true); // Load the didyoumean index, but only if it's enabled etc @@ -186,15 +185,16 @@ class search $term, $settings->search_didyoumean_editdistance ); - if(empty($results)) return null; - usort($results, function($a, $b) : int { - return self::compare($a, $b); - }); + if(!empty($results)) { + usort($results, function($a, $b) : int { + return self::compare($a, $b); + }); + } if(!isset($env->perfdata->didyoumean_correction)) $env->perfdata->didyoumean_correction = 0; $env->perfdata->didyoumean_correction += (microtime(true) - $start_time) * 1000; - return $results[0]; + return $results[0] ?? null; } public static function didyoumean_rebuild(bool $output = true) : void { @@ -674,6 +674,7 @@ class search if(module_exists("feature-search-didyoumean") && $settings->search_didyoumean_enabled) { $terms_count = count($result["terms"]); for($i = 0; $i < $terms_count; $i++) { + error_log("[stas_parse/didyoumean] Now looking at #$i: ".var_export($result["terms"][$i], true)."(total count: $terms_count)"); if($result["terms"][$i]["exact"] || // Skip exact-only $result["terms"][$i]["weight"] < 1 || // Skip stop & irrelevant words self::invindex_term_exists($result["terms"][$i]["term"])) continue; @@ -712,10 +713,14 @@ class search /** * Searches the given inverted index for the specified search terms. - * @param string $query The search query. + * Note that this automatically pushes the query string through STAS which + * can be a fairly expensive operation, so use 2nd argument if you need + * to debug the STAS parsing result if possible. + * @param string $query The search query. + * @param &stdClass $query_stas An object to fill with the result of the STAS parsing. * @return array An array of matching pages. */ - public static function invindex_query($query) + public static function invindex_query($query, &$query_stas = null) { global $settings, $pageindex;