More bugfixes & optimisations, but I'm still not sure what's up with the correction system

This commit is contained in:
Starbeamrainbowlabs 2020-03-15 21:28:56 +00:00
parent 99f920f11d
commit 5232bb3060
Signed by: sbrl
GPG Key ID: 1BE5172E637709C2
3 changed files with 23 additions and 10 deletions

View File

@ -348,18 +348,22 @@ class BkTree {
* @return array<string> Similar resultant strings from the BK-Tree.
*/
public function lookup(string $string, int $max_distance = 1, int $count = 0) : array {
error_log("[BkTree/lookup]".var_export($string, true).", dist ".var_export($max_distance, true).", count:".var_export($count, true));
if($this->get_node_count() == 0) return null;
$result = []; $result_count = 0;
$stack = [ $this->box->get("node|0") ];
$stack_top = 0;
$nodes = 0;
// https://softwareengineering.stackexchange.com/a/226162/58491
while($stack_top >= 0) {
// Take the topmost node off the stack
$node_current = $stack[$stack_top];
unset($stack[$stack_top]);
$stack_top--;
$nodes++;
$distance = levenshtein($string, $node_current->value, $this->cost_insert, $this->cost_replace, $this->cost_delete);
@ -379,6 +383,8 @@ class BkTree {
}
}
error_log("Nodes traversed: $nodes\n");
return $result;
}

View File

@ -308,7 +308,8 @@ register_module([
search::invindex_load($paths->searchindex);
$env->perfdata->searchindex_decode_time = (microtime(true) - $env->perfdata->searchindex_decode_start) * 1000;
$env->perfdata->searchindex_query_start = microtime(true);
$searchResults = search::invindex_query($_GET["query"]);
$query_stas = null;
$searchResults = search::invindex_query($_GET["query"], $query_stas);
$env->perfdata->searchindex_query_time = (microtime(true) - $env->perfdata->searchindex_query_start) * 1000;
header("content-type: application/json");
@ -319,7 +320,8 @@ register_module([
if(isset($env->perfdata->didyoumean_correction))
$result->didyoumean_correction_time = $env->perfdata->didyoumean_correction;
$result->total_time = $result->decode_time + $result->query_time;
$result->stas = search::stas_parse(search::stas_split($_GET["query"]));
// $result->stas = search::stas_parse(search::stas_split($_GET["query"]));
$result->stas = $query_stas;
$result->search_results = $searchResults;
exit(json_encode($result, JSON_PRETTY_PRINT));
});

View File

@ -171,7 +171,6 @@ class search
*/
public static function didyoumean_correct(string $term) : ?string {
global $settings, $paths, $env;
$start_time = microtime(true);
// Load the didyoumean index, but only if it's enabled etc
@ -186,15 +185,16 @@ class search
$term,
$settings->search_didyoumean_editdistance
);
if(empty($results)) return null;
usort($results, function($a, $b) : int {
return self::compare($a, $b);
});
if(!empty($results)) {
usort($results, function($a, $b) : int {
return self::compare($a, $b);
});
}
if(!isset($env->perfdata->didyoumean_correction))
$env->perfdata->didyoumean_correction = 0;
$env->perfdata->didyoumean_correction += (microtime(true) - $start_time) * 1000;
return $results[0];
return $results[0] ?? null;
}
public static function didyoumean_rebuild(bool $output = true) : void {
@ -674,6 +674,7 @@ class search
if(module_exists("feature-search-didyoumean") && $settings->search_didyoumean_enabled) {
$terms_count = count($result["terms"]);
for($i = 0; $i < $terms_count; $i++) {
error_log("[stas_parse/didyoumean] Now looking at #$i: ".var_export($result["terms"][$i], true)."(total count: $terms_count)");
if($result["terms"][$i]["exact"] || // Skip exact-only
$result["terms"][$i]["weight"] < 1 || // Skip stop & irrelevant words
self::invindex_term_exists($result["terms"][$i]["term"])) continue;
@ -712,10 +713,14 @@ class search
/**
* Searches the given inverted index for the specified search terms.
* @param string $query The search query.
* Note that this automatically pushes the query string through STAS, which
* can be a fairly expensive operation. If you need to inspect or debug the
* STAS parsing result, use the 2nd argument to retrieve it rather than parsing the query a second time.
* @param string $query The search query.
* @param &stdClass $query_stas An object to fill with the result of the STAS parsing.
* @return array An array of matching pages.
*/
public static function invindex_query($query)
public static function invindex_query($query, &$query_stas = null)
{
global $settings, $pageindex;