1
0
Fork 0
mirror of https://github.com/sbrl/Pepperminty-Wiki.git synced 2024-11-22 16:33:00 +00:00

Finish refactoring invindex_query

This commit is contained in:
Starbeamrainbowlabs 2019-08-22 17:43:14 +01:00
parent 94bc787ea7
commit e08e775d98
Signed by: sbrl
GPG key ID: 1BE5172E637709C2

View file

@ -132,15 +132,15 @@ register_module([
$time_start = microtime(true); $time_start = microtime(true);
$invindex = search::invindex_load($paths->searchindex); search::invindex_load($paths->searchindex);
$env->perfdata->invindex_decode_time = round((microtime(true) - $time_start)*1000, 3); $env->perfdata->invindex_decode_time = round((microtime(true) - $time_start)*1000, 3);
$time_start = microtime(true); $time_start = microtime(true);
$results = search::invindex_query($_GET["query"], $invindex); $results = search::invindex_query($_GET["query"]);
$resultCount = count($results); $resultCount = count($results);
$env->perfdata->invindex_query_time = round((microtime(true) - $time_start)*1000, 3); $env->perfdata->invindex_query_time = round((microtime(true) - $time_start)*1000, 3);
header("x-invindex-decode-time: {$env->perfdata->invindex_decode_time}ms"); header("x-invindex-load-time: {$env->perfdata->invindex_decode_time}ms");
header("x-invindex-query-time: {$env->perfdata->invindex_query_time}ms"); header("x-invindex-query-time: {$env->perfdata->invindex_query_time}ms");
$start = microtime(true); $start = microtime(true);
@ -179,29 +179,26 @@ register_module([
$content .= "<p>Found $resultCount " . ($resultCount === 1 ? "result" : "results") . " in " . $env->perfdata->search_time . "ms. "; $content .= "<p>Found $resultCount " . ($resultCount === 1 ? "result" : "results") . " in " . $env->perfdata->search_time . "ms. ";
$query = $_GET["query"]; $query = $_GET["query"];
if(isset($pageindex->$query)) if(isset($pageindex->$query)) {
{
$content .= "There's a page on $settings->sitename called <a href='?page=" . rawurlencode($query) . "'>$query</a>."; $content .= "There's a page on $settings->sitename called <a href='?page=" . rawurlencode($query) . "'>$query</a>.";
} }
else else
{ {
$content .= "There isn't a page called $query on $settings->sitename, but you "; $content .= "There isn't a page called $query on $settings->sitename, but you ";
if((!$settings->anonedits && !$env->is_logged_in) || !$settings->editing) if((!$settings->anonedits && !$env->is_logged_in) || !$settings->editing) {
{
$content .= "do not have permission to create it."; $content .= "do not have permission to create it.";
if(!$env->is_logged_in) if(!$env->is_logged_in) {
{
$content .= " You could try <a href='?action=login&returnto=" . rawurlencode($_SERVER["REQUEST_URI"]) . "'>logging in</a>."; $content .= " You could try <a href='?action=login&returnto=" . rawurlencode($_SERVER["REQUEST_URI"]) . "'>logging in</a>.";
} }
} }
else else {
{
$content .= "can <a href='?action=edit&page=" . rawurlencode($query) . "'>create it</a>."; $content .= "can <a href='?action=edit&page=" . rawurlencode($query) . "'>create it</a>.";
} }
} }
$content .= "</p>"; $content .= "</p>";
if(module_exists("page-list")) { if(module_exists("page-list")) {
// TODO: Refactor this to use STAS
$nterms = search::tokenize($query); $nterms = search::tokenize($query);
$nterms_regex = implode("|", array_map(function($nterm) { $nterms_regex = implode("|", array_map(function($nterm) {
return preg_quote(strtolower(trim($nterm))); return preg_quote(strtolower(trim($nterm)));
@ -1172,8 +1169,7 @@ class search
// TODO: We got up to here; finish refactoring invindex_query // TODO: We got up to here; finish refactoring invindex_query
reset($matching_pages); reset($matching_pages);
foreach($matching_pages as $pageid => &$pagedata) foreach($matching_pages as $pageid => &$pagedata) {
{
$pagedata["pagename"] = ids::getpagename($pageid); $pagedata["pagename"] = ids::getpagename($pageid);
$pagedata["rank"] = 0; $pagedata["rank"] = 0;
@ -1181,24 +1177,20 @@ class search
// Loop over each search term found on this page // Loop over each search term found on this page
reset($pagedata["nterms"]); reset($pagedata["nterms"]);
foreach($pagedata["nterms"] as $pterm => $entry) foreach($pagedata["nterms"] as $pterm => $frequency) {
{
// Add the number of occurrences of this search term to the ranking // Add the number of occurrences of this search term to the ranking
// Multiply it by the length of the word // Multiply it by the length of the word
$pagedata["rank"] += $entry["freq"] * strlen($pterm); $pagedata["rank"] += $frequency * strlen($pterm);
// Add the offsets to a list of all offsets on this page
foreach($entry["offsets"] as $offset)
$pageOffsets[] = $offset;
} }
// Consider matches in the title / tags // Consider matches in the title / tags
if(isset($pagedata["title-matches"])) $pagedata["rank"] += $pagedata["rank_title"] + $pagedata["rank_tags"];
$pagedata["rank"] += $pagedata["title-matches"] * $settings->search_title_matches_weighting;
if(isset($pagedata["tag-matches"]))
$pagedata["rank"] += $pagedata["tag-matches"] * $settings->search_tags_matches_weighting;
// todo remove items if the rank is below a threshold // TODO: Consider implementing kernel density estimation here.
// https://en.wikipedia.org/wiki/Kernel_density_estimation
// We want it to have more of an effect the more words that are present in the query. Maybe a logarithmic function would be worth investigating here?
// TODO: Remove items if the computed rank is below a threshold
} }
uasort($matching_pages, function($a, $b) { uasort($matching_pages, function($a, $b) {