From c1af093ada1794206472854b84bdfe46acd274aa Mon Sep 17 00:00:00 2001 From: Starbeamrainbowlabs Date: Tue, 21 Apr 2020 21:01:00 +0100 Subject: [PATCH] Reduce number of calls to stas_parse() --- Changelog.md | 1 + modules/feature-search.php | 11 ++++++----- modules/lib-search-engine.php | 27 ++++++++++++++------------- 3 files changed, 21 insertions(+), 18 deletions(-) diff --git a/Changelog.md b/Changelog.md index 3a9e044..9fbac90 100644 --- a/Changelog.md +++ b/Changelog.md @@ -47,6 +47,7 @@ This file holds the changelog for Pepperminty Wiki. This is the master list of t - Optimisation: Refactor `stas_split()` to be faster (informal testing shows ~18% faster → 4% total time) - [Module Api] Optimisation: Remove `search::transliterate` because it has a huge overhead. Use `search::$literator->transliterate()` instead. - [Module Api] Add new `absolute` and `html` optional boolean arguments to `render_timestamp()` + - [Module Api] `search::extract_context()` and `search::highlight_context()` now take in a _parsed_ query (with `search::stas_parse()`), not a raw string ## v0.20.3-hotfix3 diff --git a/modules/feature-search.php b/modules/feature-search.php index 790976c..84f5e49 100644 --- a/modules/feature-search.php +++ b/modules/feature-search.php @@ -1,7 +1,7 @@ "Search", - "version" => "0.13", + "version" => "0.13.1", "author" => "Starbeamrainbowlabs", "description" => "Adds proper search functionality to Pepperminty Wiki using an inverted index to provide a full text search engine. If pages don't show up, then you might have hit a stop word. 
If not, try requesting the `invindex-rebuild` action to rebuild the inverted index from scratch.", "id" => "feature-search", @@ -141,7 +141,8 @@ register_module([ $env->perfdata->invindex_decode_time = round((microtime(true) - $time_start)*1000, 3); $time_start = microtime(true); - $results = search::invindex_query($_GET["query"]); + $query_parsed = null; + $results = search::invindex_query($_GET["query"], $query_parsed); $resultCount = count($results); $env->perfdata->invindex_query_time = round((microtime(true) - $time_start)*1000, 3); @@ -152,7 +153,7 @@ register_module([ foreach($results as &$result) { $result["context"] = search::extract_context( $result["pagename"], - $_GET["query"], + $query_parsed, file_get_contents($env->storage_prefix . $result["pagename"] . ".md") ); } @@ -203,7 +204,7 @@ register_module([ $content .= "
Pssst! Power users can make use of $settings->sitename's advanced query syntax. Learn about it here!

"; if(module_exists("page-list")) { - // TODO: Refactor ths to use STAS + // TODO: Refactor this to use STAS $nterms = search::tokenize($query); $nterms_regex = implode("|", array_map(function($nterm) { return preg_quote(strtolower(trim($nterm))); @@ -236,7 +237,7 @@ register_module([ $context = mb_substr($pagesource, 0, $settings->search_characters_context * 2); //echo("'Generated search context for " . $result["pagename"] . ": $context'\n"); $context = search::highlight_context( - $_GET["query"], + $query_parsed, preg_replace('/ "Library: Search engine", - "version" => "0.13", + "version" => "0.13.1", "author" => "Starbeamrainbowlabs", "description" => "A library module that provides the backend to the search engine module.", "id" => "lib-search-engine", @@ -670,16 +670,17 @@ class search "exact" => $exact // If true then we shouldn't try to autocorrect it ]; } + + // Correct typos, but only if that's enabled if(module_exists("feature-search-didyoumean") && $settings->search_didyoumean_enabled) { $terms_count = count($result["terms"]); for($i = 0; $i < $terms_count; $i++) { - error_log("[stas_parse/didyoumean] Now looking at #$i: ".var_export($result["terms"][$i], true)."(total count: $terms_count)"); + // error_log("[stas_parse/didyoumean] Now looking at #$i: ".var_export($result["terms"][$i], true)."(total count: $terms_count)"); if($result["terms"][$i]["exact"] || // Skip exact-only $result["terms"][$i]["weight"] < 1 || // Skip stop & irrelevant words - self::invindex_term_exists($result["terms"][$i]["term"])) { - $i++; continue; - } + self::invindex_term_exists($result["terms"][$i]["term"])) + continue; // It's not a stop word or in the index, try and correct it // self::didyoumean_correct auto-loads the didyoumean index on-demand @@ -687,7 +688,7 @@ class search // Make a note if we fail to correct a term if(!is_string($correction)) { $result["terms"][$i]["corrected"] = false; - $i++; continue; + continue; } $result["terms"][$i]["term_before"] = 
$result["terms"][$i]["term"]; @@ -718,7 +719,7 @@ class search * Note that this automatically pushes the query string through STAS which * can be a fairly expensive operation, so use 2nd argument if you need * to debug the STAS parsing result if possible. - * @param string $query The search query. + * @param string|array $query The search query. If an array is passed, it is assumed it has already been pre-parsed with search::stas_parse(). * @param &stdClass $query_stas An object to fill with the result of the STAS parsing. * @return array An array of matching pages. */ @@ -892,16 +893,16 @@ class search * Extracts a context string (in HTML) given a search query that could be displayed * in a list of search results. * @param string $pagename The name of the paget that this source belongs to. Used when consulting the inverted index. - * @param string $query The search queary to generate the context for. + * @param array $query_parsed The *parsed* search query to generate the context for (basically the output of search::stas_parse()). * @param string $source The page source to extract the context from. * @return string The generated context string. */ - public static function extract_context($pagename, $query, $source) + public static function extract_context($pagename, $query_parsed, $source) { global $settings; $pageid = ids::getid($pagename); - $nterms = self::stas_parse(self::stas_split($query))["terms"]; + $nterms = $query_parsed["terms"]; // Query the inverted index for offsets $matches = []; @@ -978,13 +979,13 @@ class search /** * Highlights the keywords of a context string. - * @param string $query The query to use when highlighting. + * @param array $query_parsed The *parsed* query to use when highlighting (the output of search::stas_parse()) * @param string $context The context string to highlight. * @return string The highlighted (HTML) string. 
*/ - public static function highlight_context($query, $context) + public static function highlight_context($query_parsed, $context) { - $qterms = self::stas_parse(self::stas_split($query))["terms"]; + $qterms = $query_parsed["terms"]; foreach($qterms as $qterm) { // Stop words are marked by STAS