Reduce number of calls to stas_parse()

This commit is contained in:
Starbeamrainbowlabs 2020-04-21 21:01:00 +01:00
parent 9f4dd1c149
commit c1af093ada
Signed by: sbrl
GPG Key ID: 1BE5172E637709C2
3 changed files with 21 additions and 18 deletions

View File

@ -47,6 +47,7 @@ This file holds the changelog for Pepperminty Wiki. This is the master list of t
- Optimisation: Refactor `stas_split()` to be faster (informal testing shows ~18% faster → 4% total time) - Optimisation: Refactor `stas_split()` to be faster (informal testing shows ~18% faster → 4% total time)
- [Module Api] Optimisation: Remove `search::transliterate` because it has a huge overhead. Use `search::$literator->transliterate()` instead. - [Module Api] Optimisation: Remove `search::transliterate` because it has a huge overhead. Use `search::$literator->transliterate()` instead.
- [Module Api] Add new `absolute` and `html` optional boolean arguments to `render_timestamp()` - [Module Api] Add new `absolute` and `html` optional boolean arguments to `render_timestamp()`
- [Module Api] `search::extract_context()` and `search::highlight_context()` now take a _parsed_ query (the output of `search::stas_parse()`) instead of a raw query string
## v0.20.3-hotfix3 ## v0.20.3-hotfix3

View File

@ -1,7 +1,7 @@
<?php <?php
register_module([ register_module([
"name" => "Search", "name" => "Search",
"version" => "0.13", "version" => "0.13.1",
"author" => "Starbeamrainbowlabs", "author" => "Starbeamrainbowlabs",
"description" => "Adds proper search functionality to Pepperminty Wiki using an inverted index to provide a full text search engine. If pages don't show up, then you might have hit a stop word. If not, try requesting the `invindex-rebuild` action to rebuild the inverted index from scratch.", "description" => "Adds proper search functionality to Pepperminty Wiki using an inverted index to provide a full text search engine. If pages don't show up, then you might have hit a stop word. If not, try requesting the `invindex-rebuild` action to rebuild the inverted index from scratch.",
"id" => "feature-search", "id" => "feature-search",
@ -141,7 +141,8 @@ register_module([
$env->perfdata->invindex_decode_time = round((microtime(true) - $time_start)*1000, 3); $env->perfdata->invindex_decode_time = round((microtime(true) - $time_start)*1000, 3);
$time_start = microtime(true); $time_start = microtime(true);
$results = search::invindex_query($_GET["query"]); $query_parsed = null;
$results = search::invindex_query($_GET["query"], $query_parsed);
$resultCount = count($results); $resultCount = count($results);
$env->perfdata->invindex_query_time = round((microtime(true) - $time_start)*1000, 3); $env->perfdata->invindex_query_time = round((microtime(true) - $time_start)*1000, 3);
@ -152,7 +153,7 @@ register_module([
foreach($results as &$result) { foreach($results as &$result) {
$result["context"] = search::extract_context( $result["context"] = search::extract_context(
$result["pagename"], $result["pagename"],
$_GET["query"], $query_parsed,
file_get_contents($env->storage_prefix . $result["pagename"] . ".md") file_get_contents($env->storage_prefix . $result["pagename"] . ".md")
); );
} }
@ -203,7 +204,7 @@ register_module([
$content .= "<br /><small><em>Pssst! Power users can make use of $settings->sitename's advanced query syntax. Learn about it <a href='?action=help#27-search'>here</a>!</em></small></p>"; $content .= "<br /><small><em>Pssst! Power users can make use of $settings->sitename's advanced query syntax. Learn about it <a href='?action=help#27-search'>here</a>!</em></small></p>";
if(module_exists("page-list")) { if(module_exists("page-list")) {
// TODO: Refactor ths to use STAS // TODO: Refactor this to use STAS
$nterms = search::tokenize($query); $nterms = search::tokenize($query);
$nterms_regex = implode("|", array_map(function($nterm) { $nterms_regex = implode("|", array_map(function($nterm) {
return preg_quote(strtolower(trim($nterm))); return preg_quote(strtolower(trim($nterm)));
@ -236,7 +237,7 @@ register_module([
$context = mb_substr($pagesource, 0, $settings->search_characters_context * 2); $context = mb_substr($pagesource, 0, $settings->search_characters_context * 2);
//echo("'Generated search context for " . $result["pagename"] . ": $context'\n"); //echo("'Generated search context for " . $result["pagename"] . ": $context'\n");
$context = search::highlight_context( $context = search::highlight_context(
$_GET["query"], $query_parsed,
preg_replace('/</u', '&lt;', $context) preg_replace('/</u', '&lt;', $context)
); );
/*if(strlen($context) == 0) /*if(strlen($context) == 0)

View File

@ -1,7 +1,7 @@
<?php <?php
register_module([ register_module([
"name" => "Library: Search engine", "name" => "Library: Search engine",
"version" => "0.13", "version" => "0.13.1",
"author" => "Starbeamrainbowlabs", "author" => "Starbeamrainbowlabs",
"description" => "A library module that provides the backend to the search engine module.", "description" => "A library module that provides the backend to the search engine module.",
"id" => "lib-search-engine", "id" => "lib-search-engine",
@ -670,16 +670,17 @@ class search
"exact" => $exact // If true then we shouldn't try to autocorrect it "exact" => $exact // If true then we shouldn't try to autocorrect it
]; ];
} }
// Correct typos, but only if that's enabled // Correct typos, but only if that's enabled
if(module_exists("feature-search-didyoumean") && $settings->search_didyoumean_enabled) { if(module_exists("feature-search-didyoumean") && $settings->search_didyoumean_enabled) {
$terms_count = count($result["terms"]); $terms_count = count($result["terms"]);
for($i = 0; $i < $terms_count; $i++) { for($i = 0; $i < $terms_count; $i++) {
error_log("[stas_parse/didyoumean] Now looking at #$i: ".var_export($result["terms"][$i], true)."(total count: $terms_count)"); // error_log("[stas_parse/didyoumean] Now looking at #$i: ".var_export($result["terms"][$i], true)."(total count: $terms_count)");
if($result["terms"][$i]["exact"] || // Skip exact-only if($result["terms"][$i]["exact"] || // Skip exact-only
$result["terms"][$i]["weight"] < 1 || // Skip stop & irrelevant words $result["terms"][$i]["weight"] < 1 || // Skip stop & irrelevant words
self::invindex_term_exists($result["terms"][$i]["term"])) { self::invindex_term_exists($result["terms"][$i]["term"]))
$i++; continue; continue;
}
// It's not a stop word or in the index, try and correct it // It's not a stop word or in the index, try and correct it
// self::didyoumean_correct auto-loads the didyoumean index on-demand // self::didyoumean_correct auto-loads the didyoumean index on-demand
@ -687,7 +688,7 @@ class search
// Make a note if we fail to correct a term // Make a note if we fail to correct a term
if(!is_string($correction)) { if(!is_string($correction)) {
$result["terms"][$i]["corrected"] = false; $result["terms"][$i]["corrected"] = false;
$i++; continue; continue;
} }
$result["terms"][$i]["term_before"] = $result["terms"][$i]["term"]; $result["terms"][$i]["term_before"] = $result["terms"][$i]["term"];
@ -718,7 +719,7 @@ class search
* Note that this automatically pushes the query string through STAS which * Note that this automatically pushes the query string through STAS which
* can be a fairly expensive operation, so use 2nd argument if you need * can be a fairly expensive operation, so use 2nd argument if you need
* to debug the STAS parsing result if possible. * to debug the STAS parsing result if possible.
* @param string $query The search query. * @param string|array $query The search query. If an array is passed, it is assumed to have already been parsed with search::stas_parse().
* @param &stdClass $query_stas An object to fill with the result of the STAS parsing. * @param &stdClass $query_stas An object to fill with the result of the STAS parsing.
* @return array An array of matching pages. * @return array An array of matching pages.
*/ */
@ -892,16 +893,16 @@ class search
* Extracts a context string (in HTML) given a search query that could be displayed * Extracts a context string (in HTML) given a search query that could be displayed
* in a list of search results. * in a list of search results.
* @param string $pagename The name of the page that this source belongs to. Used when consulting the inverted index. * @param string $pagename The name of the page that this source belongs to. Used when consulting the inverted index.
* @param string $query The search queary to generate the context for. * @param array $query_parsed The *parsed* search query to generate the context for (basically the output of search::stas_parse()).
* @param string $source The page source to extract the context from. * @param string $source The page source to extract the context from.
* @return string The generated context string. * @return string The generated context string.
*/ */
public static function extract_context($pagename, $query, $source) public static function extract_context($pagename, $query_parsed, $source)
{ {
global $settings; global $settings;
$pageid = ids::getid($pagename); $pageid = ids::getid($pagename);
$nterms = self::stas_parse(self::stas_split($query))["terms"]; $nterms = $query_parsed["terms"];
// Query the inverted index for offsets // Query the inverted index for offsets
$matches = []; $matches = [];
@ -978,13 +979,13 @@ class search
/** /**
* Highlights the keywords of a context string. * Highlights the keywords of a context string.
* @param string $query The query to use when highlighting. * @param array $query_parsed The *parsed* query to use when highlighting (the output of search::stas_parse())
* @param string $context The context string to highlight. * @param string $context The context string to highlight.
* @return string The highlighted (HTML) string. * @return string The highlighted (HTML) string.
*/ */
public static function highlight_context($query, $context) public static function highlight_context($query_parsed, $context)
{ {
$qterms = self::stas_parse(self::stas_split($query))["terms"]; $qterms = $query_parsed["terms"];
foreach($qterms as $qterm) { foreach($qterms as $qterm) {
// Stop words are marked by STAS // Stop words are marked by STAS