Reduce number of calls to stas_parse()

This commit is contained in:
Starbeamrainbowlabs 2020-04-21 21:01:00 +01:00
parent 9f4dd1c149
commit c1af093ada
Signed by: sbrl
GPG Key ID: 1BE5172E637709C2
3 changed files with 21 additions and 18 deletions

View File

@ -47,6 +47,7 @@ This file holds the changelog for Pepperminty Wiki. This is the master list of t
- Optimisation: Refactor `stas_split()` to be faster (informal testing shows ~18% faster → 4% total time)
- [Module Api] Optimisation: Remove `search::transliterate` because it has a huge overhead. Use `search::$literator->transliterate()` instead.
- [Module Api] Add new `absolute` and `html` optional boolean arguments to `render_timestamp()`
- [Module Api] `search::extract_context()` and `search::highlight_context()` now take in a _parsed_ query (with `search::stas_parse()`), not a raw string
## v0.20.3-hotfix3

View File

@ -1,7 +1,7 @@
<?php
register_module([
"name" => "Search",
"version" => "0.13",
"version" => "0.13.1",
"author" => "Starbeamrainbowlabs",
"description" => "Adds proper search functionality to Pepperminty Wiki using an inverted index to provide a full text search engine. If pages don't show up, then you might have hit a stop word. If not, try requesting the `invindex-rebuild` action to rebuild the inverted index from scratch.",
"id" => "feature-search",
@ -141,7 +141,8 @@ register_module([
$env->perfdata->invindex_decode_time = round((microtime(true) - $time_start)*1000, 3);
$time_start = microtime(true);
$results = search::invindex_query($_GET["query"]);
$query_parsed = null;
$results = search::invindex_query($_GET["query"], $query_parsed);
$resultCount = count($results);
$env->perfdata->invindex_query_time = round((microtime(true) - $time_start)*1000, 3);
@ -152,7 +153,7 @@ register_module([
foreach($results as &$result) {
$result["context"] = search::extract_context(
$result["pagename"],
$_GET["query"],
$query_parsed,
file_get_contents($env->storage_prefix . $result["pagename"] . ".md")
);
}
@ -203,7 +204,7 @@ register_module([
$content .= "<br /><small><em>Pssst! Power users can make use of $settings->sitename's advanced query syntax. Learn about it <a href='?action=help#27-search'>here</a>!</em></small></p>";
if(module_exists("page-list")) {
// TODO: Refactor ths to use STAS
// TODO: Refactor this to use STAS
$nterms = search::tokenize($query);
$nterms_regex = implode("|", array_map(function($nterm) {
return preg_quote(strtolower(trim($nterm)));
@ -236,7 +237,7 @@ register_module([
$context = mb_substr($pagesource, 0, $settings->search_characters_context * 2);
//echo("'Generated search context for " . $result["pagename"] . ": $context'\n");
$context = search::highlight_context(
$_GET["query"],
$query_parsed,
preg_replace('/</u', '&lt;', $context)
);
/*if(strlen($context) == 0)

View File

@ -1,7 +1,7 @@
<?php
register_module([
"name" => "Library: Search engine",
"version" => "0.13",
"version" => "0.13.1",
"author" => "Starbeamrainbowlabs",
"description" => "A library module that provides the backend to the search engine module.",
"id" => "lib-search-engine",
@ -670,16 +670,17 @@ class search
"exact" => $exact // If true then we shouldn't try to autocorrect it
];
}
// Correct typos, but only if that's enabled
if(module_exists("feature-search-didyoumean") && $settings->search_didyoumean_enabled) {
$terms_count = count($result["terms"]);
for($i = 0; $i < $terms_count; $i++) {
error_log("[stas_parse/didyoumean] Now looking at #$i: ".var_export($result["terms"][$i], true)."(total count: $terms_count)");
// error_log("[stas_parse/didyoumean] Now looking at #$i: ".var_export($result["terms"][$i], true)."(total count: $terms_count)");
if($result["terms"][$i]["exact"] || // Skip exact-only
$result["terms"][$i]["weight"] < 1 || // Skip stop & irrelevant words
self::invindex_term_exists($result["terms"][$i]["term"])) {
$i++; continue;
}
self::invindex_term_exists($result["terms"][$i]["term"]))
continue;
// It's not a stop word or in the index, try and correct it
// self::didyoumean_correct auto-loads the didyoumean index on-demand
@ -687,7 +688,7 @@ class search
// Make a note if we fail to correct a term
if(!is_string($correction)) {
$result["terms"][$i]["corrected"] = false;
$i++; continue;
continue;
}
$result["terms"][$i]["term_before"] = $result["terms"][$i]["term"];
@ -718,7 +719,7 @@ class search
* Note that this automatically pushes the query string through STAS which
* can be a fairly expensive operation, so use 2nd argument if you need
* to debug the STAS parsing result if possible.
* @param string $query The search query.
* @param string|array $query The search query. If an array is passed, it is assumed to have already been pre-parsed with search::stas_parse().
* @param &stdClass $query_stas An object to fill with the result of the STAS parsing.
* @return array An array of matching pages.
*/
@ -892,16 +893,16 @@ class search
* Extracts a context string (in HTML) given a search query that could be displayed
* in a list of search results.
* @param string $pagename The name of the page that this source belongs to. Used when consulting the inverted index.
* @param string $query The search queary to generate the context for.
* @param array $query_parsed The *parsed* search query to generate the context for (basically the output of search::stas_parse()).
* @param string $source The page source to extract the context from.
* @return string The generated context string.
*/
public static function extract_context($pagename, $query, $source)
public static function extract_context($pagename, $query_parsed, $source)
{
global $settings;
$pageid = ids::getid($pagename);
$nterms = self::stas_parse(self::stas_split($query))["terms"];
$nterms = $query_parsed["terms"];
// Query the inverted index for offsets
$matches = [];
@ -978,13 +979,13 @@ class search
/**
* Highlights the keywords of a context string.
* @param string $query The query to use when highlighting.
* @param array $query_parsed The *parsed* query to use when highlighting (the output of search::stas_parse())
* @param string $context The context string to highlight.
* @return string The highlighted (HTML) string.
*/
public static function highlight_context($query, $context)
public static function highlight_context($query_parsed, $context)
{
$qterms = self::stas_parse(self::stas_split($query))["terms"];
$qterms = $query_parsed["terms"];
foreach($qterms as $qterm) {
// Stop words are marked by STAS