mirror of
https://github.com/sbrl/Pepperminty-Wiki.git
synced 2024-12-22 13:45:02 +00:00
Reduce number of calls to stas_parse()
This commit is contained in:
parent
9f4dd1c149
commit
c1af093ada
3 changed files with 21 additions and 18 deletions
|
@ -47,6 +47,7 @@ This file holds the changelog for Pepperminty Wiki. This is the master list of t
|
|||
- Optimisation: Refactor `stas_split()` to be faster (informal testing shows ~18% faster → 4% total time)
|
||||
- [Module Api] Optimisation: Remove `search::transliterate` because it has a huge overhead. Use `search::$literator->transliterate()` instead.
|
||||
- [Module Api] Add new `absolute` and `html` optional boolean arguments to `render_timestamp()`
|
||||
- [Module Api] `search::extract_context()` and `search::highlight_context()` now take in a _parsed_ query (with `search::stas_parse()`), not a raw string
|
||||
|
||||
|
||||
## v0.20.3-hotfix3
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
<?php
|
||||
register_module([
|
||||
"name" => "Search",
|
||||
"version" => "0.13",
|
||||
"version" => "0.13.1",
|
||||
"author" => "Starbeamrainbowlabs",
|
||||
"description" => "Adds proper search functionality to Pepperminty Wiki using an inverted index to provide a full text search engine. If pages don't show up, then you might have hit a stop word. If not, try requesting the `invindex-rebuild` action to rebuild the inverted index from scratch.",
|
||||
"id" => "feature-search",
|
||||
|
@ -141,7 +141,8 @@ register_module([
|
|||
$env->perfdata->invindex_decode_time = round((microtime(true) - $time_start)*1000, 3);
|
||||
|
||||
$time_start = microtime(true);
|
||||
$results = search::invindex_query($_GET["query"]);
|
||||
$query_parsed = null;
|
||||
$results = search::invindex_query($_GET["query"], $query_parsed);
|
||||
$resultCount = count($results);
|
||||
$env->perfdata->invindex_query_time = round((microtime(true) - $time_start)*1000, 3);
|
||||
|
||||
|
@ -152,7 +153,7 @@ register_module([
|
|||
foreach($results as &$result) {
|
||||
$result["context"] = search::extract_context(
|
||||
$result["pagename"],
|
||||
$_GET["query"],
|
||||
$query_parsed,
|
||||
file_get_contents($env->storage_prefix . $result["pagename"] . ".md")
|
||||
);
|
||||
}
|
||||
|
@ -203,7 +204,7 @@ register_module([
|
|||
$content .= "<br /><small><em>Pssst! Power users can make use of $settings->sitename's advanced query syntax. Learn about it <a href='?action=help#27-search'>here</a>!</em></small></p>";
|
||||
|
||||
if(module_exists("page-list")) {
|
||||
// TODO: Refactor ths to use STAS
|
||||
// TODO: Refactor this to use STAS
|
||||
$nterms = search::tokenize($query);
|
||||
$nterms_regex = implode("|", array_map(function($nterm) {
|
||||
return preg_quote(strtolower(trim($nterm)));
|
||||
|
@ -236,7 +237,7 @@ register_module([
|
|||
$context = mb_substr($pagesource, 0, $settings->search_characters_context * 2);
|
||||
//echo("'Generated search context for " . $result["pagename"] . ": $context'\n");
|
||||
$context = search::highlight_context(
|
||||
$_GET["query"],
|
||||
$query_parsed,
|
||||
preg_replace('/</u', '<', $context)
|
||||
);
|
||||
/*if(strlen($context) == 0)
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
<?php
|
||||
register_module([
|
||||
"name" => "Library: Search engine",
|
||||
"version" => "0.13",
|
||||
"version" => "0.13.1",
|
||||
"author" => "Starbeamrainbowlabs",
|
||||
"description" => "A library module that provides the backend to the search engine module.",
|
||||
"id" => "lib-search-engine",
|
||||
|
@ -670,16 +670,17 @@ class search
|
|||
"exact" => $exact // If true then we shouldn't try to autocorrect it
|
||||
];
|
||||
}
|
||||
|
||||
|
||||
// Correct typos, but only if that's enabled
|
||||
if(module_exists("feature-search-didyoumean") && $settings->search_didyoumean_enabled) {
|
||||
$terms_count = count($result["terms"]);
|
||||
for($i = 0; $i < $terms_count; $i++) {
|
||||
error_log("[stas_parse/didyoumean] Now looking at #$i: ".var_export($result["terms"][$i], true)."(total count: $terms_count)");
|
||||
// error_log("[stas_parse/didyoumean] Now looking at #$i: ".var_export($result["terms"][$i], true)."(total count: $terms_count)");
|
||||
if($result["terms"][$i]["exact"] || // Skip exact-only
|
||||
$result["terms"][$i]["weight"] < 1 || // Skip stop & irrelevant words
|
||||
self::invindex_term_exists($result["terms"][$i]["term"])) {
|
||||
$i++; continue;
|
||||
}
|
||||
self::invindex_term_exists($result["terms"][$i]["term"]))
|
||||
continue;
|
||||
|
||||
// It's not a stop word or in the index, try and correct it
|
||||
// self::didyoumean_correct auto-loads the didyoumean index on-demand
|
||||
|
@ -687,7 +688,7 @@ class search
|
|||
// Make a note if we fail to correct a term
|
||||
if(!is_string($correction)) {
|
||||
$result["terms"][$i]["corrected"] = false;
|
||||
$i++; continue;
|
||||
continue;
|
||||
}
|
||||
|
||||
$result["terms"][$i]["term_before"] = $result["terms"][$i]["term"];
|
||||
|
@ -718,7 +719,7 @@ class search
|
|||
* Note that this automatically pushes the query string through STAS which
|
||||
* can be a fairly expensive operation, so use 2nd argument if you need
|
||||
* to debug the STAS parsing result if possible.
|
||||
* @param string $query The search query.
|
||||
* @param string|array $query The search query. If an array is passed, it is assumed to have already been parsed with search::stas_parse().
|
||||
* @param &stdClass $query_stas An object to fill with the result of the STAS parsing.
|
||||
* @return array An array of matching pages.
|
||||
*/
|
||||
|
@ -892,16 +893,16 @@ class search
|
|||
* Extracts a context string (in HTML) given a search query that could be displayed
|
||||
* in a list of search results.
|
||||
* @param string $pagename The name of the page that this source belongs to. Used when consulting the inverted index.
|
||||
* @param string $query The search queary to generate the context for.
|
||||
* @param array $query_parsed The *parsed* search query to generate the context for (basically the output of search::stas_parse()).
|
||||
* @param string $source The page source to extract the context from.
|
||||
* @return string The generated context string.
|
||||
*/
|
||||
public static function extract_context($pagename, $query, $source)
|
||||
public static function extract_context($pagename, $query_parsed, $source)
|
||||
{
|
||||
global $settings;
|
||||
|
||||
$pageid = ids::getid($pagename);
|
||||
$nterms = self::stas_parse(self::stas_split($query))["terms"];
|
||||
$nterms = $query_parsed["terms"];
|
||||
|
||||
// Query the inverted index for offsets
|
||||
$matches = [];
|
||||
|
@ -978,13 +979,13 @@ class search
|
|||
|
||||
/**
|
||||
* Highlights the keywords of a context string.
|
||||
* @param string $query The query to use when highlighting.
|
||||
* @param array $query_parsed The *parsed* query to use when highlighting (the output of search::stas_parse())
|
||||
* @param string $context The context string to highlight.
|
||||
* @return string The highlighted (HTML) string.
|
||||
*/
|
||||
public static function highlight_context($query, $context)
|
||||
public static function highlight_context($query_parsed, $context)
|
||||
{
|
||||
$qterms = self::stas_parse(self::stas_split($query))["terms"];
|
||||
$qterms = $query_parsed["terms"];
|
||||
|
||||
foreach($qterms as $qterm) {
|
||||
// Stop words are marked by STAS
|
||||
|
|
Loading…
Reference in a new issue