mirror of
https://github.com/sbrl/Pepperminty-Wiki.git
synced 2024-11-22 16:33:00 +00:00
Reduce number of calls to stas_parse()
This commit is contained in:
parent
9f4dd1c149
commit
c1af093ada
3 changed files with 21 additions and 18 deletions
|
@ -47,6 +47,7 @@ This file holds the changelog for Pepperminty Wiki. This is the master list of t
|
||||||
- Optimisation: Refactor `stas_split()` to be faster (informal testing shows ~18% faster → 4% total time)
|
- Optimisation: Refactor `stas_split()` to be faster (informal testing shows ~18% faster → 4% total time)
|
||||||
- [Module Api] Optimisation: Remove `search::transliterate` because it has a huge overhead. Use `search::$literator->transliterate()` instead.
|
- [Module Api] Optimisation: Remove `search::transliterate` because it has a huge overhead. Use `search::$literator->transliterate()` instead.
|
||||||
- [Module Api] Add new `absolute` and `html` optional boolean arguments to `render_timestamp()`
|
- [Module Api] Add new `absolute` and `html` optional boolean arguments to `render_timestamp()`
|
||||||
|
- [Module Api] `search::extract_context()` and `search::highlight_context()` now take in a _parsed_ query (with `search::stas_parse()`), not a raw string
|
||||||
|
|
||||||
|
|
||||||
## v0.20.3-hotfix3
|
## v0.20.3-hotfix3
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
<?php
|
<?php
|
||||||
register_module([
|
register_module([
|
||||||
"name" => "Search",
|
"name" => "Search",
|
||||||
"version" => "0.13",
|
"version" => "0.13.1",
|
||||||
"author" => "Starbeamrainbowlabs",
|
"author" => "Starbeamrainbowlabs",
|
||||||
"description" => "Adds proper search functionality to Pepperminty Wiki using an inverted index to provide a full text search engine. If pages don't show up, then you might have hit a stop word. If not, try requesting the `invindex-rebuild` action to rebuild the inverted index from scratch.",
|
"description" => "Adds proper search functionality to Pepperminty Wiki using an inverted index to provide a full text search engine. If pages don't show up, then you might have hit a stop word. If not, try requesting the `invindex-rebuild` action to rebuild the inverted index from scratch.",
|
||||||
"id" => "feature-search",
|
"id" => "feature-search",
|
||||||
|
@ -141,7 +141,8 @@ register_module([
|
||||||
$env->perfdata->invindex_decode_time = round((microtime(true) - $time_start)*1000, 3);
|
$env->perfdata->invindex_decode_time = round((microtime(true) - $time_start)*1000, 3);
|
||||||
|
|
||||||
$time_start = microtime(true);
|
$time_start = microtime(true);
|
||||||
$results = search::invindex_query($_GET["query"]);
|
$query_parsed = null;
|
||||||
|
$results = search::invindex_query($_GET["query"], $query_parsed);
|
||||||
$resultCount = count($results);
|
$resultCount = count($results);
|
||||||
$env->perfdata->invindex_query_time = round((microtime(true) - $time_start)*1000, 3);
|
$env->perfdata->invindex_query_time = round((microtime(true) - $time_start)*1000, 3);
|
||||||
|
|
||||||
|
@ -152,7 +153,7 @@ register_module([
|
||||||
foreach($results as &$result) {
|
foreach($results as &$result) {
|
||||||
$result["context"] = search::extract_context(
|
$result["context"] = search::extract_context(
|
||||||
$result["pagename"],
|
$result["pagename"],
|
||||||
$_GET["query"],
|
$query_parsed,
|
||||||
file_get_contents($env->storage_prefix . $result["pagename"] . ".md")
|
file_get_contents($env->storage_prefix . $result["pagename"] . ".md")
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
@ -203,7 +204,7 @@ register_module([
|
||||||
$content .= "<br /><small><em>Pssst! Power users can make use of $settings->sitename's advanced query syntax. Learn about it <a href='?action=help#27-search'>here</a>!</em></small></p>";
|
$content .= "<br /><small><em>Pssst! Power users can make use of $settings->sitename's advanced query syntax. Learn about it <a href='?action=help#27-search'>here</a>!</em></small></p>";
|
||||||
|
|
||||||
if(module_exists("page-list")) {
|
if(module_exists("page-list")) {
|
||||||
// TODO: Refactor ths to use STAS
|
// TODO: Refactor this to use STAS
|
||||||
$nterms = search::tokenize($query);
|
$nterms = search::tokenize($query);
|
||||||
$nterms_regex = implode("|", array_map(function($nterm) {
|
$nterms_regex = implode("|", array_map(function($nterm) {
|
||||||
return preg_quote(strtolower(trim($nterm)));
|
return preg_quote(strtolower(trim($nterm)));
|
||||||
|
@ -236,7 +237,7 @@ register_module([
|
||||||
$context = mb_substr($pagesource, 0, $settings->search_characters_context * 2);
|
$context = mb_substr($pagesource, 0, $settings->search_characters_context * 2);
|
||||||
//echo("'Generated search context for " . $result["pagename"] . ": $context'\n");
|
//echo("'Generated search context for " . $result["pagename"] . ": $context'\n");
|
||||||
$context = search::highlight_context(
|
$context = search::highlight_context(
|
||||||
$_GET["query"],
|
$query_parsed,
|
||||||
preg_replace('/</u', '<', $context)
|
preg_replace('/</u', '<', $context)
|
||||||
);
|
);
|
||||||
/*if(strlen($context) == 0)
|
/*if(strlen($context) == 0)
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
<?php
|
<?php
|
||||||
register_module([
|
register_module([
|
||||||
"name" => "Library: Search engine",
|
"name" => "Library: Search engine",
|
||||||
"version" => "0.13",
|
"version" => "0.13.1",
|
||||||
"author" => "Starbeamrainbowlabs",
|
"author" => "Starbeamrainbowlabs",
|
||||||
"description" => "A library module that provides the backend to the search engine module.",
|
"description" => "A library module that provides the backend to the search engine module.",
|
||||||
"id" => "lib-search-engine",
|
"id" => "lib-search-engine",
|
||||||
|
@ -670,16 +670,17 @@ class search
|
||||||
"exact" => $exact // If true then we shouldn't try to autocorrect it
|
"exact" => $exact // If true then we shouldn't try to autocorrect it
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Correct typos, but only if that's enabled
|
// Correct typos, but only if that's enabled
|
||||||
if(module_exists("feature-search-didyoumean") && $settings->search_didyoumean_enabled) {
|
if(module_exists("feature-search-didyoumean") && $settings->search_didyoumean_enabled) {
|
||||||
$terms_count = count($result["terms"]);
|
$terms_count = count($result["terms"]);
|
||||||
for($i = 0; $i < $terms_count; $i++) {
|
for($i = 0; $i < $terms_count; $i++) {
|
||||||
error_log("[stas_parse/didyoumean] Now looking at #$i: ".var_export($result["terms"][$i], true)."(total count: $terms_count)");
|
// error_log("[stas_parse/didyoumean] Now looking at #$i: ".var_export($result["terms"][$i], true)."(total count: $terms_count)");
|
||||||
if($result["terms"][$i]["exact"] || // Skip exact-only
|
if($result["terms"][$i]["exact"] || // Skip exact-only
|
||||||
$result["terms"][$i]["weight"] < 1 || // Skip stop & irrelevant words
|
$result["terms"][$i]["weight"] < 1 || // Skip stop & irrelevant words
|
||||||
self::invindex_term_exists($result["terms"][$i]["term"])) {
|
self::invindex_term_exists($result["terms"][$i]["term"]))
|
||||||
$i++; continue;
|
continue;
|
||||||
}
|
|
||||||
|
|
||||||
// It's not a stop word or in the index, try and correct it
|
// It's not a stop word or in the index, try and correct it
|
||||||
// self::didyoumean_correct auto-loads the didyoumean index on-demand
|
// self::didyoumean_correct auto-loads the didyoumean index on-demand
|
||||||
|
@ -687,7 +688,7 @@ class search
|
||||||
// Make a note if we fail to correct a term
|
// Make a note if we fail to correct a term
|
||||||
if(!is_string($correction)) {
|
if(!is_string($correction)) {
|
||||||
$result["terms"][$i]["corrected"] = false;
|
$result["terms"][$i]["corrected"] = false;
|
||||||
$i++; continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
$result["terms"][$i]["term_before"] = $result["terms"][$i]["term"];
|
$result["terms"][$i]["term_before"] = $result["terms"][$i]["term"];
|
||||||
|
@ -718,7 +719,7 @@ class search
|
||||||
* Note that this automatically pushes the query string through STAS which
|
* Note that this automatically pushes the query string through STAS which
|
||||||
* can be a fairly expensive operation, so use 2nd argument if you need
|
* can be a fairly expensive operation, so use 2nd argument if you need
|
||||||
* to debug the STAS parsing result if possible.
|
* to debug the STAS parsing result if possible.
|
||||||
* @param string $query The search query.
|
* @param string|array $query The search query. If an array is passed, it is assumed to have already been parsed with search::stas_parse().
|
||||||
* @param &stdClass $query_stas An object to fill with the result of the STAS parsing.
|
* @param &stdClass $query_stas An object to fill with the result of the STAS parsing.
|
||||||
* @return array An array of matching pages.
|
* @return array An array of matching pages.
|
||||||
*/
|
*/
|
||||||
|
@ -892,16 +893,16 @@ class search
|
||||||
* Extracts a context string (in HTML) given a search query that could be displayed
|
* Extracts a context string (in HTML) given a search query that could be displayed
|
||||||
* in a list of search results.
|
* in a list of search results.
|
||||||
* @param string $pagename The name of the page that this source belongs to. Used when consulting the inverted index.
|
* @param string $pagename The name of the page that this source belongs to. Used when consulting the inverted index.
|
||||||
* @param string $query The search queary to generate the context for.
|
* @param array $query_parsed The *parsed* search query to generate the context for (basically the output of search::stas_parse()).
|
||||||
* @param string $source The page source to extract the context from.
|
* @param string $source The page source to extract the context from.
|
||||||
* @return string The generated context string.
|
* @return string The generated context string.
|
||||||
*/
|
*/
|
||||||
public static function extract_context($pagename, $query, $source)
|
public static function extract_context($pagename, $query_parsed, $source)
|
||||||
{
|
{
|
||||||
global $settings;
|
global $settings;
|
||||||
|
|
||||||
$pageid = ids::getid($pagename);
|
$pageid = ids::getid($pagename);
|
||||||
$nterms = self::stas_parse(self::stas_split($query))["terms"];
|
$nterms = $query_parsed["terms"];
|
||||||
|
|
||||||
// Query the inverted index for offsets
|
// Query the inverted index for offsets
|
||||||
$matches = [];
|
$matches = [];
|
||||||
|
@ -978,13 +979,13 @@ class search
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Highlights the keywords of a context string.
|
* Highlights the keywords of a context string.
|
||||||
* @param string $query The query to use when highlighting.
|
* @param array $query_parsed The *parsed* query to use when highlighting (the output of search::stas_parse())
|
||||||
* @param string $context The context string to highlight.
|
* @param string $context The context string to highlight.
|
||||||
* @return string The highlighted (HTML) string.
|
* @return string The highlighted (HTML) string.
|
||||||
*/
|
*/
|
||||||
public static function highlight_context($query, $context)
|
public static function highlight_context($query_parsed, $context)
|
||||||
{
|
{
|
||||||
$qterms = self::stas_parse(self::stas_split($query))["terms"];
|
$qterms = $query_parsed["terms"];
|
||||||
|
|
||||||
foreach($qterms as $qterm) {
|
foreach($qterms as $qterm) {
|
||||||
// Stop words are marked by STAS
|
// Stop words are marked by STAS
|
||||||
|
|
Loading…
Reference in a new issue