Remove search::transliterate, as it has a huge performance overhead.

Use search::$literator->transliterate() directly instead.
This commit is contained in:
Starbeamrainbowlabs 2019-12-08 21:04:59 +00:00
parent 34fb821804
commit d53f0ed85a
Signed by: sbrl
GPG Key ID: 1BE5172E637709C2
3 changed files with 25 additions and 22 deletions

View File

@ -8,6 +8,7 @@ This file holds the changelog for Pepperminty Wiki. This is the master list of t
- Another search index rebuild is required - Another search index rebuild is required
- Optimisation: Don't generate the list of pages for the datalist if it isn't going to be displayed (especially noticeable on wikis with lots of pages) - Optimisation: Don't generate the list of pages for the datalist if it isn't going to be displayed (especially noticeable on wikis with lots of pages)
- Optimisation: Don't load the statistics index if it's not needed (also esp. noticeable on wikis with lots of pages) - Optimisation: Don't load the statistics index if it's not needed (also esp. noticeable on wikis with lots of pages)
- [Module Api] Optimisation: Remove `search::transliterate` because it has a huge overhead. Use `search::$literator->transliterate()` instead.
## v0.20 ## v0.20
_Just 1 change since the previous beta release._ _Just 1 change since the previous beta release._

View File

@ -132,20 +132,20 @@
{ {
"id": "feature-search", "id": "feature-search",
"name": "Search", "name": "Search",
"version": "0.10.1", "version": "0.11",
"author": "Starbeamrainbowlabs", "author": "Starbeamrainbowlabs",
"description": "Adds proper search functionality to Pepperminty Wiki using an inverted index to provide a full text search engine. If pages don't show up, then you might have hit a stop word. If not, try requesting the `invindex-rebuild` action to rebuild the inverted index from scratch.", "description": "Adds proper search functionality to Pepperminty Wiki using an inverted index to provide a full text search engine. If pages don't show up, then you might have hit a stop word. If not, try requesting the `invindex-rebuild` action to rebuild the inverted index from scratch.",
"lastupdate": 1575675114, "lastupdate": 1575838820,
"optional": false, "optional": false,
"extra_data": [] "extra_data": []
}, },
{ {
"id": "feature-stats", "id": "feature-stats",
"name": "Statistics", "name": "Statistics",
"version": "0.2.2", "version": "0.3",
"author": "Starbeamrainbowlabs", "author": "Starbeamrainbowlabs",
"description": "An extensible statistics calculation system. Comes with a range of built-in statistics, but can be extended by other modules too.", "description": "An extensible statistics calculation system. Comes with a range of built-in statistics, but can be extended by other modules too.",
"lastupdate": 1575837611, "lastupdate": 1575837905,
"optional": false, "optional": false,
"extra_data": [] "extra_data": []
}, },

View File

@ -1,7 +1,7 @@
<?php <?php
register_module([ register_module([
"name" => "Search", "name" => "Search",
"version" => "0.10.1", "version" => "0.11",
"author" => "Starbeamrainbowlabs", "author" => "Starbeamrainbowlabs",
"description" => "Adds proper search functionality to Pepperminty Wiki using an inverted index to provide a full text search engine. If pages don't show up, then you might have hit a stop word. If not, try requesting the `invindex-rebuild` action to rebuild the inverted index from scratch.", "description" => "Adds proper search functionality to Pepperminty Wiki using an inverted index to provide a full text search engine. If pages don't show up, then you might have hit a stop word. If not, try requesting the `invindex-rebuild` action to rebuild the inverted index from scratch.",
"id" => "feature-search", "id" => "feature-search",
@ -443,7 +443,7 @@ register_module([
exit("Error: The type '$type' is not one of the supported output types. Available values: json, opensearch. Default: json"); exit("Error: The type '$type' is not one of the supported output types. Available values: json, opensearch. Default: json");
} }
$query = search::transliterate($_GET["query"]); $query = search::$literator->transliterate($_GET["query"]);
// Rank each page name // Rank each page name
$results = []; $results = [];
@ -451,7 +451,7 @@ register_module([
$results[] = [ $results[] = [
"pagename" => $pageName, "pagename" => $pageName,
// Costs: Insert: 1, Replace: 8, Delete: 6 // Costs: Insert: 1, Replace: 8, Delete: 6
"distance" => levenshtein($query, search::transliterate($pageName), 1, 8, 6) "distance" => levenshtein($query, search::$literator->transliterate($pageName), 1, 8, 6)
]; ];
} }
@ -782,23 +782,23 @@ class search
*/ */
private static $invindex = null; private static $invindex = null;
/** /**
* Cache variable for the transliterator instance used by search::transliterate. * The transliterator that can be used to transliterate strings.
* Transliterated strings are more suitable for use with the search index.
* Note that this is no longer wrapped in a function as of v0.21 for
* performance reasons.
* @var Transliterator * @var Transliterator
*/ */
private static $literator = null; public static $literator = null;
/** /**
* Transliterates a string to make it more suitable for entry into the search index. * Initialises the search system.
* @param string $str The string to transliterate. * Do not call this function! It is called automatically.
* @return string The transliterated string.
*/ */
public static function transliterate(string $str) : string { public static function init() {
if(self::$literator == null) self::$literator = Transliterator::createFromRules(':: Any-Latin; :: Latin-ASCII; :: NFD; :: [:Nonspacing Mark:] Remove; :: Lower(); :: NFC;', Transliterator::FORWARD);
self::$literator = Transliterator::createFromRules(':: Any-Latin; :: Latin-ASCII; :: NFD; :: [:Nonspacing Mark:] Remove; :: Lower(); :: NFC;', Transliterator::FORWARD);
return self::$literator->transliterate($str);
} }
/** /**
* Converts a source string into an index of search terms that can be * Converts a source string into an index of search terms that can be
* merged into an inverted index. * merged into an inverted index.
@ -843,7 +843,7 @@ class search
// We don't need to normalise here because the transliterator handles // We don't need to normalise here because the transliterator handles
// this for us. Also, we can't move the literator to a static member // this for us. Also, we can't move the literator to a static member
// variable because PHP doesn't like it very much // variable because PHP doesn't like it very much
$source = self::transliterate($source); $source = self::$literator->transliterate($source);
$source = preg_replace('/[\[\]\|\{\}\/]/u', " ", $source); $source = preg_replace('/[\[\]\|\{\}\/]/u', " ", $source);
return preg_split("/((^\p{P}+)|(\p{P}*\s+\p{P}*)|(\p{P}+$))|\|/u", $source, -1, $flags); return preg_split("/((^\p{P}+)|(\p{P}*\s+\p{P}*)|(\p{P}+$))|\|/u", $source, -1, $flags);
} }
@ -1071,7 +1071,7 @@ class search
* @param string $query The query string to split. * @param string $query The query string to split.
*/ */
public function stas_split($query) { public function stas_split($query) {
$chars = str_split(self::transliterate($query)); $chars = str_split(self::$literator->transliterate($query));
$terms = []; $terms = [];
$next_term = ""; $next_term = "";
$toggle_state = false; // true = now inside, false = now outside $toggle_state = false; // true = now inside, false = now outside
@ -1218,7 +1218,7 @@ class search
global $settings, $pageindex; global $settings, $pageindex;
$query_stas = self::stas_parse( $query_stas = self::stas_parse(
self::stas_split(self::transliterate($query)) self::stas_split(self::$literator->transliterate($query))
); );
/* Sub-array format: /* Sub-array format:
@ -1290,8 +1290,8 @@ class search
// Setup a variable to hold the current page's id // Setup a variable to hold the current page's id
$pageid = null; // Cache the page id $pageid = null; // Cache the page id
$lit_title = self::transliterate($pagename); $lit_title = self::$literator->transliterate($pagename);
$lit_tags = isset($pagedata->tags) ? self::transliterate(implode(" ", $pagedata->tags)) : null; $lit_tags = isset($pagedata->tags) ? self::$literator->transliterate(implode(" ", $pagedata->tags)) : null;
// Make sure that the title & tags don't contain a term we should exclude // Make sure that the title & tags don't contain a term we should exclude
$skip = false; $skip = false;
@ -1489,5 +1489,7 @@ class search
return $context; return $context;
} }
} }
// Run the init function
search::init();
?> ?>