1
0
Fork 0
mirror of https://github.com/sbrl/Pepperminty-Wiki.git synced 2024-11-26 05:32:59 +00:00

Improve search invindex alteration performance

This will be especially noticeable when using invindex-rebuild
This commit is contained in:
Starbeamrainbowlabs 2019-08-15 23:06:06 +01:00
parent a67b67817f
commit 0a5ba3ff59
Signed by: sbrl
GPG key ID: 1BE5172E637709C2
3 changed files with 18 additions and 16 deletions

View file

@ -3,6 +3,7 @@ This file holds the changelog for Pepperminty Wiki. This is the master list of t
## v0.20-dev ## v0.20-dev
- Made `build.sh` build script more robust, especially when generating the documentation. - Made `build.sh` build script more robust, especially when generating the documentation.
- Improved search index update performance - especially when rebuilding the entire index through the master control panel
## v0.19 ## v0.19

View file

@ -95,7 +95,7 @@
"version": "0.4.2", "version": "0.4.2",
"author": "Starbeamrainbowlabs", "author": "Starbeamrainbowlabs",
"description": "Adds the ability to keep unlimited page history, limited only by your disk space. Note that this doesn't store file history (yet). Currently depends on feature-recent-changes for rendering of the history page.", "description": "Adds the ability to keep unlimited page history, limited only by your disk space. Note that this doesn't store file history (yet). Currently depends on feature-recent-changes for rendering of the history page.",
"lastupdate": 1559400114, "lastupdate": 1565287856,
"optional": false, "optional": false,
"extra_data": [] "extra_data": []
}, },
@ -132,10 +132,10 @@
{ {
"id": "feature-search", "id": "feature-search",
"name": "Search", "name": "Search",
"version": "0.7.1", "version": "0.8",
"author": "Starbeamrainbowlabs", "author": "Starbeamrainbowlabs",
"description": "Adds proper search functionality to Pepperminty Wiki using an inverted index to provide a full text search engine. If pages don't show up, then you might have hit a stop word. If not, try requesting the `invindex-rebuild` action to rebuild the inverted index from scratch.", "description": "Adds proper search functionality to Pepperminty Wiki using an inverted index to provide a full text search engine. If pages don't show up, then you might have hit a stop word. If not, try requesting the `invindex-rebuild` action to rebuild the inverted index from scratch.",
"lastupdate": 1557653831, "lastupdate": 1565906705,
"optional": false, "optional": false,
"extra_data": [] "extra_data": []
}, },
@ -145,17 +145,17 @@
"version": "0.2", "version": "0.2",
"author": "Starbeamrainbowlabs", "author": "Starbeamrainbowlabs",
"description": "An extensible statistics calculation system. Comes with a range of built-in statistics, but can be extended by other modules too.", "description": "An extensible statistics calculation system. Comes with a range of built-in statistics, but can be extended by other modules too.",
"lastupdate": 1530614499, "lastupdate": 1565741951,
"optional": false, "optional": false,
"extra_data": [] "extra_data": []
}, },
{ {
"id": "feature-upload", "id": "feature-upload",
"name": "Uploader", "name": "Uploader",
"version": "0.6", "version": "0.6.1",
"author": "Starbeamrainbowlabs", "author": "Starbeamrainbowlabs",
"description": "Adds the ability to upload files to Pepperminty Wiki. Uploaded files act as pages and have the special 'File\/' prefix.", "description": "Adds the ability to upload files to Pepperminty Wiki. Uploaded files act as pages and have the special 'File\/' prefix.",
"lastupdate": 1559421090, "lastupdate": 1565906134,
"optional": false, "optional": false,
"extra_data": [] "extra_data": []
}, },
@ -215,7 +215,7 @@
"version": "0.17.4", "version": "0.17.4",
"author": "Starbeamrainbowlabs", "author": "Starbeamrainbowlabs",
"description": "Allows you to edit pages by adding the edit and save actions. You should probably include this one.", "description": "Allows you to edit pages by adding the edit and save actions. You should probably include this one.",
"lastupdate": 1559400186, "lastupdate": 1565287856,
"optional": false, "optional": false,
"extra_data": { "extra_data": {
"diff.min.js": "https:\/\/cdnjs.cloudflare.com\/ajax\/libs\/jsdiff\/2.2.2\/diff.min.js" "diff.min.js": "https:\/\/cdnjs.cloudflare.com\/ajax\/libs\/jsdiff\/2.2.2\/diff.min.js"
@ -247,7 +247,7 @@
"version": "0.11.1", "version": "0.11.1",
"author": "Starbeamrainbowlabs", "author": "Starbeamrainbowlabs",
"description": "Adds a page that lists all the pages in the index along with their metadata.", "description": "Adds a page that lists all the pages in the index along with their metadata.",
"lastupdate": 1557182882, "lastupdate": 1565741951,
"optional": false, "optional": false,
"extra_data": [] "extra_data": []
}, },

View file

@ -1,7 +1,7 @@
<?php <?php
register_module([ register_module([
"name" => "Search", "name" => "Search",
"version" => "0.7.1", "version" => "0.8",
"author" => "Starbeamrainbowlabs", "author" => "Starbeamrainbowlabs",
"description" => "Adds proper search functionality to Pepperminty Wiki using an inverted index to provide a full text search engine. If pages don't show up, then you might have hit a stop word. If not, try requesting the `invindex-rebuild` action to rebuild the inverted index from scratch.", "description" => "Adds proper search functionality to Pepperminty Wiki using an inverted index to provide a full text search engine. If pages don't show up, then you might have hit a stop word. If not, try requesting the `invindex-rebuild` action to rebuild the inverted index from scratch.",
"id" => "feature-search", "id" => "feature-search",
@ -596,7 +596,7 @@ class search
/** /**
* Rebuilds the master inverted index and clears the page id index. * Rebuilds the master inverted index and clears the page id index.
* @param boolean $output Whether to send progress information to the user's browser. * @param bool $output Whether to send progress information to the user's browser.
*/ */
public static function rebuild_invindex($output = true) public static function rebuild_invindex($output = true)
{ {
@ -715,36 +715,37 @@ class search
{ {
// Remove all the subentries that were removed since last time // Remove all the subentries that were removed since last time
foreach($removals as $nterm) foreach($removals as $nterm)
{
unset($invindex[$nterm][$pageid]); unset($invindex[$nterm][$pageid]);
}
// Merge all the new / changed index entries into the inverted index // Merge all the new / changed index entries into the inverted index
foreach($index as $nterm => $newentry) foreach($index as $nterm => $newentry) {
{
// If the nterm isn't in the inverted index, then create a space for it // If the nterm isn't in the inverted index, then create a space for it
if(!isset($invindex[$nterm])) $invindex[$nterm] = []; if(!isset($invindex[$nterm])) $invindex[$nterm] = [];
$invindex[$nterm][$pageid] = $newentry; $invindex[$nterm][$pageid] = $newentry;
// Sort the page entries for this word by frequency // Sort the page entries for this word by frequency
/*
uasort($invindex[$nterm], function($a, $b) { uasort($invindex[$nterm], function($a, $b) {
if($a["freq"] == $b["freq"]) return 0; if($a["freq"] == $b["freq"]) return 0;
return ($a["freq"] < $b["freq"]) ? +1 : -1; return ($a["freq"] < $b["freq"]) ? +1 : -1;
}); });
*/
} }
/*
// Sort the inverted index by rank // Sort the inverted index by rank
uasort($invindex, function($a, $b) { uasort($invindex, function($a, $b) {
$ac = count($a); $bc = count($b); $ac = count($a); $bc = count($b);
if($ac == $bc) return 0; if($ac == $bc) return 0;
return ($ac < $bc) ? +1 : -1; return ($ac < $bc) ? +1 : -1;
}); });
*/
} }
/** /**
* Deletes the given pageid from the given pageindex. * Deletes the given pageid from the given pageindex.
* @param inverted_index &$invindex The inverted index. * @param array &$invindex The inverted index.
* @param number $pageid The pageid to remove. * @param int $pageid The pageid to remove.
*/ */
public static function delete_entry(&$invindex, $pageid) public static function delete_entry(&$invindex, $pageid)
{ {