Improve search invindex alteration performance

This will be especially noticeable when using invindex-rebuild
This commit is contained in:
Starbeamrainbowlabs 2019-08-15 23:06:06 +01:00
parent a67b67817f
commit 0a5ba3ff59
Signed by: sbrl
GPG Key ID: 1BE5172E637709C2
3 changed files with 18 additions and 16 deletions

View File

@ -3,6 +3,7 @@ This file holds the changelog for Pepperminty Wiki. This is the master list of t
## v0.20-dev
- Made `build.sh` build script more robust, especially when generating the documentation.
- Improved search index update performance - especially when rebuilding the entire index through the master control panel
## v0.19

View File

@ -95,7 +95,7 @@
"version": "0.4.2",
"author": "Starbeamrainbowlabs",
"description": "Adds the ability to keep unlimited page history, limited only by your disk space. Note that this doesn't store file history (yet). Currently depends on feature-recent-changes for rendering of the history page.",
"lastupdate": 1559400114,
"lastupdate": 1565287856,
"optional": false,
"extra_data": []
},
@ -132,10 +132,10 @@
{
"id": "feature-search",
"name": "Search",
"version": "0.7.1",
"version": "0.8",
"author": "Starbeamrainbowlabs",
"description": "Adds proper search functionality to Pepperminty Wiki using an inverted index to provide a full text search engine. If pages don't show up, then you might have hit a stop word. If not, try requesting the `invindex-rebuild` action to rebuild the inverted index from scratch.",
"lastupdate": 1557653831,
"lastupdate": 1565906705,
"optional": false,
"extra_data": []
},
@ -145,17 +145,17 @@
"version": "0.2",
"author": "Starbeamrainbowlabs",
"description": "An extensible statistics calculation system. Comes with a range of built-in statistics, but can be extended by other modules too.",
"lastupdate": 1530614499,
"lastupdate": 1565741951,
"optional": false,
"extra_data": []
},
{
"id": "feature-upload",
"name": "Uploader",
"version": "0.6",
"version": "0.6.1",
"author": "Starbeamrainbowlabs",
"description": "Adds the ability to upload files to Pepperminty Wiki. Uploaded files act as pages and have the special 'File\/' prefix.",
"lastupdate": 1559421090,
"lastupdate": 1565906134,
"optional": false,
"extra_data": []
},
@ -215,7 +215,7 @@
"version": "0.17.4",
"author": "Starbeamrainbowlabs",
"description": "Allows you to edit pages by adding the edit and save actions. You should probably include this one.",
"lastupdate": 1559400186,
"lastupdate": 1565287856,
"optional": false,
"extra_data": {
"diff.min.js": "https:\/\/cdnjs.cloudflare.com\/ajax\/libs\/jsdiff\/2.2.2\/diff.min.js"
@ -247,7 +247,7 @@
"version": "0.11.1",
"author": "Starbeamrainbowlabs",
"description": "Adds a page that lists all the pages in the index along with their metadata.",
"lastupdate": 1557182882,
"lastupdate": 1565741951,
"optional": false,
"extra_data": []
},

View File

@ -1,7 +1,7 @@
<?php
register_module([
"name" => "Search",
"version" => "0.7.1",
"version" => "0.8",
"author" => "Starbeamrainbowlabs",
"description" => "Adds proper search functionality to Pepperminty Wiki using an inverted index to provide a full text search engine. If pages don't show up, then you might have hit a stop word. If not, try requesting the `invindex-rebuild` action to rebuild the inverted index from scratch.",
"id" => "feature-search",
@ -596,7 +596,7 @@ class search
/**
* Rebuilds the master inverted index and clears the page id index.
* @param boolean $output Whether to send progress information to the user's browser.
* @param bool $output Whether to send progress information to the user's browser.
*/
public static function rebuild_invindex($output = true)
{
@ -715,36 +715,37 @@ class search
{
// Remove all the subentries that were removed since last time
foreach($removals as $nterm)
{
unset($invindex[$nterm][$pageid]);
}
// Merge all the new / changed index entries into the inverted index
foreach($index as $nterm => $newentry)
{
foreach($index as $nterm => $newentry) {
// If the nterm isn't in the inverted index, then create a space for it
if(!isset($invindex[$nterm])) $invindex[$nterm] = [];
$invindex[$nterm][$pageid] = $newentry;
// Sort the page entries for this word by frequency
/*
uasort($invindex[$nterm], function($a, $b) {
if($a["freq"] == $b["freq"]) return 0;
return ($a["freq"] < $b["freq"]) ? +1 : -1;
});
*/
}
/*
// Sort the inverted index by rank
uasort($invindex, function($a, $b) {
$ac = count($a); $bc = count($b);
if($ac == $bc) return 0;
return ($ac < $bc) ? +1 : -1;
});
*/
}
/**
* Deletes the given pageid from the given pageindex.
* @param inverted_index &$invindex The inverted index.
* @param number $pageid The pageid to remove.
* @param array &$invindex The inverted index.
* @param int $pageid The pageid to remove.
*/
public static function delete_entry(&$invindex, $pageid)
{