mirror of
https://github.com/sbrl/Pepperminty-Wiki.git
synced 2024-11-21 16:13:00 +00:00
Improve search index write & lookup performance by implementing new arr_simple system
By serialising and deserialising lists of numbers with implode & explode, we can further cut down on the json_* calls which are reeeeeally slow.
This commit is contained in:
parent
25cef164ef
commit
8156055b5c
4 changed files with 46 additions and 12 deletions
|
@ -1,6 +1,12 @@
|
||||||
# Changelog
|
# Changelog
|
||||||
This file holds the changelog for Pepperminty Wiki. This is the master list of things that have changed (second only to the commit history!) - though the information for any particular release can also be found in the description of its page for every release made on GitHub too.
|
This file holds the changelog for Pepperminty Wiki. This is the master list of things that have changed (second only to the commit history!) - though the information for any particular release can also be found in the description of its page for every release made on GitHub too.
|
||||||
|
|
||||||
|
## v0.21-dev
|
||||||
|
|
||||||
|
## Changed
|
||||||
|
- Improved the search indexing system performance - again
|
||||||
|
- Another search index rebuild is required
|
||||||
|
-
|
||||||
|
|
||||||
## v0.20
|
## v0.20
|
||||||
_Just 1 change since the previous beta release._
|
_Just 1 change since the previous beta release._
|
||||||
|
|
|
@ -75,7 +75,7 @@
|
||||||
"version": "0.1",
|
"version": "0.1",
|
||||||
"author": "Starbeamrainbowlabs",
|
"author": "Starbeamrainbowlabs",
|
||||||
"description": "Displays a special page to aid in setting up a new wiki for the first time.",
|
"description": "Displays a special page to aid in setting up a new wiki for the first time.",
|
||||||
"lastupdate": 1574596170,
|
"lastupdate": 1574596190,
|
||||||
"optional": false,
|
"optional": false,
|
||||||
"extra_data": []
|
"extra_data": []
|
||||||
},
|
},
|
||||||
|
@ -135,7 +135,7 @@
|
||||||
"version": "0.10.1",
|
"version": "0.10.1",
|
||||||
"author": "Starbeamrainbowlabs",
|
"author": "Starbeamrainbowlabs",
|
||||||
"description": "Adds proper search functionality to Pepperminty Wiki using an inverted index to provide a full text search engine. If pages don't show up, then you might have hit a stop word. If not, try requesting the `invindex-rebuild` action to rebuild the inverted index from scratch.",
|
"description": "Adds proper search functionality to Pepperminty Wiki using an inverted index to provide a full text search engine. If pages don't show up, then you might have hit a stop word. If not, try requesting the `invindex-rebuild` action to rebuild the inverted index from scratch.",
|
||||||
"lastupdate": 1569096292,
|
"lastupdate": 1575675114,
|
||||||
"optional": false,
|
"optional": false,
|
||||||
"extra_data": []
|
"extra_data": []
|
||||||
},
|
},
|
||||||
|
|
|
@ -550,6 +550,9 @@ window.addEventListener("load", function(event) {
|
||||||
* Represents a key-value data store.
|
* Represents a key-value data store.
|
||||||
*/
|
*/
|
||||||
class StorageBox {
|
class StorageBox {
|
||||||
|
const MODE_JSON = 0;
|
||||||
|
const MODE_ARR_SIMPLE = 1;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The SQLite database connection.
|
* The SQLite database connection.
|
||||||
* @var \PDO
|
* @var \PDO
|
||||||
|
@ -623,6 +626,19 @@ class StorageBox {
|
||||||
}
|
}
|
||||||
return $this->cache[$key]["value"];
|
return $this->cache[$key]["value"];
|
||||||
}
|
}
|
||||||
|
/**
 * Gets a list that was stored as a delimiter-joined string (the format
 * written for entries saved via set_arr_simple()).
 * On a cache miss the raw string is read from the store table, split on
 * the delimiter and cached; on a cache hit the cached value is returned
 * as-is.
 * @param	string	$key		The key of the list to fetch.
 * @param	string	$delimiter	The delimiter the list was joined with.
 * @return	array	The list. Items read from the database are strings. An empty array is returned if no row exists for the key or the stored string is empty.
 */
public function get_arr_simple(string $key, string $delimiter = "|") {
	// If it's not in the cache, insert it
	if(!isset($this->cache[$key])) {
		$raw = $this->query(
			"SELECT value FROM store WHERE key = :key;",
			[ "key" => $key ]
		)->fetchColumn();
		
		// fetchColumn() yields false when there is no row, and explode()
		// on false / "" produces [""] rather than an empty list - which
		// would turn a saved empty list into a list holding one empty item.
		if($raw === false || $raw === null || (string)$raw === "")
			$items = [];
		else
			$items = explode($delimiter, (string)$raw);
		
		$this->cache[$key] = [
			"modified" => false,
			"value" => $items
		];
	}
	return $this->cache[$key]["value"];
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sets a value in the data store.
|
* Sets a value in the data store.
|
||||||
|
@ -634,6 +650,14 @@ class StorageBox {
|
||||||
if(!isset($this->cache[$key])) $this->cache[$key] = [];
|
if(!isset($this->cache[$key])) $this->cache[$key] = [];
|
||||||
$this->cache[$key]["value"] = $value;
|
$this->cache[$key]["value"] = $value;
|
||||||
$this->cache[$key]["modified"] = true;
|
$this->cache[$key]["modified"] = true;
|
||||||
|
$this->cache[$key]["mode"] = self::MODE_JSON;
|
||||||
|
}
|
||||||
|
/**
 * Sets a list in the cache, flagged to be serialised as a simple
 * delimiter-joined string (MODE_ARR_SIMPLE) instead of JSON.
 * The entry is marked as modified; nothing is written to disk here.
 * @param	string	$key		The key to store the list under.
 * @param	mixed	$value		The list to store. NOTE(review): presumably always a flat array of scalars, since it is later passed to implode() - confirm against callers.
 * @param	string	$delimiter	The delimiter to join the list items with.
 */
public function set_arr_simple(string $key, $value, string $delimiter = "|") : void {
	// Start from the existing cache entry (if any) so unrelated fields survive
	$entry = $this->cache[$key] ?? [];
	
	$entry["value"] = $value;
	$entry["modified"] = true;
	$entry["delimiter"] = $delimiter;
	$entry["mode"] = self::MODE_ARR_SIMPLE;
	
	$this->cache[$key] = $entry;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -646,6 +670,7 @@ class StorageBox {
|
||||||
if(isset($this->cache[$key]))
|
if(isset($this->cache[$key]))
|
||||||
unset($this->cache[$key]);
|
unset($this->cache[$key]);
|
||||||
// Remove it from disk
|
// Remove it from disk
|
||||||
|
// TODO: Queue this action for the transaction later
|
||||||
return $this->query(
|
return $this->query(
|
||||||
"DELETE FROM store WHERE key = :key;",
|
"DELETE FROM store WHERE key = :key;",
|
||||||
[ "key" => $key ]
|
[ "key" => $key ]
|
||||||
|
@ -676,7 +701,9 @@ class StorageBox {
|
||||||
"INSERT OR REPLACE INTO store(key, value) VALUES(:key, :value)",
|
"INSERT OR REPLACE INTO store(key, value) VALUES(:key, :value)",
|
||||||
[
|
[
|
||||||
"key" => $key,
|
"key" => $key,
|
||||||
"value" => json_encode($value_data["value"])
|
"value" => $value_data["mode"] == self::MODE_ARR_SIMPLE ?
|
||||||
|
implode($value_data["delimiter"], $value_data["value"]) :
|
||||||
|
json_encode($value_data["value"])
|
||||||
]
|
]
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
@ -964,7 +991,7 @@ class search
|
||||||
// Delete the offsets
|
// Delete the offsets
|
||||||
self::$invindex->delete("$nterm|$pageid");
|
self::$invindex->delete("$nterm|$pageid");
|
||||||
// Delete the item from the list of pageids containing this term
|
// Delete the item from the list of pageids containing this term
|
||||||
$nterm_pageids = self::$invindex->get($nterm);
|
$nterm_pageids = self::$invindex->get_arr_simple($nterm);
|
||||||
array_splice($nterm_pageids, array_search($pageid, $nterm_pageids), 1);
|
array_splice($nterm_pageids, array_search($pageid, $nterm_pageids), 1);
|
||||||
if(empty($nterm_pageids)) { // No need to keep the pageid list if there's nothing in it
|
if(empty($nterm_pageids)) { // No need to keep the pageid list if there's nothing in it
|
||||||
self::$invindex->delete($nterm);
|
self::$invindex->delete($nterm);
|
||||||
|
@ -973,21 +1000,22 @@ class search
|
||||||
if($termlist_loc !== false) array_splice($termlist, $termlist_loc, 1);
|
if($termlist_loc !== false) array_splice($termlist, $termlist_loc, 1);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
self::$invindex->set($nterm, $nterm_pageids);
|
self::$invindex->get_arr_simple($nterm, $nterm_pageids);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Merge all the new / changed index entries into the inverted index
|
// Merge all the new / changed index entries into the inverted index
|
||||||
foreach($index as $nterm => $newentry) {
|
foreach($index as $nterm => $newentry) {
|
||||||
|
// if(!is_string($nterm)) $nterm = strval($nterm);
|
||||||
if(!self::$invindex->has($nterm)) {
|
if(!self::$invindex->has($nterm)) {
|
||||||
self::$invindex->set($nterm, []);
|
self::$invindex->set_arr_simple($nterm, []);
|
||||||
$termlist[] = $nterm;
|
$termlist[] = $nterm;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update the nterm pageid list
|
// Update the nterm pageid list
|
||||||
$nterm_pageids = self::$invindex->get($nterm);
|
$nterm_pageids = self::$invindex->get_arr_simple($nterm);
|
||||||
if(array_search($pageid, $nterm_pageids) === false) {
|
if(array_search($pageid, $nterm_pageids) === false) {
|
||||||
$nterm_pageids[] = $pageid;
|
$nterm_pageids[] = $pageid;
|
||||||
self::$invindex->set($nterm, $nterm_pageids);
|
self::$invindex->set_arr_simple($nterm, $nterm_pageids);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Store the offset list
|
// Store the offset list
|
||||||
|
@ -1004,7 +1032,7 @@ class search
|
||||||
public static function invindex_delete(int $pageid) {
|
public static function invindex_delete(int $pageid) {
|
||||||
$termlist = self::$invindex->get("|termlist|");
|
$termlist = self::$invindex->get("|termlist|");
|
||||||
foreach($termlist as $nterm) {
|
foreach($termlist as $nterm) {
|
||||||
$nterm_pageids = self::$invindex->get("$nterm");
|
$nterm_pageids = self::$invindex->get_arr_simple($nterm);
|
||||||
$nterm_loc = array_search($pageid, $nterm_pageids);
|
$nterm_loc = array_search($pageid, $nterm_pageids);
|
||||||
// If this nterm doesn't appear in the list, we're not interested
|
// If this nterm doesn't appear in the list, we're not interested
|
||||||
if($nterm_loc === false)
|
if($nterm_loc === false)
|
||||||
|
@ -1022,7 +1050,7 @@ class search
|
||||||
array_splice($termlist, array_search($nterm, $termlist), 1);
|
array_splice($termlist, array_search($nterm, $termlist), 1);
|
||||||
}
|
}
|
||||||
else // Save the document id list back, since it still contains other pageids
|
else // Save the document id list back, since it still contains other pageids
|
||||||
self::$invindex->set($nterm, $nterm_pageids);
|
self::$invindex->set_arr_simple($nterm, $nterm_pageids);
|
||||||
}
|
}
|
||||||
// Save the termlist back to the store
|
// Save the termlist back to the store
|
||||||
self::$invindex->set("|termlist|", $termlist);
|
self::$invindex->set("|termlist|", $termlist);
|
||||||
|
@ -1221,7 +1249,7 @@ class search
|
||||||
continue; // Skip if it's not in the index
|
continue; // Skip if it's not in the index
|
||||||
|
|
||||||
// For each page that contains this term.....
|
// For each page that contains this term.....
|
||||||
$term_pageids = self::$invindex->get($term_def["term"]);
|
$term_pageids = self::$invindex->get_arr_simple($term_def["term"]);
|
||||||
foreach($term_pageids as $pageid) {
|
foreach($term_pageids as $pageid) {
|
||||||
// Check to see if it contains any words we should exclude
|
// Check to see if it contains any words we should exclude
|
||||||
$skip = false;
|
$skip = false;
|
||||||
|
|
2
version
2
version
|
@ -1 +1 @@
|
||||||
v0.20
|
v0.21-dev
|
||||||
|
|
Loading…
Reference in a new issue