Fix a *huge* number of bugs in the new search system, but it's not ready just yet

2024-11-22 04:23:01 +00:00 · 2019-08-22 21:38:17 +01:00 · 2019-08-22 21:38:17 +01:00 · edf1be5801
commit edf1be5801
parent e08e775d98
8 changed files with 237 additions and 126 deletions
--- a/core/02-environment.php
+++ b/core/02-environment.php
@ -41,7 +41,7 @@ $paths = new stdClass();
 /** The pageindex. Contains extensive information about all pages currently in this wiki. Individual entries for pages may be extended with arbitrary properties. */
 $paths->pageindex = "pageindex.json";
 /** The inverted index used for searching. Use the `search` class to interact with this - otherwise your brain might explode :P */
-$paths->searchindex = "invindex.json";
+$paths->searchindex = "invindex.sqlite";
 /** The index that maps ids to page names. Use the `ids` class to interact with it :-) */
 $paths->idindex = "idindex.json";
 /** The cache of the most recently calculated statistics. */
--- a/core/05-functions.php
+++ b/core/05-functions.php
@ -110,6 +110,39 @@ function glob_recursive($pattern, $flags = 0)
 	return $files;
 }

+/**
+ * Resolves a relative path against a given base directory.
+ * The resolution is purely lexical: '.' and '..' segments are collapsed
+ * without consulting the filesystem.
+ * @apiVersion	0.20.0
+ * @source	https://stackoverflow.com/a/44312137/1460422
+ * @param	string		$path		The relative path to resolve. A path that already starts with DIRECTORY_SEPARATOR is only normalised.
+ * @param	string|null	$basePath	The base directory to resolve against. Null means the current working directory; an empty string leaves a relative path relative.
+ * @return	string		The normalised path (absolute unless $basePath is an empty string).
+ */
+function path_resolve(string $path, ?string $basePath = null) {
+    // Make absolute path
+    if (substr($path, 0, 1) !== DIRECTORY_SEPARATOR) {
+        if ($basePath === null) {
+            // Get PWD first to avoid getcwd() resolving symlinks if in symlinked folder
+            $path=(getenv('PWD') ?: getcwd()).DIRECTORY_SEPARATOR.$path;
+        } elseif (strlen($basePath)) {
+            $path=$basePath.DIRECTORY_SEPARATOR.$path;
+        }
+    }
+
+    // Absolute paths carry a leading '' component that stands for the root
+    $isAbsolute = substr($path, 0, 1) === DIRECTORY_SEPARATOR;
+
+    // Resolve '.' and '..'
+    $components=array();
+    foreach(explode(DIRECTORY_SEPARATOR, rtrim($path, DIRECTORY_SEPARATOR)) as $name) {
+        if ($name === '..') {
+            // Never pop the root marker: '..' at the root stays at the root,
+            // otherwise '/a/../../x' would collapse to the relative path 'x'
+            if (!($isAbsolute && count($components) === 1)) {
+                array_pop($components);
+            }
+        } elseif ($name !== '.' && !(count($components) && $name === '')) {
+            // … && !(count($components) && $name === '') - we want to keep initial '/' for abs paths
+            $components[]=$name;
+        }
+    }
+
+    $resolved = implode(DIRECTORY_SEPARATOR, $components);
+    // A lone root marker implodes to '' - hand back the root itself instead
+    return ($isAbsolute && $resolved === '') ? DIRECTORY_SEPARATOR : $resolved;
+}
+
 /**
 * Gets the name of the parent page to the specified page.
 * @apiVersion 0.15.0
--- a/module_index.json
+++ b/module_index.json
@ -82,10 +82,10 @@
    {
        "id": "feature-guiconfig",
        "name": "Settings GUI",
-        "version": "0.1.4",
+        "version": "0.1.5",
        "author": "Starbeamrainbowlabs",
        "description": "The module everyone has been waiting for! Adds a web based gui that lets mods change the wiki settings.",
-        "lastupdate": 1557575008,
+        "lastupdate": 1566498857,
        "optional": false,
        "extra_data": []
    },
@ -132,10 +132,10 @@
    {
        "id": "feature-search",
        "name": "Search",
-        "version": "0.8",
+        "version": "0.10",
        "author": "Starbeamrainbowlabs",
        "description": "Adds proper search functionality to Pepperminty Wiki using an inverted index to provide a full text search engine. If pages don't show up, then you might have hit a stop word. If not, try requesting the `invindex-rebuild` action to rebuild the inverted index from scratch.",
-        "lastupdate": 1565909052,
+        "lastupdate": 1566506237,
        "optional": false,
        "extra_data": []
    },
@ -202,20 +202,20 @@
    {
        "id": "page-delete",
        "name": "Page deleter",
-        "version": "0.10.1",
+        "version": "0.10.2",
        "author": "Starbeamrainbowlabs",
        "description": "Adds an action to allow administrators to delete pages.",
-        "lastupdate": 1559400151,
+        "lastupdate": 1566498558,
        "optional": false,
        "extra_data": []
    },
    {
        "id": "page-edit",
        "name": "Page editor",
-        "version": "0.17.4",
+        "version": "0.17.5",
        "author": "Starbeamrainbowlabs",
        "description": "Allows you to edit pages by adding the edit and save actions. You should probably include this one.",
-        "lastupdate": 1565287856,
+        "lastupdate": 1566498562,
        "optional": false,
        "extra_data": {
            "diff.min.js": "https:\/\/cdnjs.cloudflare.com\/ajax\/libs\/jsdiff\/2.2.2\/diff.min.js"
@ -234,10 +234,10 @@
    {
        "id": "page-help",
        "name": "Help page",
-        "version": "0.9.3",
+        "version": "0.9.4",
        "author": "Starbeamrainbowlabs",
        "description": "Adds a rather useful help page. Access through the 'help' action. This module also exposes help content added to Pepperminty Wiki's inbuilt invisible help section system.",
-        "lastupdate": 1492433537,
+        "lastupdate": 1566498566,
        "optional": false,
        "extra_data": []
    },
@ -326,8 +326,8 @@
        "name": "Parsedown",
        "version": "0.10",
        "author": "Emanuil Rusev & Starbeamrainbowlabs",
-        "description": "An upgraded (now default!) parser based on Emanuil Rusev's Parsedown Extra PHP library (https:\/\/github.com\/erusev\/parsedown-extra), which is licensed MIT. Please be careful, as this module adds some weight to your installation, and also *requires* write access to the disk on first load.",
-        "lastupdate": 1551564416,
+        "description": "An upgraded (now default!) parser based on Emanuil Rusev's Parsedown Extra PHP library (https:\/\/github.com\/erusev\/parsedown-extra), which is licensed MIT. Please be careful, as this module adds some weight to your installation.",
+        "lastupdate": 1566070821,
        "optional": false,
        "extra_data": {
            "Parsedown.php": "https:\/\/raw.githubusercontent.com\/erusev\/parsedown\/819c68899d593503180ed79ef4be5a4dcd8c5f92\/Parsedown.php",
--- a/modules/feature-guiconfig.php
+++ b/modules/feature-guiconfig.php
@ -1,7 +1,7 @@
 <?php
 register_module([
 	"name" => "Settings GUI",
-	"version" => "0.1.4",
+	"version" => "0.1.5",
 	"author" => "Starbeamrainbowlabs",
 	"description" => "The module everyone has been waiting for! Adds a web based gui that lets mods change the wiki settings.",
 	"id" => "feature-guiconfig",
@ -65,6 +65,8 @@ window.addEventListener("load", function(event) {
 			if(message.startsWith("Done! Saving new search index to"))
 				rebuildActionEvents.close();
 		});
+		// Close the connection on error & don't try again
+		rebuildActionEvents.addEventListener("error", (_event) => rebuildActionEvents.close());
 	});
 });
 SCRIPT;
--- a/modules/feature-search.php
+++ b/modules/feature-search.php
@ -1,7 +1,7 @@
 <?php
 register_module([
 	"name" => "Search",
-	"version" => "0.9",
+	"version" => "0.10",
 	"author" => "Starbeamrainbowlabs",
 	"description" => "Adds proper search functionality to Pepperminty Wiki using an inverted index to provide a full text search engine. If pages don't show up, then you might have hit a stop word. If not, try requesting the `invindex-rebuild` action to rebuild the inverted index from scratch.",
 	"id" => "feature-search",
@ -146,7 +146,7 @@ register_module([
 			$start = microtime(true);
 			foreach($results as &$result) {
 				$result["context"] = search::extract_context(
-					$invindex, $result["pagename"],
+					$result["pagename"],
 					$_GET["query"],
 					file_get_contents($env->storage_prefix . $result["pagename"] . ".md")
 				);
@ -386,9 +386,8 @@ register_module([
 				exit("Error: The type '$type' is not one of the supported output types. Available values: json, opensearch. Default: json");
 			}
 			
-			$literator = Transliterator::createFromRules(':: Any-Latin; :: Latin-ASCII; :: NFD; :: [:Nonspacing Mark:] Remove; :: Lower(); :: NFC;', Transliterator::FORWARD);
 			
-			$query = $literator->transliterate($_GET["query"]);
+			$query = search::transliterate($_GET["query"]);
 			
 			
 			// Rank each page name
@ -397,7 +396,7 @@ register_module([
 				$results[] = [
 					"pagename" => $pageName,
 					// Costs: Insert: 1, Replace: 8, Delete: 6
-					"distance" => levenshtein($query, $literator->transliterate($pageName), 1, 8, 6)
+					"distance" => levenshtein($query, search::transliterate($pageName), 1, 8, 6)
 				];
 			}
 			
@ -490,13 +489,26 @@ class StorageBox {
 	 */
 	private $db;
 	
+	/**
+	 * A cache of values.
+	 * @var object[]
+	 */
+	private $cache = [];
+	
+	/**
+	 * A cache of prepared SQL statements.
+	 * @var \PDOStatement[]
+	 */
+	private $query_cache = [];
+	
 	/**
 	 * Initialises a new store connection.
 	 * @param	string	$filename	The filename that the store is located in.
 	 */
 	function __construct(string $filename) {
 		$firstrun = !file_exists($filename);
-		$this->db = new \PDO("sqlite:$filename");
+		$this->db = new \PDO("sqlite:" . path_resolve($filename, __DIR__)); // HACK: This might not work on some systems, because it depends on the current working directory
+		$this->db->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);
 		if($firstrun) {
 			$this->query("CREATE TABLE store (key TEXT UNIQUE NOT NULL, value TEXT)");
 		}
@ -508,11 +520,11 @@ class StorageBox {
 	 * @return	\PDOStatement		The result of the query, as a PDOStatement.
 	 */
 	private function query(string $sql, array $variables = []) {
-		// FUTURE: Optionally cache prepared statements?
-		$statement = $this->db->prepare($sql);
-		$statement->execute($variables);
-		
-		return $statement; // fetchColumn(), fetchAll(), etc. are defined on the statement, not the return value of execute()
+		// Add to the query cache if it doesn't exist
+		if(!isset($this->query_cache[$sql]))
+			$this->query_cache[$sql] = $this->db->prepare($sql);
+		$this->query_cache[$sql]->execute($variables);
+		return $this->query_cache[$sql]; // fetchColumn(), fetchAll(), etc. are defined on the statement, not the return value of execute()
 	}
 	
 	/**
@ -521,6 +533,8 @@ class StorageBox {
 	 * @return	bool	Whether the key exists in the store or not.
 	 */
 	public function has(string $key) : bool {
+		if(isset($this->cache[$key]))
+			return true;
 		return $this->query(
 			"SELECT COUNT(key) FROM store WHERE key = :key;",
 			[ "key" => $key ]
@ -529,29 +543,30 @@ class StorageBox {
 	
 	/**
 	 * Gets a value from the store.
-	 * @param	string	$key	The key to store the value under.
-	 * @return	string	The value to store.
+	 * @param	string	$key	The key value is stored under.
+	 * @return	mixed	The stored value.
 	 */
-	public function get(string $key) : string {
-		return $this->query(
+	public function get(string $key) {
+		// If it's not in the cache, insert it
+		if(!isset($this->cache[$key])) {
+			$this->cache[$key] = [ "modified" => false, "value" => json_decode($this->query(
 				"SELECT value FROM store WHERE key = :key;",
 				[ "key" => $key ]
-		)->fetchColumn();
+			)->fetchColumn()) ];
+		}
+		return $this->cache[$key]["value"];
 	}
 	
 	/**
 	 * Sets a value in the data store.
+	 * Note that this does NOT save changes to disk until you close the connection!
 	 * @param	string	$key	The key to set the value of.
-	 * @param	string	$value	The value to store.
+	 * @param	mixed	$value	The value to store.
 	 */
-	public function set(string $key, string $value) : void {
-		$this->query(
-			"INSERT OR REPLACE INTO store(key, value) VALUES(:key, :value)",
-			[
-				"key" => $key,
-				"value" => $value
-			]
-		);
+	public function set(string $key, $value) : void {
+		if(!isset($this->cache[$key])) $this->cache[$key] = [];
+		$this->cache[$key]["value"] = $value;
+		$this->cache[$key]["modified"] = true;
 	}
 	
 	/**
@ -560,6 +575,10 @@ class StorageBox {
 	 * @return	bool	Whether it was really deleted or not. Note that if it doesn't exist, then it can't be deleted.
 	 */
 	public function delete(string $key) : bool {
+		// Remove it from the cache
+		if(isset($this->cache[$key]))
+			unset($this->cache[$key]);
+		// Remove it from disk
 		$this->query(
 			"DELETE FROM store WHERE key = :key;",
 			[ "key" => $key ]
@ -570,8 +589,33 @@ class StorageBox {
 	 * Empties the store.
 	 */
 	public function clear() : void {
+		// Empty the cache;
+		$this->cache = [];
+		// Empty the disk
 		$this->query("DELETE FROM store;");
 	}
+	
+	/**
+	 * Syncs changes to disk and closes the PDO connection.
+	 * Every cache entry flagged as modified is JSON-encoded and written back
+	 * inside a single transaction. Calling this on an already-closed store
+	 * does nothing.
+	 * @throws	\Throwable	Rethrows any failure raised while writing, after rolling the transaction back.
+	 */
+	public function close() : void {
+		// Already closed - nothing left to sync
+		if($this->db === null)
+			return;
+		
+		$this->db->beginTransaction();
+		try {
+			foreach($this->cache as $key => $value_data) {
+				// If it wasn't modified, there's no point in saving it, is there?
+				if(!$value_data["modified"])
+					continue;
+				
+				$this->query(
+					"INSERT OR REPLACE INTO store(key, value) VALUES(:key, :value)",
+					[
+						"key" => $key,
+						"value" => json_encode($value_data["value"])
+					]
+				);
+				// It's on its way to disk now, so it's no longer dirty
+				$this->cache[$key]["modified"] = false;
+			}
+			$this->db->commit();
+		}
+		catch(\Throwable $error) {
+			// Don't leave a half-written index behind
+			$this->db->rollBack();
+			throw $error;
+		}
+		
+		// PDO only closes the connection once *every* reference to it is gone,
+		// and that includes the prepared statements we've been caching
+		$this->query_cache = [];
+		$this->db = null;
+	}
 }


@ -644,6 +688,20 @@ class search
 	 */
 	private static $invindex = null;
 	
+	private static $literator = null;
+	
+	/**
+	 * Transliterates a string to make it more suitable for entry into the search index.
+	 * The transliterator is created on first use and reused by later calls.
+	 * @param  string $str The string to transliterate.
+	 * @return string      The transliterated string.
+	 */
+	public static function transliterate(string $str) : string {
+		if(self::$literator === null)
+			self::$literator = Transliterator::createFromRules(':: Any-Latin; :: Latin-ASCII; :: NFD; :: [:Nonspacing Mark:] Remove; :: Lower(); :: NFC;', Transliterator::FORWARD);
+		
+		// Transliterate the argument itself - this is also called with page names, tags & page sources, not just the search query
+		return self::$literator->transliterate($str);
+	}
+	
 	/**
 	 * Converts a source string into an index of search terms that can be
 	 * merged into an inverted index.
@ -680,8 +738,6 @@ class search
 	 * @return	array	An array of raw tokens extracted from the specified source string.
 	 */
 	public static function tokenize(string $source, bool $capture_offsets = false) : array {
-		/** Normalises input characters for searching & indexing */
-		static $literator; if($literator == null) $literator = Transliterator::createFromRules(':: Any-Latin; :: Latin-ASCII; :: NFD; :: [:Nonspacing Mark:] Remove; :: Lower(); :: NFC;', Transliterator::FORWARD);
 		
 		$flags = PREG_SPLIT_NO_EMPTY; // Don't return empty items
 		if($capture_offsets)
@ -690,7 +746,7 @@ class search
 		// We don't need to normalise here because the transliterator handles 
 		// this for us. Also, we can't move the literator to a static member 
 		// variable because PHP doesn't like it very much
-		$source = $literator->transliterate($source);
+		$source = self::transliterate($source);
 		$source = preg_replace('/[\[\]\|\{\}\/]/u', " ", $source);
 		return preg_split("/((^\p{P}+)|(\p{P}*\s+\p{P}*)|(\p{P}+$))|\|/u", $source, -1, $flags);
 	}
@ -721,8 +777,10 @@ class search
 		ids::clear();
 		
 		// Clear the existing inverted index out
-		$this->invindex->clear();
-		$this->invindex->set("|termlist|", "[]");
+		if(self::$invindex == null)
+			self::invindex_load($paths->searchindex);
+		self::$invindex->clear();
+		self::$invindex->set("|termlist|", []);
 		
 		// Reindex each page in turn
 		$i = 0; $max = count(get_object_vars($pageindex));
@ -750,6 +808,9 @@ class search
 			$i++;
 		}
 		
+		echo("data: Syncing to disk....\n\n");
+		self::invindex_close();
+		
 		if($output) {
 			echo("data: Search index rebuilding complete.\n\n");
 			echo("data: Couldn't find $missing_files pages on disk. If $settings->sitename couldn't find some pages on disk, then you might need to manually correct $settings->sitename's page index (stored in pageindex.json).\n\n");
@ -787,17 +848,28 @@ class search
 	}
 	
 	/**
-	 * Reads in and parses an inverted index.
+	 * Loads a connection to an inverted index.
 	 * @param	string	$invindex_filename	The path to the inverted index to load.
 	 * @todo	Remove this function and make everything streamable
 	 */
 	public static function invindex_load(string $invindex_filename) {
-		global $env;
+		global $env, $paths;
 		$start_time = microtime(true);
-		$this->invindex = new StorageBox($invindex_filename);
+		self::$invindex = new StorageBox($invindex_filename);
 		$env->perfdata->searchindex_load_time = round((microtime(true) - $start_time)*1000, 3);
 	}
 	
+	/**
+	 * Closes the currently open inverted index.
+	 * Does nothing if no inverted index is currently loaded. The time taken
+	 * to close is recorded in $env->perfdata->searchindex_close_time (in ms).
+	 */
+	public static function invindex_close() {
+		global $env;
+		
+		// Nothing is open, so there's nothing to close
+		if(self::$invindex === null)
+			return;
+		
+		$start_time = microtime(true);
+		self::$invindex->close();
+		// Forget the closed store so that the `self::$invindex == null` checks elsewhere in this class see that a reload is needed
+		self::$invindex = null;
+		$env->perfdata->searchindex_close_time = round((microtime(true) - $start_time)*1000, 3);
+	}
+	
 	/**
 	 * Merge an index into an inverted index.
 	 * @param	int		$pageid		The id of the page to assign to the index that's being merged.
@ -805,47 +877,49 @@ class search
 	 * @param	array	$removals	An array of index entries to remove from the inverted index. Useful for applying changes to an inverted index instead of deleting and remerging an entire page's index.
 	 */
 	public static function invindex_merge($pageid, &$index, &$removals = []) : void {
-		if($this->invindex == null)
+		if(self::$invindex == null)
 			throw new Exception("Error: Can't merge into an inverted index that isn't loaded.");
 		
-		$termlist = json_decode($this->invindex->get("|termlist|"));
+		if(!self::$invindex->has("|termlist|"))
+			self::$invindex->set("|termlist|", []);
+		$termlist = self::$invindex->get("|termlist|");
 		
 		// Remove all the subentries that were removed since last time
 		foreach($removals as $nterm) {
 			// Delete the offsets
-			$this->invindex->delete("$nterm|$pageid");
+			self::$invindex->delete("$nterm|$pageid");
 			// Delete the item from the list of pageids containing this term
-			$nterm_pageids = json_decode($this->invindex->get($nterm));
+			$nterm_pageids = self::$invindex->get($nterm);
 			array_splice($nterm_pageids, array_search($pageid, $nterm_pageids), 1);
 			if(empty($nterm_pageids)) { // No need to keep the pageid list if there's nothing in it
-				$this->invindex->delete($nterm);
+				self::$invindex->delete($nterm);
 				// Update the termlist if we're deleting the term completely
 				$termlist_loc = array_search($nterm, $termlist);
 				if($termlist_loc !== false) array_splice($termlist, $termlist_loc, 1);
 			}
 			else
-				$this->invindex->set($nterm, json_encode($nterm_pageids));
+				self::$invindex->set($nterm, $nterm_pageids);
 		}
 		
 		// Merge all the new / changed index entries into the inverted index
 		foreach($index as $nterm => $newentry) {
-			if(!$this->invindex->has($nterm)) {
-				$this->invindex->set($nterm, "[]");
+			if(!self::$invindex->has($nterm)) {
+				self::$invindex->set($nterm, []);
 				$termlist[] = $nterm;
 			}
 			
 			// Update the nterm pageid list
-			$nterm_pageids = json_decode($this->invindex->get($nterm));
+			$nterm_pageids = self::$invindex->get($nterm);
 			if(array_search($pageid, $nterm_pageids) === false) {
 				$nterm_pageids[] = $pageid;
-				$this->invindex->set($nterm, json_encode($nterm_pageids));
+				self::$invindex->set($nterm, $nterm_pageids);
 			}
 			
 			// Store the offset list
-			$this->invindex->set("$nterm|$pageid", json_encode($newentry));
+			self::$invindex->set("$nterm|$pageid", $newentry);
 		}
 		
-		$this->invindex->set("|termlist|", json_encode($termlist));
+		self::$invindex->set("|termlist|", $termlist);
 	}
 	
 	/**
@ -853,9 +927,9 @@ class search
 	 * @param  int		$pageid		The pageid to remove.
 	 */
 	public static function invindex_delete(int $pageid) {
-		$termlist = json_decode($this->invindex->get("|termlist|"));
+		$termlist = self::$invindex->get("|termlist|");
 		foreach($termlist as $nterm) {
-			$nterm_pageids = json_decode($this->invindex->get("$nterm"));
+			$nterm_pageids = self::$invindex->get("$nterm");
 			$nterm_loc = array_search($pageid, $nterm_pageids);
 			// If this nterm doesn't appear in the list, we're not interested
 			if($nterm_loc === false)
@ -865,18 +939,18 @@ class search
 			array_splice($nterm_pageids, $nterm_loc, 1);
 			
 			// Delete the offset list
-			$this->invindex->delete("$nterm|$pageid");
+			self::$invindex->delete("$nterm|$pageid");
 			
 			// If this term doesn't appear in any other documents, delete it
 			if(count($nterm_pageids) === 0) {
-				$this->invindex->delete($nterm);
+				self::$invindex->delete($nterm);
 				array_splice($termlist, array_search($nterm, $termlist), 1);
 			}
 			else // Save the document id list back, since it still contains other pageids
-				$this->invindex->set($nterm, json_encode($nterm_pageids));
+				self::$invindex->set($nterm, $nterm_pageids);
 		}
 		// Save the termlist back to the store
-		$this->invindex->set("|termlist|", json_encode($termlist));
+		self::$invindex->set("|termlist|", $termlist);
 	}
 	
 	
@ -893,8 +967,8 @@ class search
 	 * Actually based on my earlier explode_adv https://starbeamrainbowlabs.com/blog/article.php?article=posts/081-PHP-String-Splitting.html
 	 * @param	string	$query	The queyr string to split.
 	 */
-	private function stas_split($query) {
-		$chars = str_split($query);
+	public function stas_split($query) {
+		$chars = str_split(self::transliterate($query));
 		$terms = [];
 		$next_term = "";
 		$toggle_state = false; // true = now inside, false = now outside
@ -931,7 +1005,7 @@ class search

 	 * @param	string[]	$tokens	The array of query tokens to parse.
 	 */
-	private function stas_parse($tokens) {
+	public function stas_parse($tokens) {
 		/* Supported Syntax *
 		 * 
 		 * -term				exclude a term
@ -939,7 +1013,7 @@ class search
 		 * terms !dest terms	redirect entire query (minus the !bang) to interwiki with registered shortcut dest
 		 * prefix:term			apply prefix operator to term
 		 */
-		var_dump($tokens);
+		// var_dump($tokens);
 		$result = [
 			"terms" => [],
 			"exclude" => [],
@ -1038,11 +1112,8 @@ class search
 	{
 		global $settings, $pageindex;
 		
-		/** Normalises input characters for searching & indexing */
-		static $literator; if($literator == null) $literator = Transliterator::createFromRules(':: Any-Latin; :: Latin-ASCII; :: NFD; :: [:Nonspacing Mark:] Remove; :: Lower(); :: NFC;', Transliterator::FORWARD);
-		
-		$query_stas = $this->stas_parse(
-			$this->stas_split($literator->transliterate($query))
+		$query_stas = self::stas_parse(
+			self::stas_split(self::transliterate($query))
 		);
 		
 		/* Sub-array format:
@ -1062,23 +1133,23 @@ class search
 		];
 		
 		// Query the inverted index
-		foreach($query_stas as $term_def) {
+		foreach($query_stas["terms"] as $term_def) {
 			if($term_def["weight"] == -1)
 				continue; // Skip stop words
 			
 			if(!in_array($term_def["location"], ["all", "inbody"]))
 				continue; // Skip terms we shouldn't search the page body for
 			
-			if(!$this->$invindex->has($term_def["term"]))
+			if(!self::$invindex->has($term_def["term"]))
 				continue; // Skip if it's not in the index
 			
 			// For each page that contains this term.....
-			$term_pageids = json_decode($this->invindex->get($term_def["term"]));
+			$term_pageids = self::$invindex->get($term_def["term"]);
 			foreach($term_pageids as $pageid) {
 				// Check to see if it contains any words we should exclude
 				$skip = false;
 				foreach($query_stas["exclude"] as $exlc_term) {
-					if($this->invindex->has("$excl_term|$pageid")) {
+					if(self::$invindex->has("$excl_term|$pageid")) {
 						$skip = true;
 						break;
 					}
@ -1086,7 +1157,7 @@ class search
 				if($skip) continue;
 				
 				// Get the list of offsets
-				$page_offsets = json_decode($this->invindex->get("{$term_def["term"]}|$pageid"));
+				$page_offsets = self::$invindex->get("{$term_def["term"]}|$pageid");
 				
 				if(!isset($matching_pages[$pageid]))
 					$matching_pages[$pageid] = $match_template; // Arrays are assigned by copy in php
@ -1102,7 +1173,7 @@ class search
 		}
 		
 		// Query page titles & tags
-		foreach($terms as $term_def) {
+		foreach($query_stas["terms"] as $term_def) {
 			// No need to skip stop words here, since we're doing a normal 
 			// sequential search anyway
 			if(!in_array($term_def["location"], ["all", "intitle", "intags"]))
@ -1114,8 +1185,8 @@ class search
 				// Setup a variable to hold the current page's id
 				$pageid = null; // Cache the page id
 				
-				$lit_title = $literator->transliterate($pagename);
-				$lit_tags = $literator->transliterate(implode(" ", $pagedata->tags));
+				$lit_title = self::transliterate($pagename);
+				$lit_tags = isset($pagedata->tags) ? self::transliterate(implode(" ", $pagedata->tags)) : null;
 				
 				// Make sure that the title & tags don't contain a term we should exclude
 				$skip = false;
@ -1145,6 +1216,10 @@ class search
 					}
 				}
 				
+				// If this page doesn't have any tags, skip it
+				if($lit_tags == null)
+					continue;
+				
 				if(!in_array($term_def["location"], ["all", "intags"]))
 					continue; // If we shouldn't search the tags, no point in continuing
 				
@ -1204,30 +1279,29 @@ class search
 	/**
 	 * Extracts a context string (in HTML) given a search query that could be displayed
 	 * in a list of search results.
-	 * @param	string	$invindex	The inverted index to consult.
 	 * @param	string	$pagename	The name of the paget that this source belongs to. Used when consulting the inverted index.
 	 * @param	string	$query		The search queary to generate the context for.
 	 * @param	string	$source		The page source to extract the context from.
 	 * @return	string				The generated context string.
 	 */
-	public static function extract_context($invindex, $pagename, $query, $source)
+	public static function extract_context($pagename, $query, $source)
 	{
 		global $settings;
 		
 		$pageid = ids::getid($pagename);
-		$nterms = self::tokenize($query);
+		$nterms = self::stas_parse(self::stas_split($query))["terms"];
+		
+		// Query the inverted index for offsets
 		$matches = [];
-		
 		foreach($nterms as $nterm) {
-			// Skip over words that don't appear in the inverted index (e.g. stop words)
-			if(!isset($invindex[$nterm]))
-				continue;
 			// Skip if the page isn't found in the inverted index for this word
-			if(!isset($invindex[$nterm][$pageid]))
+			if(!self::$invindex->has("{$nterm["term"]}|$pageid"))
 				continue;
 			
-			foreach($invindex[$nterm][$pageid]["offsets"] as $next_offset)
-				$matches[] = [ $nterm, $next_offset ];
+			$nterm_offsets = self::$invindex->get("{$nterm["term"]}|$pageid")->offsets;
+			
+			foreach($nterm_offsets as $next_offset)
+				$matches[] = [ $nterm["term"], $next_offset ];
 		}
 		
 		// Sort the matches by offset
@ -1279,6 +1353,8 @@ class search
 			$contexts_text[] = substr($source, $context["from"], $context["to"] - $context["from"]);
 		}
 		
+		// BUG: Make sure that a snippet is centred on the word in question if we have to cut it short
+		
 		$result = implode(" … ", $contexts_text);
 		end($contexts); // If there's at least one item in the list and were not at the very end of the page, add an extra ellipsis
 		if(isset($contexts[0]) && $contexts[key($contexts)]["to"] < $sourceLength) $result .= "… ";
@ -1296,15 +1372,15 @@ class search
 	 */
 	public static function highlight_context($query, $context)
 	{
-		$qterms = self::tokenize($query);
+		$qterms = self::stas_parse(self::stas_split($query))["terms"];
 		
-		foreach($qterms as $qterm)
-		{
-			if(in_array($qterm, static::$stop_words))
+		foreach($qterms as $qterm) {
+			// Stop words are marked by STAS
+			if($qterm["weight"] <= 0)
 				continue;
 			
 			// From http://stackoverflow.com/a/2483859/1460422
-			$context = preg_replace("/" . preg_replace('/\\//u', "\/", preg_quote($qterm)) . "/iu", "<strong class='search-term-highlight'>$0</strong>", $context);
+			$context = preg_replace("/" . preg_replace('/\\//u', "\/", preg_quote($qterm["term"])) . "/iu", "<strong class='search-term-highlight'>$0</strong>", $context);
 		}
 		
 		return $context;
--- a/modules/page-delete.php
+++ b/modules/page-delete.php
@ -1,7 +1,7 @@
 <?php
 register_module([
 	"name" => "Page deleter",
-	"version" => "0.10.1",
+	"version" => "0.10.2",
 	"author" => "Starbeamrainbowlabs",
 	"description" => "Adds an action to allow administrators to delete pages.",
 	"id" => "page-delete",
@ -86,6 +86,7 @@ register_module([
 				$pageid = ids::getid($env->page);
 				search::invindex_load($paths->searchindex);
 				search::invindex_delete($pageid);
+				search::invindex_close();
 			}
 			
 			// Remove the page's name from the id index
--- a/modules/page-edit.php
+++ b/modules/page-edit.php
@ -1,7 +1,7 @@
 <?php
 register_module([
 	"name" => "Page editor",
-	"version" => "0.17.4",
+	"version" => "0.17.5",
 	"author" => "Starbeamrainbowlabs",
 	"description" => "Allows you to edit pages by adding the edit and save actions. You should probably include this one.",
 	"id" => "page-edit",
@ -448,11 +448,11 @@ DIFFSCRIPT;
 			
 			// Update the inverted search index
 			
+			if(module_exists("feature-search")) {
 				// Construct an index for the old and new page content
 				$oldindex = [];
 				$oldpagedata = ""; // We need the old page data in order to pass it to the preprocessor
-			if(file_exists("$env->storage_prefix$env->page.md"))
-			{
+				if(file_exists("$env->storage_prefix$env->page.md")) {
 					$oldpagedata = file_get_contents("$env->storage_prefix$env->page.md");
 					$oldindex = search::index_generate($oldpagedata);
 				}
@ -467,7 +467,8 @@ DIFFSCRIPT;
 				// Merge the changes into the inverted index
 				search::invindex_merge(ids::getid($env->page), $additions, $removals);
 				// Save the inverted index back to disk
-			
+				search::invindex_close();
+			}
 			// -----~~~==~~~-----
 			
 			if(file_put_contents("$env->storage_prefix$env->page.md", $pagedata) !== false)
--- a/modules/page-help.php
+++ b/modules/page-help.php
@ -1,7 +1,7 @@
 <?php
 register_module([
 	"name" => "Help page",
-	"version" => "0.9.3",
+	"version" => "0.9.4",
 	"author" => "Starbeamrainbowlabs",
 	"description" => "Adds a rather useful help page. Access through the 'help' action. This module also exposes help content added to Pepperminty Wiki's inbuilt invisible help section system.",
 	"id" => "page-help",
@ -58,10 +58,8 @@ register_module([
 				$content .= "<ul>\n";
 				$content .= "<li>$settings->sitename's root directory is " . (!is_writeable(__DIR__) ? "not " : "") . "writeable.</li>\n";
 				$content .= "<li>The page index is currently " . human_filesize(filesize($paths->pageindex)) . " in size, and took " . $env->perfdata->pageindex_decode_time . "ms to decode.</li>";
-				if(module_exists("feature-search"))
-				{
-					search::measure_invindex_load_time($paths->searchindex);
-					$content .= "<li>The search index is currently " . human_filesize(filesize($paths->searchindex)) . " in size, and took " . $env->perfdata->searchindex_decode_time . "ms to decode.</li>";
+				if(module_exists("feature-search")) {
+					$content .= "<li>The search index is currently " . human_filesize(filesize($paths->searchindex)) . " in size.</li>";
 				}
 				
 				$content .= "<li>The id index is currently " . human_filesize(filesize($paths->idindex)) . " in size, and took " . $env->perfdata->idindex_decode_time . "ms to decode.</li>";