"Search",
"version" => "0.13.2",
"author" => "Starbeamrainbowlabs",
"description" => "Adds proper search functionality to Pepperminty Wiki using an inverted index to provide a full text search engine. If pages don't show up, then you might have hit a stop word. If not, try requesting the `invindex-rebuild` action to rebuild the inverted index from scratch.",
"id" => "feature-search",
// After refactoring, we'll need to specify dependencies like this
"depends" => [ "lib-search-engine" ],
"code" => function() {
global $settings, $paths;
/**
* @api {get} ?action=index&page={pageName} Get an index of words for a given page
* @apiName SearchIndex
* @apiGroup Search
* @apiPermission Anonymous
* @apiDescription For debugging purposes. Be warned - the format could change at any time!
*
* @apiParam {string} page The page to generate a word index for.
*/
/*
* ██ ███ ██ ██████ ███████ ██ ██
* ██ ████ ██ ██ ██ ██ ██ ██
* ██ ██ ██ ██ ██ ██ █████ ███
* ██ ██ ██ ██ ██ ██ ██ ██ ██
* ██ ██ ████ ██████ ███████ ██ ██
*/
// Debugging aid: prints the raw source of the requested page followed by the
// word index that search::index_generate() builds from it, as plain text.
add_action("index", function() {
    global $env;

    header("content-type: text/plain");

    // Refuse early instead of handing `false` to the indexer when the page
    // has no readable source file on disk.
    $filepath = "$env->storage_prefix$env->page.md";
    $source = is_file($filepath) ? file_get_contents($filepath) : false;
    if($source === false) {
        http_response_code(404);
        exit("Error: Couldn't read the source of the page '$env->page', so no index could be generated.\n");
    }

    $index = search::index_generate($source);

    echo("Page name: $env->page\n");
    echo("--------------- Source ---------------\n");
    echo($source); echo("\n");
    echo("--------------------------------------\n\n");

    echo("---------------- Index ---------------\n");
    // One line per term: how often it occurs and at which offsets
    foreach($index as $term => $entry) {
        echo("$term: {$entry["freq"]} matches | " . implode(", ", $entry["offsets"]) . "\n");
    }
    echo("--------------------------------------\n");
});
/**
* @api {get} ?action=invindex-rebuild Rebuild the inverted search index from scratch
* @apiDescription Causes the inverted search index to be completely rebuilt from scratch. Can take a while for large wikis!
* @apiName SearchInvindexRebuild
* @apiGroup Search
* @apiPermission Admin
*
* @apiParam {string} secret Optional. Specify the secret from peppermint.json here (as a POST parameter) in order to rebuild the search index without logging in.
*/
/*
* ██ ███ ██ ██ ██ ██ ███ ██ ██████ ███████ ██ ██
* ██ ████ ██ ██ ██ ██ ████ ██ ██ ██ ██ ██ ██
* ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ █████ ███ █████
* ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██
* ██ ██ ████ ████ ██ ██ ████ ██████ ███████ ██ ██
*
* ██████ ███████ ██████ ██ ██ ██ ██ ██████
* ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██
* ██████ █████ ██████ ██ ██ ██ ██ ██ ██
* ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██
* ██ ██ ███████ ██████ ██████ ██ ███████ ██████
*/
// Rebuilds the inverted search index from scratch. Allowed for admins, and
// for any client that presents the wiki's secret in the `secret` POST
// parameter; everyone else gets a 401 error page.
add_action("invindex-rebuild", function() {
    global $env, $settings;

    // hash_equals() compares in constant time, so response timing doesn't
    // reveal how much of a guessed secret was correct. The is_string()
    // guards keep the old `===` semantics: only a string can match a string.
    $authorised = $env->is_admin || (
        !empty($_POST["secret"]) &&
        is_string($_POST["secret"]) &&
        is_string($settings->secret) &&
        hash_equals($settings->secret, $_POST["secret"])
    );

    if(!$authorised) {
        http_response_code(401);
        exit(page_renderer::render_main("Error - Search index regenerator - $settings->sitename", "
Error: You aren't allowed to regenerate the search index. Try logging in as an admin, or setting the secret POST parameter to $settings->sitename's secret - which can be found in $settings->sitename's peppermint.json file.
"));
    }

    search::invindex_rebuild();
});
/**
* @api {get} ?action=idindex-show Show the id index
* @apiDescription Outputs the id index. Useful if you need to verify that it's working as expected. Output is a json object.
* @apiName SearchShowIdIndex
* @apiGroup Search
* @apiPermission Anonymous
*/
// Dumps the global id index as pretty-printed JSON and ends the request.
add_action("idindex-show", function() {
    global $idindex;

    header("content-type: application/json; charset=UTF-8");

    $encoded_index = json_encode($idindex, JSON_PRETTY_PRINT);
    exit($encoded_index);
});
/**
* @api {get} ?action=search&query={text}[&format={format}] Search the wiki for a given query string
* @apiName Search
* @apiGroup Search
* @apiPermission Anonymous
*
* @apiParam {string} query The query string to search for.
* @apiParam {string} format Optional. Valid values: html, json. In json mode an object is returned with page names as keys, values as search result information - sorted in ranking order.
*/
/*
* ███████ ███████ █████ ██████ ██████ ██ ██
* ██ ██ ██ ██ ██ ██ ██ ██ ██
* ███████ █████ ███████ ██████ ██ ███████
* ██ ██ ██ ██ ██ ██ ██ ██ ██
* ███████ ███████ ██ ██ ██ ██ ██████ ██ ██
*/
// Runs a full-text query against the inverted index and renders the results,
// either as a JSON object keyed by page name or as a results page.
add_action("search", function() {
global $settings, $env, $pageindex, $paths;
// Create the inverted index if it doesn't exist.
if(!file_exists($paths->searchindex))
search::invindex_rebuild(false);
// Create the didyoumean index if it doesn't exist.
if(module_exists("feature-search-didyoumean") && !file_exists($paths->didyoumeanindex))
search::didyoumean_rebuild(false);
// Without a query there is nothing to search for: show an error page and stop.
if(!isset($_GET["query"]))
exit(page_renderer::render("No Search Terms - Error - $settings->sitename", "
You didn't specify any search terms. Try typing some into the box above.
"));
// $search_start times the whole search; $time_start is reset for each phase
// so that index loading and querying are reported separately.
$search_start = microtime(true);
$time_start = microtime(true);
search::invindex_load($paths->searchindex);
$env->perfdata->invindex_decode_time = round((microtime(true) - $time_start)*1000, 3);
$time_start = microtime(true);
// NOTE(review): $query_parsed is passed to invindex_query() and then reused
// below for context extraction - presumably it is filled in by reference;
// confirm against search::invindex_query().
$query_parsed = null;
$results = search::invindex_query($_GET["query"], $query_parsed);
$resultCount = count($results);
$env->perfdata->invindex_query_time = round((microtime(true) - $time_start)*1000, 3);
// Expose the timings (in milliseconds) as response headers
header("x-invindex-load-time: {$env->perfdata->invindex_decode_time}ms");
header("x-invindex-query-time: {$env->perfdata->invindex_query_time}ms");
$start = microtime(true);
// FUTURE: When we implement $_GET["offset"] and $_GET["count"] or something we can optimise here
// Iterate by reference so that the extracted context is stored on each result itself.
foreach($results as $key => &$result) {
$filepath = $env->storage_prefix . $result["pagename"] . ".md";
if(!file_exists($filepath)) {
error_log("[PeppermintyWiki/$settings->sitename/search] Search engine returned {$result["pagename"]} as a result (maps to $filepath), but it doesn't exist on disk (try rebuilding the search index).");
continue; // Something strange is happening
}
$result["context"] = search::extract_context(
$result["pagename"],
$query_parsed,
file_get_contents($filepath)
);
}
// This is absolutely *essential*, because otherwise we hit a very strange bug whereby PHP duplicates the value of the last iterated search result. Ref https://bugs.php.net/bug.php?id=70387 - apparently "documented behaviour"
unset($result);
$env->perfdata->context_generation_time = round((microtime(true) - $start)*1000, 3);
header("x-context-generation-time: {$env->perfdata->context_generation_time}ms");
$env->perfdata->search_time = round((microtime(true) - $search_start)*1000, 3);
header("x-search-time: {$env->perfdata->search_time}ms");
// JSON mode: re-key the results by page name, send them and stop here.
if(!empty($_GET["format"]) && $_GET["format"] == "json") {
header("content-type: application/json");
$json_results = new stdClass();
foreach($results as $key => $result)
$json_results->{$result["pagename"]} = $result;
exit(json_encode($json_results));
}
$title = $_GET["query"] . " - Search results - $settings->sitename";
$content = "\n";
$content .= "
Found $resultCount " . ($resultCount === 1 ? "result" : "results") . " in " . $env->perfdata->search_time . "ms. ";
$query = $_GET["query"];
// Tell the user whether a page named exactly like the query exists and,
// if it doesn't, whether they would be allowed to create it.
if(isset($pageindex->$query)) {
$content .= "There's a page on $settings->sitename called $query.";
}
else
{
$content .= "There isn't a page called $query on $settings->sitename, but you ";
if((!$settings->anonedits && !$env->is_logged_in) || !$settings->editing) {
$content .= "do not have permission to create it.";
if(!$env->is_logged_in) {
$content .= " You could try logging in.";
}
}
else {
$content .= "can create it.";
}
}
$content .= " Pssst! Power users can make use of $settings->sitename's advanced query syntax. Learn about it here!
";
// NOTE(review): the next two closing braces do not match any control
// structure opened above (the first would end this closure early), and the
// preg_replace() pattern further down is never terminated. The rest of this
// action looks damaged - restore it from version control before editing.
}
}
$i = 0; // todo use $_GET["offset"] and $_GET["result-count"] or something
foreach($results as $result)
{
$link = "?page=" . rawurlencode($result["pagename"]);
$pagesource = file_get_contents($env->storage_prefix . $result["pagename"] . ".md");
//echo("Extracting context for result " . $result["pagename"] . ".\n");
$context = $result["context"];
if(mb_strlen($context) === 0)
$context = mb_substr($pagesource, 0, $settings->search_characters_context * 2);
//echo("'Generated search context for " . $result["pagename"] . ": $context'\n");
$context = search::highlight_context(
$query_parsed,
preg_replace('/page.md", null, null, null, $settings->search_characters_context * 2));
if($pageindex->{$env->page}->size > $settings->search_characters_context * 2)
$context .= "...";
}*/
$tag_list = "";
foreach($pageindex->{$result["pagename"]}->tags ?? [] as $tag) $tag_list .= "$tag";
$tag_list .= "\n";
// Make redirect pages italics
if(!empty($pageindex->{$result["pagename"]}->redirect))
$result["pagename"] = "{$result["pagename"]}";
// We add 1 to $i here to convert it from an index to a result
// number as people expect it to start from 1
$content .= "
$settings->sitename understood your query to mean the following:
$result
"));
});
/*
* ██████ ██████ ███████ ███ ██ ███████ ███████ █████ ██████ ██████ ██ ██
* ██ ██ ██ ██ ██ ████ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██
* ██ ██ ██████ █████ ██ ██ ██ ███████ █████ ███████ ██████ ██ ███████
* ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██
* ██████ ██ ███████ ██ ████ ███████ ███████ ██ ██ ██ ██ ██████ ██ ██
*/
/**
* @api {get} ?action=opensearch-description Get the opensearch description file
* @apiName OpenSearchDescription
* @apiGroup Search
* @apiPermission Anonymous
*/
// Serves the OpenSearch description document for this wiki.
//
// NOTE(review): the previous literal contained no markup at all, so PHP
// interpolated the non-existent properties $settings->sitenameSearch and
// $settings->faviconSearch, and $siteRoot was never used. The elements below
// follow the OpenSearch 1.1 description format and point at the `search` and
// `suggest-pages` actions - compare against the project's history to confirm.
add_action("opensearch-description", function () {
    global $settings;

    $siteRoot = full_url() . "/index.php";

    // ?debug serves the same document as text/plain - presumably so that it
    // can be read directly in a browser.
    if(!isset($_GET["debug"]))
        header("content-type: application/opensearchdescription+xml");
    else
        header("content-type: text/plain");

    // Settings are free text, so escape them before embedding them in XML
    $xml_escape = function($text) {
        return htmlspecialchars((string)$text, ENT_XML1 | ENT_QUOTES, "UTF-8");
    };
    $sitename = $xml_escape($settings->sitename);
    $favicon = $xml_escape($settings->favicon);
    $root = $xml_escape($siteRoot);

    exit('<?xml version="1.0" encoding="UTF-8"?' . '>' . // hack The build system strips it otherwise O.o I should really fix that.
"\n<OpenSearchDescription xmlns=\"http://a9.com/-/spec/opensearch/1.1/\">
    <ShortName>Search $sitename</ShortName>
    <Description>Search $sitename, which is powered by Pepperminty Wiki.</Description>
    <Tags>$sitename Wiki</Tags>
    <Image>$favicon</Image>
    <Attribution>Search content available under the license linked to at the bottom of the search results page.</Attribution>
    <Developer>Starbeamrainbowlabs (https://github.com/sbrl/Pepperminty-Wiki/graphs/contributors)</Developer>
    <InputEncoding>UTF-8</InputEncoding>
    <OutputEncoding>UTF-8</OutputEncoding>
    <Url type=\"text/html\" method=\"get\" template=\"$root?action=search&amp;query={searchTerms}\" />
    <Url type=\"application/x-suggestions+json\" template=\"$root?action=suggest-pages&amp;query={searchTerms}&amp;type=opensearch\" />
</OpenSearchDescription>\n");
});
/**
* @api {get} ?action=suggest-pages&query={query}[&type={type}] Get page name suggestions for a query
* @apiName SearchSuggestPages
* @apiGroup Search
* @apiPermission Anonymous
*
* @apiParam {string} query The search query string to get search suggestions for.
* @apiParam {string} type The type of result to return. Default value: json. Available values: json, opensearch
*/
// Ranks every page name by its weighted Levenshtein distance from the query
// and returns the closest ones, either as a JSON list of
// {pagename, distance} entries or in the OpenSearch suggestions format.
add_action("suggest-pages", function() {
    global $settings, $pageindex;

    // Suggestions are switched off: always answer with an empty JSON array
    if($settings->dynamic_page_suggestion_count === 0) {
        header("content-type: application/json");
        header("content-length: 3");
        exit("[]\n");
    }

    // Checked explicitly rather than with empty(): "0" is a legitimate
    // query, and an array (?query[]=x) can't be transliterated.
    $raw_query = $_GET["query"] ?? null;
    if(!is_string($raw_query) || $raw_query === "") {
        http_response_code(400);
        header("content-type: text/plain");
        exit("Error: You didn't specify the 'query' GET parameter.");
    }

    $type = $_GET["type"] ?? "json";
    if(!is_string($type) || !in_array($type, ["json", "opensearch"], true)) {
        http_response_code(406);
        // The rejected value is echoed back, so it must not be sent as HTML
        header("content-type: text/plain");
        $type_shown = is_string($type) ? $type : gettype($type);
        exit("Error: The type '$type_shown' is not one of the supported output types. Available values: json, opensearch. Default: json");
    }

    $query = search::$literator->transliterate($raw_query);

    // Rank each page name
    $results = [];
    foreach($pageindex as $pageName => $entry) {
        $results[] = [
            "pagename" => $pageName,
            // Costs: Insert: 1, Replace: 8, Delete: 6
            "distance" => levenshtein($query, search::$literator->transliterate($pageName), 1, 8, 6)
        ];
    }

    // Closest match first; equally distant names are ordered by strcmp()
    usort($results, function($a, $b) {
        return ($a["distance"] <=> $b["distance"])
            ?: strcmp($a["pagename"], $b["pagename"]);
    });

    // Send the results to the user
    $suggestions = array_slice($results, 0, $settings->dynamic_page_suggestion_count);

    if($type === "opensearch") {
        // OpenSearch suggestions: [ original query, [ completions... ] ]
        $opensearch_output = [
            $raw_query,
            array_map(function($suggestion) { return $suggestion["pagename"]; }, $suggestions)
        ];
        header("content-type: application/x-suggestions+json");
        exit(json_encode($opensearch_output));
    }

    // $type was validated above, so the only remaining value is "json"
    header("content-type: application/json");
    exit(json_encode($suggestions));
});
// Only ship the client-side suggestion script when dynamic page suggestions
// are enabled. The snippet listens for keyup events on the first
// input[type=search] element, requests ?action=suggest-pages for the typed
// text, and replaces the options of the #allpages datalist with the page
// names the server returns (an empty reply leaves the datalist untouched).
if($settings->dynamic_page_suggestion_count > 0)
{
page_renderer::add_js_snippet('/// Dynamic page suggestion system
// Micro snippet 8 - Promisified GET (fetched 20th Nov 2016)
function get(u){return new Promise(function(r,t,a){a=new XMLHttpRequest();a.onload=function(b,c){b=a.status;c=a.response;if(b>199&&b<300){r(c)}else{t(c)}};a.open("GET",u,true);a.send(null)})}
window.addEventListener("load", function(event) {
var searchBox = document.querySelector("input[type=search]");
searchBox.dataset.lastValue = "";
searchBox.addEventListener("keyup", function(event) {
// Make sure that we don\'t keep sending requests to the server if nothing has changed
if(searchBox.dataset.lastValue == event.target.value)
return;
searchBox.dataset.lastValue = event.target.value;
// Fetch the suggestions from the server
get("?action=suggest-pages&query=" + encodeURIComponent(event.target.value)).then(function(response) {
var suggestions = JSON.parse(response),
dataList = document.getElementById("allpages");
// If the server sent no suggestions, then we shouldn\'t replace the contents of the datalist
if(suggestions.length == 0)
return;
console.info(`Fetched suggestions for ${event.target.value}:`, suggestions.map(s => s.pagename));
// Remove all the existing suggestions
while(dataList.firstChild) {
dataList.removeChild(dataList.firstChild);
}
// Add the new suggestions to the datalist
var optionsFrag = document.createDocumentFragment();
suggestions.forEach(function(suggestion) {
var suggestionElement = document.createElement("option");
suggestionElement.value = suggestion.pagename;
suggestionElement.dataset.distance = suggestion.distance;
optionsFrag.appendChild(suggestionElement);
});
dataList.appendChild(optionsFrag);
});
});
});
');
}
// Registers a "search" command with the CLI when the feature-cli module is
// present. With no arguments it prints usage help; "rebuild" rebuilds the
// search index. Any other subcommand is ignored. Always returns 0.
if(module_exists("feature-cli")) {
    cli_register("search", "Query and manipulate the search index", function(array $args) : int {
        // No subcommand given: show the help text
        if(empty($args)) {
            echo("search: query and manipulate the search index
Usage:
search {subcommand}
Subcommands:
rebuild Rebuilds the search index
");
            return 0;
        }

        $subcommand = $args[0];
        if($subcommand == "rebuild")
            search::invindex_rebuild();

        return 0;
    });
}
add_help_section("27-search", "Searching", "
$settings->sitename has an integrated full-text search engine, allowing you to search all of the pages on $settings->sitename and their content. To use it, simply enter your query into the page name box and press enter. If a page isn't found with the exact name of your query terms, a search will be performed instead.
Additionally, advanced users can take advantage of some extra query syntax that $settings->sitename supports, which is inspired by popular search engines:
Example
Meaning
cat -dog
Search for pages containing \"cat\", but not \"dog\". This syntax does not make sense on it's own - other words must be present for it to take effect.