1
0
Fork 0
mirror of https://github.com/sbrl/Pepperminty-Wiki.git synced 2024-11-26 05:32:59 +00:00

Search page title and tags

This commit is contained in:
Starbeamrainbowlabs 2015-11-01 15:05:54 +00:00
parent 9046d72e7c
commit 173a72fa46
4 changed files with 133 additions and 21 deletions

View file

@ -215,6 +215,11 @@ $settings->search_max_distance_context_display = 100;
// term in the context below each search result. // term in the context below each search result.
$settings->search_characters_context = 200; $settings->search_characters_context = 200;
// The weighting to give to search term matches found in a page's title.
$settings->search_title_matches_weighting = 10;
// The weighting to give to search term matches found in a page's tags.
$settings->search_tags_matches_weighting = 3;
// A string of css to include. Will be included in the <head> of every page // A string of css to include. Will be included in the <head> of every page
// inside a <style> tag. This may also be a url - urls will be referenced via a // inside a <style> tag. This may also be a url - urls will be referenced via a
@ -1412,6 +1417,13 @@ register_module([
$pagesource = file_get_contents($result["pagename"] . ".md"); $pagesource = file_get_contents($result["pagename"] . ".md");
$context = search::extract_context($_GET["query"], $pagesource); $context = search::extract_context($_GET["query"], $pagesource);
$context = search::highlight_context($_GET["query"], $context); $context = search::highlight_context($_GET["query"], $context);
if(strlen($context) == 0)
{
$context = search::strip_markup(file_get_contents("$env->page.md", null, null, null, $settings->search_characters_context * 2));
if($pageindex->{$env->page}->size > $settings->search_characters_context * 2)
$context .= "...";
}
// We add 1 to $i here to convert it from an index to a result // We add 1 to $i here to convert it from an index to a result
// number as people expect it to start from 1 // number as people expect it to start from 1
@ -1622,28 +1634,66 @@ class search
public static function query_invindex($query, &$invindex) public static function query_invindex($query, &$invindex)
{ {
global $settings, $pageindex;
$query_terms = self::tokenize($query); $query_terms = self::tokenize($query);
$matching_pages = []; $matching_pages = [];
// Loop over each term in the query and find the matching page entries // Loop over each term in the query and find the matching page entries
for($i = 0; $i < count($query_terms); $i++) for($i = 0; $i < count($query_terms); $i++)
{ {
$qterm = $query_terms[$i]; $qterm = $query_terms[$i];
// Skip over this term if it isn't in the inverted index // Only search the inverted index if the term actually exists there
if(!isset($invindex[$qterm])) if(isset($invindex[$qterm]))
continue;
// Loop over each page
foreach($invindex[$qterm] as $pageid => $page_entry)
{ {
// Create an entry in the matching pages array if it doesn't exist // Loop over each page in the inverted index entry
if(!isset($matching_pages[$pageid])) foreach($invindex[$qterm] as $pageid => $page_entry)
$matching_pages[$pageid] = [ "nterms" => [] ]; {
$matching_pages[$pageid]["nterms"][$qterm] = $page_entry; // Create an entry in the matching pages array if it doesn't exist
if(!isset($matching_pages[$pageid]))
$matching_pages[$pageid] = [ "nterms" => [] ];
$matching_pages[$pageid]["nterms"][$qterm] = $page_entry;
}
}
// Loop over the pageindex and search the titles / tags
foreach ($pageindex as $pagename => $pagedata)
{
// Get the current page's id
$pageid = ids::getid($pagename);
// Consider matches in the page title
if(stripos($pagename, $qterm) !== false)
{
// We found the qterm in the title
if(!isset($matching_pages[$pageid]))
$matching_pages[$pageid] = [ "nterms" => [] ];
// Set up a counter for page title matches if it doesn't exist already
if(!isset($matching_pages[$pageid]["title-matches"]))
$matching_pages[$pageid]["title-matches"] = 0;
$matching_pages[$pageid]["title-matches"] += count(mb_stripos_all($pagename, $qterm));
}
// Consider matches in the page's tags
if(isset($pagedata->tags) and // If this page has tags
stripos(implode(" ", $pagedata->tags), $qterm) !== false) // And we found the qterm in the tags
{
if(!isset($matching_pages[$pageid]))
$matching_pages[$pageid] = [ "nterms" => [] ];
// Set up a counter for tag matches if there isn't one already
if(!isset($matching_pages[$pageid]["tag-matches"]))
$matching_pages[$pageid]["tag-matches"] = 0;
$matching_pages[$pageid]["tag-matches"] += count(mb_stripos_all(implode(" ", $pagedata->tags), $qterm));
}
} }
} }
foreach($matching_pages as $pageid => &$pagedata) foreach($matching_pages as $pageid => &$pagedata)
{ {
$pagedata["pagename"] = ids::getpagename($pageid); $pagedata["pagename"] = ids::getpagename($pageid);
@ -1656,6 +1706,12 @@ class search
// todo rank by context here // todo rank by context here
} }
// Consider matches in the title / tags
if(isset($pagedata["title-matches"]))
$pagedata["rank"] += $pagedata["title-matches"] * $settings->search_title_matches_weighting;
if(isset($pagedata["tag-matches"]))
$pagedata["rank"] += $pagedata["tag-matches"] * $settings->search_tags_matches_weighting;
// todo remove items if the rank is below a threshold // todo remove items if the rank is below a threshold
} }

View file

@ -50,7 +50,7 @@
"author": "Starbeamrainbowlabs", "author": "Starbeamrainbowlabs",
"description": "Adds proper search functionality to Pepperminty Wiki. Note that this module, at the moment, just contains test code while I figure out how best to write a search engine.", "description": "Adds proper search functionality to Pepperminty Wiki. Note that this module, at the moment, just contains test code while I figure out how best to write a search engine.",
"id": "feature-search", "id": "feature-search",
"lastupdate": 1446387843, "lastupdate": 1446390331,
"optional": false "optional": false
}, },
{ {

View file

@ -68,6 +68,13 @@ register_module([
$pagesource = file_get_contents($result["pagename"] . ".md"); $pagesource = file_get_contents($result["pagename"] . ".md");
$context = search::extract_context($_GET["query"], $pagesource); $context = search::extract_context($_GET["query"], $pagesource);
$context = search::highlight_context($_GET["query"], $context); $context = search::highlight_context($_GET["query"], $context);
if(strlen($context) == 0)
{
$context = search::strip_markup(file_get_contents("$env->page.md", null, null, null, $settings->search_characters_context * 2));
if($pageindex->{$env->page}->size > $settings->search_characters_context * 2)
$context .= "...";
}
// We add 1 to $i here to convert it from an index to a result // We add 1 to $i here to convert it from an index to a result
// number as people expect it to start from 1 // number as people expect it to start from 1
@ -278,28 +285,66 @@ class search
public static function query_invindex($query, &$invindex) public static function query_invindex($query, &$invindex)
{ {
global $settings, $pageindex;
$query_terms = self::tokenize($query); $query_terms = self::tokenize($query);
$matching_pages = []; $matching_pages = [];
// Loop over each term in the query and find the matching page entries // Loop over each term in the query and find the matching page entries
for($i = 0; $i < count($query_terms); $i++) for($i = 0; $i < count($query_terms); $i++)
{ {
$qterm = $query_terms[$i]; $qterm = $query_terms[$i];
// Skip over this term if it isn't in the inverted index // Only search the inverted index if the term actually exists there
if(!isset($invindex[$qterm])) if(isset($invindex[$qterm]))
continue;
// Loop over each page
foreach($invindex[$qterm] as $pageid => $page_entry)
{ {
// Create an entry in the matching pages array if it doesn't exist // Loop over each page in the inverted index entry
if(!isset($matching_pages[$pageid])) foreach($invindex[$qterm] as $pageid => $page_entry)
$matching_pages[$pageid] = [ "nterms" => [] ]; {
$matching_pages[$pageid]["nterms"][$qterm] = $page_entry; // Create an entry in the matching pages array if it doesn't exist
if(!isset($matching_pages[$pageid]))
$matching_pages[$pageid] = [ "nterms" => [] ];
$matching_pages[$pageid]["nterms"][$qterm] = $page_entry;
}
}
// Loop over the pageindex and search the titles / tags
foreach ($pageindex as $pagename => $pagedata)
{
// Get the current page's id
$pageid = ids::getid($pagename);
// Consider matches in the page title
if(stripos($pagename, $qterm) !== false)
{
// We found the qterm in the title
if(!isset($matching_pages[$pageid]))
$matching_pages[$pageid] = [ "nterms" => [] ];
// Set up a counter for page title matches if it doesn't exist already
if(!isset($matching_pages[$pageid]["title-matches"]))
$matching_pages[$pageid]["title-matches"] = 0;
$matching_pages[$pageid]["title-matches"] += count(mb_stripos_all($pagename, $qterm));
}
// Consider matches in the page's tags
if(isset($pagedata->tags) and // If this page has tags
stripos(implode(" ", $pagedata->tags), $qterm) !== false) // And we found the qterm in the tags
{
if(!isset($matching_pages[$pageid]))
$matching_pages[$pageid] = [ "nterms" => [] ];
// Set up a counter for tag matches if there isn't one already
if(!isset($matching_pages[$pageid]["tag-matches"]))
$matching_pages[$pageid]["tag-matches"] = 0;
$matching_pages[$pageid]["tag-matches"] += count(mb_stripos_all(implode(" ", $pagedata->tags), $qterm));
}
} }
} }
foreach($matching_pages as $pageid => &$pagedata) foreach($matching_pages as $pageid => &$pagedata)
{ {
$pagedata["pagename"] = ids::getpagename($pageid); $pagedata["pagename"] = ids::getpagename($pageid);
@ -312,6 +357,12 @@ class search
// todo rank by context here // todo rank by context here
} }
// Consider matches in the title / tags
if(isset($pagedata["title-matches"]))
$pagedata["rank"] += $pagedata["title-matches"] * $settings->search_title_matches_weighting;
if(isset($pagedata["tag-matches"]))
$pagedata["rank"] += $pagedata["tag-matches"] * $settings->search_tags_matches_weighting;
// todo remove items if the rank is below a threshold // todo remove items if the rank is below a threshold
} }

View file

@ -212,6 +212,11 @@ $settings->search_max_distance_context_display = 100;
// term in the context below each search result. // term in the context below each search result.
$settings->search_characters_context = 200; $settings->search_characters_context = 200;
// The weighting to give to search term matches found in a page's title.
$settings->search_title_matches_weighting = 10;
// The weighting to give to search term matches found in a page's tags.
$settings->search_tags_matches_weighting = 3;
// A string of css to include. Will be included in the <head> of every page // A string of css to include. Will be included in the <head> of every page
// inside a <style> tag. This may also be a url - urls will be referenced via a // inside a <style> tag. This may also be a url - urls will be referenced via a