mirror of
https://github.com/sbrl/Pepperminty-Wiki.git
synced 2024-12-22 13:45:02 +00:00
Search page title and tags
This commit is contained in:
parent
9046d72e7c
commit
173a72fa46
4 changed files with 133 additions and 21 deletions
|
@ -215,6 +215,11 @@ $settings->search_max_distance_context_display = 100;
|
|||
// term in the context below each search result.
|
||||
$settings->search_characters_context = 200;
|
||||
|
||||
// The weighting to give to search term matches found in a page's title.
|
||||
$settings->search_title_matches_weighting = 10;
|
||||
|
||||
// The weighting to give to search term matches found in a page's tags.
|
||||
$settings->search_tags_matches_weighting = 3;
|
||||
|
||||
// A string of css to include. Will be included in the <head> of every page
|
||||
// inside a <style> tag. This may also be a url - urls will be referenced via a
|
||||
|
@ -1412,6 +1417,13 @@ register_module([
|
|||
$pagesource = file_get_contents($result["pagename"] . ".md");
|
||||
$context = search::extract_context($_GET["query"], $pagesource);
|
||||
$context = search::highlight_context($_GET["query"], $context);
|
||||
if(strlen($context) == 0)
|
||||
{
|
||||
$context = search::strip_markup(file_get_contents("$env->page.md", null, null, null, $settings->search_characters_context * 2));
|
||||
if($pageindex->{$env->page}->size > $settings->search_characters_context * 2)
|
||||
$context .= "...";
|
||||
}
|
||||
|
||||
|
||||
// We add 1 to $i here to convert it from an index to a result
|
||||
// number as people expect it to start from 1
|
||||
|
@ -1622,28 +1634,66 @@ class search
|
|||
|
||||
public static function query_invindex($query, &$invindex)
|
||||
{
|
||||
global $settings, $pageindex;
|
||||
|
||||
$query_terms = self::tokenize($query);
|
||||
$matching_pages = [];
|
||||
|
||||
|
||||
// Loop over each term in the query and find the matching page entries
|
||||
for($i = 0; $i < count($query_terms); $i++)
|
||||
{
|
||||
$qterm = $query_terms[$i];
|
||||
|
||||
// Skip over this term if it isn't in the inverted index
|
||||
if(!isset($invindex[$qterm]))
|
||||
continue;
|
||||
|
||||
// Loop over each page
|
||||
foreach($invindex[$qterm] as $pageid => $page_entry)
|
||||
// Only search the inverted index if it actually exists there
|
||||
if(isset($invindex[$qterm]))
|
||||
{
|
||||
// Create an entry in the matching pages array if it doesn't exist
|
||||
if(!isset($matching_pages[$pageid]))
|
||||
$matching_pages[$pageid] = [ "nterms" => [] ];
|
||||
$matching_pages[$pageid]["nterms"][$qterm] = $page_entry;
|
||||
// Loop over each page in the inverted index entry
|
||||
foreach($invindex[$qterm] as $pageid => $page_entry)
|
||||
{
|
||||
// Create an entry in the matching pages array if it doesn't exist
|
||||
if(!isset($matching_pages[$pageid]))
|
||||
$matching_pages[$pageid] = [ "nterms" => [] ];
|
||||
$matching_pages[$pageid]["nterms"][$qterm] = $page_entry;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Loop over the pageindex and search the titles / tags
|
||||
foreach ($pageindex as $pagename => $pagedata)
|
||||
{
|
||||
// Get the current page's id
|
||||
$pageid = ids::getid($pagename);
|
||||
// Consider matches in the page title
|
||||
if(stripos($pagename, $qterm) !== false)
|
||||
{
|
||||
// We found the qterm in the title
|
||||
if(!isset($matching_pages[$pageid]))
|
||||
$matching_pages[$pageid] = [ "nterms" => [] ];
|
||||
|
||||
// Set up a counter for page title matches if it doesn't exist already
|
||||
if(!isset($matching_pages[$pageid]["title-matches"]))
|
||||
$matching_pages[$pageid]["title-matches"] = 0;
|
||||
|
||||
$matching_pages[$pageid]["title-matches"] += count(mb_stripos_all($pagename, $qterm));
|
||||
}
|
||||
|
||||
// Consider matches in the page's tags
|
||||
if(isset($pagedata->tags) and // If this page has tags
|
||||
stripos(implode(" ", $pagedata->tags), $qterm) !== false) // And we found the qterm in the tags
|
||||
{
|
||||
if(!isset($matching_pages[$pageid]))
|
||||
$matching_pages[$pageid] = [ "nterms" => [] ];
|
||||
|
||||
// Set up a counter for tag match if there isn't one already
|
||||
if(!isset($matching_pages[$pageid]["tag-matches"]))
|
||||
$matching_pages[$pageid]["tag-matches"] = 0;
|
||||
$matching_pages[$pageid]["tag-matches"] += count(mb_stripos_all(implode(" ", $pagedata->tags), $qterm));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
foreach($matching_pages as $pageid => &$pagedata)
|
||||
{
|
||||
$pagedata["pagename"] = ids::getpagename($pageid);
|
||||
|
@ -1656,6 +1706,12 @@ class search
|
|||
// todo rank by context here
|
||||
}
|
||||
|
||||
// Consider matches in the title / tags
|
||||
if(isset($pagedata["title-matches"]))
|
||||
$pagedata["rank"] += $pagedata["title-matches"] * $settings->search_title_matches_weighting;
|
||||
if(isset($pagedata["tag-matches"]))
|
||||
$pagedata["rank"] += $pagedata["tag-matches"] * $settings->search_tags_matches_weighting;
|
||||
|
||||
// todo remove items if the rank is below a threshold
|
||||
}
|
||||
|
||||
|
|
|
@ -50,7 +50,7 @@
|
|||
"author": "Starbeamrainbowlabs",
|
||||
"description": "Adds proper search functionality to Pepperminty Wiki. Note that this module, at the moment, just contains test code while I figure out how best to write a search engine.",
|
||||
"id": "feature-search",
|
||||
"lastupdate": 1446387843,
|
||||
"lastupdate": 1446390331,
|
||||
"optional": false
|
||||
},
|
||||
{
|
||||
|
|
|
@ -68,6 +68,13 @@ register_module([
|
|||
$pagesource = file_get_contents($result["pagename"] . ".md");
|
||||
$context = search::extract_context($_GET["query"], $pagesource);
|
||||
$context = search::highlight_context($_GET["query"], $context);
|
||||
if(strlen($context) == 0)
|
||||
{
|
||||
$context = search::strip_markup(file_get_contents("$env->page.md", null, null, null, $settings->search_characters_context * 2));
|
||||
if($pageindex->{$env->page}->size > $settings->search_characters_context * 2)
|
||||
$context .= "...";
|
||||
}
|
||||
|
||||
|
||||
// We add 1 to $i here to convert it from an index to a result
|
||||
// number as people expect it to start from 1
|
||||
|
@ -278,28 +285,66 @@ class search
|
|||
|
||||
public static function query_invindex($query, &$invindex)
|
||||
{
|
||||
global $settings, $pageindex;
|
||||
|
||||
$query_terms = self::tokenize($query);
|
||||
$matching_pages = [];
|
||||
|
||||
|
||||
// Loop over each term in the query and find the matching page entries
|
||||
for($i = 0; $i < count($query_terms); $i++)
|
||||
{
|
||||
$qterm = $query_terms[$i];
|
||||
|
||||
// Skip over this term if it isn't in the inverted index
|
||||
if(!isset($invindex[$qterm]))
|
||||
continue;
|
||||
|
||||
// Loop over each page
|
||||
foreach($invindex[$qterm] as $pageid => $page_entry)
|
||||
// Only search the inverted index if it actually exists there
|
||||
if(isset($invindex[$qterm]))
|
||||
{
|
||||
// Create an entry in the matching pages array if it doesn't exist
|
||||
if(!isset($matching_pages[$pageid]))
|
||||
$matching_pages[$pageid] = [ "nterms" => [] ];
|
||||
$matching_pages[$pageid]["nterms"][$qterm] = $page_entry;
|
||||
// Loop over each page in the inverted index entry
|
||||
foreach($invindex[$qterm] as $pageid => $page_entry)
|
||||
{
|
||||
// Create an entry in the matching pages array if it doesn't exist
|
||||
if(!isset($matching_pages[$pageid]))
|
||||
$matching_pages[$pageid] = [ "nterms" => [] ];
|
||||
$matching_pages[$pageid]["nterms"][$qterm] = $page_entry;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Loop over the pageindex and search the titles / tags
|
||||
foreach ($pageindex as $pagename => $pagedata)
|
||||
{
|
||||
// Get the current page's id
|
||||
$pageid = ids::getid($pagename);
|
||||
// Consider matches in the page title
|
||||
if(stripos($pagename, $qterm) !== false)
|
||||
{
|
||||
// We found the qterm in the title
|
||||
if(!isset($matching_pages[$pageid]))
|
||||
$matching_pages[$pageid] = [ "nterms" => [] ];
|
||||
|
||||
// Set up a counter for page title matches if it doesn't exist already
|
||||
if(!isset($matching_pages[$pageid]["title-matches"]))
|
||||
$matching_pages[$pageid]["title-matches"] = 0;
|
||||
|
||||
$matching_pages[$pageid]["title-matches"] += count(mb_stripos_all($pagename, $qterm));
|
||||
}
|
||||
|
||||
// Consider matches in the page's tags
|
||||
if(isset($pagedata->tags) and // If this page has tags
|
||||
stripos(implode(" ", $pagedata->tags), $qterm) !== false) // And we found the qterm in the tags
|
||||
{
|
||||
if(!isset($matching_pages[$pageid]))
|
||||
$matching_pages[$pageid] = [ "nterms" => [] ];
|
||||
|
||||
// Set up a counter for tag match if there isn't one already
|
||||
if(!isset($matching_pages[$pageid]["tag-matches"]))
|
||||
$matching_pages[$pageid]["tag-matches"] = 0;
|
||||
$matching_pages[$pageid]["tag-matches"] += count(mb_stripos_all(implode(" ", $pagedata->tags), $qterm));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
foreach($matching_pages as $pageid => &$pagedata)
|
||||
{
|
||||
$pagedata["pagename"] = ids::getpagename($pageid);
|
||||
|
@ -312,6 +357,12 @@ class search
|
|||
// todo rank by context here
|
||||
}
|
||||
|
||||
// Consider matches in the title / tags
|
||||
if(isset($pagedata["title-matches"]))
|
||||
$pagedata["rank"] += $pagedata["title-matches"] * $settings->search_title_matches_weighting;
|
||||
if(isset($pagedata["tag-matches"]))
|
||||
$pagedata["rank"] += $pagedata["tag-matches"] * $settings->search_tags_matches_weighting;
|
||||
|
||||
// todo remove items if the rank is below a threshold
|
||||
}
|
||||
|
||||
|
|
|
@ -212,6 +212,11 @@ $settings->search_max_distance_context_display = 100;
|
|||
// term in the context below each search result.
|
||||
$settings->search_characters_context = 200;
|
||||
|
||||
// The weighting to give to search term matches found in a page's title.
|
||||
$settings->search_title_matches_weighting = 10;
|
||||
|
||||
// The weighting to give to search term matches found in a page's tags.
|
||||
$settings->search_tags_matches_weighting = 3;
|
||||
|
||||
// A string of css to include. Will be included in the <head> of every page
|
||||
// inside a <style> tag. This may also be a url - urls will be referenced via a
|
||||
|
|
Loading…
Reference in a new issue