mirror of
https://github.com/sbrl/Pepperminty-Wiki.git
synced 2024-11-26 05:32:59 +00:00
Search page title and tags
This commit is contained in:
parent
9046d72e7c
commit
173a72fa46
4 changed files with 133 additions and 21 deletions
|
@ -215,6 +215,11 @@ $settings->search_max_distance_context_display = 100;
|
||||||
// term in the context below each search result.
|
// term in the context below each search result.
|
||||||
$settings->search_characters_context = 200;
|
$settings->search_characters_context = 200;
|
||||||
|
|
||||||
|
// The weighting to give to search term matches found in a page's title.
|
||||||
|
$settings->search_title_matches_weighting = 10;
|
||||||
|
|
||||||
|
// The weighting to give to search term matches found in a page's tags.
|
||||||
|
$settings->search_tags_matches_weighting = 3;
|
||||||
|
|
||||||
// A string of css to include. Will be included in the <head> of every page
|
// A string of css to include. Will be included in the <head> of every page
|
||||||
// inside a <style> tag. This may also be a url - urls will be referenced via a
|
// inside a <style> tag. This may also be a url - urls will be referenced via a
|
||||||
|
@ -1412,6 +1417,13 @@ register_module([
|
||||||
$pagesource = file_get_contents($result["pagename"] . ".md");
|
$pagesource = file_get_contents($result["pagename"] . ".md");
|
||||||
$context = search::extract_context($_GET["query"], $pagesource);
|
$context = search::extract_context($_GET["query"], $pagesource);
|
||||||
$context = search::highlight_context($_GET["query"], $context);
|
$context = search::highlight_context($_GET["query"], $context);
|
||||||
|
if(strlen($context) == 0)
|
||||||
|
{
|
||||||
|
$context = search::strip_markup(file_get_contents("$env->page.md", null, null, null, $settings->search_characters_context * 2));
|
||||||
|
if($pageindex->{$env->page}->size > $settings->search_characters_context * 2)
|
||||||
|
$context .= "...";
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// We add 1 to $i here to convert it from an index to a result
|
// We add 1 to $i here to convert it from an index to a result
|
||||||
// number as people expect it to start from 1
|
// number as people expect it to start from 1
|
||||||
|
@ -1622,28 +1634,66 @@ class search
|
||||||
|
|
||||||
public static function query_invindex($query, &$invindex)
|
public static function query_invindex($query, &$invindex)
|
||||||
{
|
{
|
||||||
|
global $settings, $pageindex;
|
||||||
|
|
||||||
$query_terms = self::tokenize($query);
|
$query_terms = self::tokenize($query);
|
||||||
$matching_pages = [];
|
$matching_pages = [];
|
||||||
|
|
||||||
|
|
||||||
// Loop over each term in the query and find the matching page entries
|
// Loop over each term in the query and find the matching page entries
|
||||||
for($i = 0; $i < count($query_terms); $i++)
|
for($i = 0; $i < count($query_terms); $i++)
|
||||||
{
|
{
|
||||||
$qterm = $query_terms[$i];
|
$qterm = $query_terms[$i];
|
||||||
|
|
||||||
// Skip over this term if it isn't in the inverted index
|
// Only search the inverted index if it actually exists there
|
||||||
if(!isset($invindex[$qterm]))
|
if(isset($invindex[$qterm]))
|
||||||
continue;
|
|
||||||
|
|
||||||
// Loop over each page
|
|
||||||
foreach($invindex[$qterm] as $pageid => $page_entry)
|
|
||||||
{
|
{
|
||||||
// Create an entry in the matching pages array if it doesn't exist
|
// Loop over each page in the inverted index entry
|
||||||
if(!isset($matching_pages[$pageid]))
|
foreach($invindex[$qterm] as $pageid => $page_entry)
|
||||||
$matching_pages[$pageid] = [ "nterms" => [] ];
|
{
|
||||||
$matching_pages[$pageid]["nterms"][$qterm] = $page_entry;
|
// Create an entry in the matching pages array if it doesn't exist
|
||||||
|
if(!isset($matching_pages[$pageid]))
|
||||||
|
$matching_pages[$pageid] = [ "nterms" => [] ];
|
||||||
|
$matching_pages[$pageid]["nterms"][$qterm] = $page_entry;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Loop over the pageindex and search the titles / tags
|
||||||
|
foreach ($pageindex as $pagename => $pagedata)
|
||||||
|
{
|
||||||
|
// Get the current page's id
|
||||||
|
$pageid = ids::getid($pagename);
|
||||||
|
// Consider matches in the page title
|
||||||
|
if(stripos($pagename, $qterm) !== false)
|
||||||
|
{
|
||||||
|
// We found the qterm in the title
|
||||||
|
if(!isset($matching_pages[$pageid]))
|
||||||
|
$matching_pages[$pageid] = [ "nterms" => [] ];
|
||||||
|
|
||||||
|
// Set up a counter for page title matches if it doesn't exist already
|
||||||
|
if(!isset($matching_pages[$pageid]["title-matches"]))
|
||||||
|
$matching_pages[$pageid]["title-matches"] = 0;
|
||||||
|
|
||||||
|
$matching_pages[$pageid]["title-matches"] += count(mb_stripos_all($pagename, $qterm));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Consider matches in the page's tags
|
||||||
|
if(isset($pagedata->tags) and // If this page has tags
|
||||||
|
stripos(implode(" ", $pagedata->tags), $qterm) !== false) // And we found the qterm in the tags
|
||||||
|
{
|
||||||
|
if(!isset($matching_pages[$pageid]))
|
||||||
|
$matching_pages[$pageid] = [ "nterms" => [] ];
|
||||||
|
|
||||||
|
// Set up a counter for tag match if there isn't one already
|
||||||
|
if(!isset($matching_pages[$pageid]["tag-matches"]))
|
||||||
|
$matching_pages[$pageid]["tag-matches"] = 0;
|
||||||
|
$matching_pages[$pageid]["tag-matches"] += count(mb_stripos_all(implode(" ", $pagedata->tags), $qterm));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
foreach($matching_pages as $pageid => &$pagedata)
|
foreach($matching_pages as $pageid => &$pagedata)
|
||||||
{
|
{
|
||||||
$pagedata["pagename"] = ids::getpagename($pageid);
|
$pagedata["pagename"] = ids::getpagename($pageid);
|
||||||
|
@ -1656,6 +1706,12 @@ class search
|
||||||
// todo rank by context here
|
// todo rank by context here
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Consider matches in the title / tags
|
||||||
|
if(isset($pagedata["title-matches"]))
|
||||||
|
$pagedata["rank"] += $pagedata["title-matches"] * $settings->search_title_matches_weighting;
|
||||||
|
if(isset($pagedata["tag-matches"]))
|
||||||
|
$pagedata["rank"] += $pagedata["tag-matches"] * $settings->search_tags_matches_weighting;
|
||||||
|
|
||||||
// todo remove items if the rank is below a threshold
|
// todo remove items if the rank is below a threshold
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -50,7 +50,7 @@
|
||||||
"author": "Starbeamrainbowlabs",
|
"author": "Starbeamrainbowlabs",
|
||||||
"description": "Adds proper search functionality to Pepperminty Wiki. Note that this module, at the moment, just contains test code while I figure out how best to write a search engine.",
|
"description": "Adds proper search functionality to Pepperminty Wiki. Note that this module, at the moment, just contains test code while I figure out how best to write a search engine.",
|
||||||
"id": "feature-search",
|
"id": "feature-search",
|
||||||
"lastupdate": 1446387843,
|
"lastupdate": 1446390331,
|
||||||
"optional": false
|
"optional": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
|
@ -68,6 +68,13 @@ register_module([
|
||||||
$pagesource = file_get_contents($result["pagename"] . ".md");
|
$pagesource = file_get_contents($result["pagename"] . ".md");
|
||||||
$context = search::extract_context($_GET["query"], $pagesource);
|
$context = search::extract_context($_GET["query"], $pagesource);
|
||||||
$context = search::highlight_context($_GET["query"], $context);
|
$context = search::highlight_context($_GET["query"], $context);
|
||||||
|
if(strlen($context) == 0)
|
||||||
|
{
|
||||||
|
$context = search::strip_markup(file_get_contents("$env->page.md", null, null, null, $settings->search_characters_context * 2));
|
||||||
|
if($pageindex->{$env->page}->size > $settings->search_characters_context * 2)
|
||||||
|
$context .= "...";
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// We add 1 to $i here to convert it from an index to a result
|
// We add 1 to $i here to convert it from an index to a result
|
||||||
// number as people expect it to start from 1
|
// number as people expect it to start from 1
|
||||||
|
@ -278,28 +285,66 @@ class search
|
||||||
|
|
||||||
public static function query_invindex($query, &$invindex)
|
public static function query_invindex($query, &$invindex)
|
||||||
{
|
{
|
||||||
|
global $settings, $pageindex;
|
||||||
|
|
||||||
$query_terms = self::tokenize($query);
|
$query_terms = self::tokenize($query);
|
||||||
$matching_pages = [];
|
$matching_pages = [];
|
||||||
|
|
||||||
|
|
||||||
// Loop over each term in the query and find the matching page entries
|
// Loop over each term in the query and find the matching page entries
|
||||||
for($i = 0; $i < count($query_terms); $i++)
|
for($i = 0; $i < count($query_terms); $i++)
|
||||||
{
|
{
|
||||||
$qterm = $query_terms[$i];
|
$qterm = $query_terms[$i];
|
||||||
|
|
||||||
// Skip over this term if it isn't in the inverted index
|
// Only search the inverted index if it actually exists there
|
||||||
if(!isset($invindex[$qterm]))
|
if(isset($invindex[$qterm]))
|
||||||
continue;
|
|
||||||
|
|
||||||
// Loop over each page
|
|
||||||
foreach($invindex[$qterm] as $pageid => $page_entry)
|
|
||||||
{
|
{
|
||||||
// Create an entry in the matching pages array if it doesn't exist
|
// Loop over each page in the inverted index entry
|
||||||
if(!isset($matching_pages[$pageid]))
|
foreach($invindex[$qterm] as $pageid => $page_entry)
|
||||||
$matching_pages[$pageid] = [ "nterms" => [] ];
|
{
|
||||||
$matching_pages[$pageid]["nterms"][$qterm] = $page_entry;
|
// Create an entry in the matching pages array if it doesn't exist
|
||||||
|
if(!isset($matching_pages[$pageid]))
|
||||||
|
$matching_pages[$pageid] = [ "nterms" => [] ];
|
||||||
|
$matching_pages[$pageid]["nterms"][$qterm] = $page_entry;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Loop over the pageindex and search the titles / tags
|
||||||
|
foreach ($pageindex as $pagename => $pagedata)
|
||||||
|
{
|
||||||
|
// Get the current page's id
|
||||||
|
$pageid = ids::getid($pagename);
|
||||||
|
// Consider matches in the page title
|
||||||
|
if(stripos($pagename, $qterm) !== false)
|
||||||
|
{
|
||||||
|
// We found the qterm in the title
|
||||||
|
if(!isset($matching_pages[$pageid]))
|
||||||
|
$matching_pages[$pageid] = [ "nterms" => [] ];
|
||||||
|
|
||||||
|
// Set up a counter for page title matches if it doesn't exist already
|
||||||
|
if(!isset($matching_pages[$pageid]["title-matches"]))
|
||||||
|
$matching_pages[$pageid]["title-matches"] = 0;
|
||||||
|
|
||||||
|
$matching_pages[$pageid]["title-matches"] += count(mb_stripos_all($pagename, $qterm));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Consider matches in the page's tags
|
||||||
|
if(isset($pagedata->tags) and // If this page has tags
|
||||||
|
stripos(implode(" ", $pagedata->tags), $qterm) !== false) // And we found the qterm in the tags
|
||||||
|
{
|
||||||
|
if(!isset($matching_pages[$pageid]))
|
||||||
|
$matching_pages[$pageid] = [ "nterms" => [] ];
|
||||||
|
|
||||||
|
// Set up a counter for tag match if there isn't one already
|
||||||
|
if(!isset($matching_pages[$pageid]["tag-matches"]))
|
||||||
|
$matching_pages[$pageid]["tag-matches"] = 0;
|
||||||
|
$matching_pages[$pageid]["tag-matches"] += count(mb_stripos_all(implode(" ", $pagedata->tags), $qterm));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
foreach($matching_pages as $pageid => &$pagedata)
|
foreach($matching_pages as $pageid => &$pagedata)
|
||||||
{
|
{
|
||||||
$pagedata["pagename"] = ids::getpagename($pageid);
|
$pagedata["pagename"] = ids::getpagename($pageid);
|
||||||
|
@ -312,6 +357,12 @@ class search
|
||||||
// todo rank by context here
|
// todo rank by context here
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Consider matches in the title / tags
|
||||||
|
if(isset($pagedata["title-matches"]))
|
||||||
|
$pagedata["rank"] += $pagedata["title-matches"] * $settings->search_title_matches_weighting;
|
||||||
|
if(isset($pagedata["tag-matches"]))
|
||||||
|
$pagedata["rank"] += $pagedata["tag-matches"] * $settings->search_tags_matches_weighting;
|
||||||
|
|
||||||
// todo remove items if the rank is below a threshold
|
// todo remove items if the rank is below a threshold
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -212,6 +212,11 @@ $settings->search_max_distance_context_display = 100;
|
||||||
// term in the context below each search result.
|
// term in the context below each search result.
|
||||||
$settings->search_characters_context = 200;
|
$settings->search_characters_context = 200;
|
||||||
|
|
||||||
|
// The weighting to give to search term matches found in a page's title.
|
||||||
|
$settings->search_title_matches_weighting = 10;
|
||||||
|
|
||||||
|
// The weighting to give to search term matches found in a page's tags.
|
||||||
|
$settings->search_tags_matches_weighting = 3;
|
||||||
|
|
||||||
// A string of css to include. Will be included in the <head> of every page
|
// A string of css to include. Will be included in the <head> of every page
|
||||||
// inside a <style> tag. This may also be a url - urls will be referenced via a
|
// inside a <style> tag. This may also be a url - urls will be referenced via a
|
||||||
|
|
Loading…
Reference in a new issue