Update search to take the length of words into account. Fixes #134.

This commit is contained in:
Starbeamrainbowlabs 2017-10-15 13:42:15 +01:00
parent 190798b61e
commit edba3b9bf3
4 changed files with 10 additions and 7 deletions

View File

@ -27,6 +27,7 @@ This file holds the changelog for Pepperminty Wiki. This is the master list of t
- Make `invindex-rebuild` admin-only, but allow access if the POST param `secret` is set to your secret in `peppermint.json`
- Improved the wording of the deletion confirmation and error pages
- Search now searches matching tags as well as pages
- Updated the search ranking algorithm to give more weight to longer words than to shorter ones.
## v0.14

View File

@ -4182,7 +4182,7 @@ class search
if(!isset($matching_pages[$pageid]["title-matches"]))
$matching_pages[$pageid]["title-matches"] = 0;
$matching_pages[$pageid]["title-matches"] += count(mb_stripos_all($pagename, $qterm));
$matching_pages[$pageid]["title-matches"] += count(mb_stripos_all($pagename, $qterm)) * strlen($qterm);
}
// Consider matches in the page's tags
@ -4195,7 +4195,7 @@ class search
// Set up a counter for tag match if there isn't one already
if(!isset($matching_pages[$pageid]["tag-matches"]))
$matching_pages[$pageid]["tag-matches"] = 0;
$matching_pages[$pageid]["tag-matches"] += count(mb_stripos_all(implode(" ", $pagedata->tags), $qterm));
$matching_pages[$pageid]["tag-matches"] += count(mb_stripos_all(implode(" ", $pagedata->tags), $qterm)) * strlen($qterm);
}
}
}
@ -4212,7 +4212,8 @@ class search
foreach($pagedata["nterms"] as $pterm => $entry)
{
// Add the number of occurrences of this search term to the ranking
$pagedata["rank"] += $entry["freq"];
// Multiply it by the length of the word
$pagedata["rank"] += $entry["freq"] * strlen($pterm);
// Add the offsets to a list of all offsets on this page
foreach($entry["offsets"] as $offset)

View File

@ -104,7 +104,7 @@
"author": "Starbeamrainbowlabs",
"description": "Adds proper search functionality to Pepperminty Wiki using an inverted index to provide a full text search engine. If pages don't show up, then you might have hit a stop word. If not, try requesting the `invindex-rebuild` action to rebuild the inverted index from scratch.",
"id": "feature-search",
"lastupdate": 1508017313,
"lastupdate": 1508071155,
"optional": false
},
{

View File

@ -727,7 +727,7 @@ class search
if(!isset($matching_pages[$pageid]["title-matches"]))
$matching_pages[$pageid]["title-matches"] = 0;
$matching_pages[$pageid]["title-matches"] += count(mb_stripos_all($pagename, $qterm));
$matching_pages[$pageid]["title-matches"] += count(mb_stripos_all($pagename, $qterm)) * strlen($qterm);
}
// Consider matches in the page's tags
@ -740,7 +740,7 @@ class search
// Set up a counter for tag match if there isn't one already
if(!isset($matching_pages[$pageid]["tag-matches"]))
$matching_pages[$pageid]["tag-matches"] = 0;
$matching_pages[$pageid]["tag-matches"] += count(mb_stripos_all(implode(" ", $pagedata->tags), $qterm));
$matching_pages[$pageid]["tag-matches"] += count(mb_stripos_all(implode(" ", $pagedata->tags), $qterm)) * strlen($qterm);
}
}
}
@ -757,7 +757,8 @@ class search
foreach($pagedata["nterms"] as $pterm => $entry)
{
// Add the number of occurrences of this search term to the ranking
$pagedata["rank"] += $entry["freq"];
// Multiply it by the length of the word
$pagedata["rank"] += $entry["freq"] * strlen($pterm);
// Add the offsets to a list of all offsets on this page
foreach($entry["offsets"] as $offset)