mirror of
https://github.com/sbrl/Pepperminty-Wiki.git
synced 2024-12-22 13:45:02 +00:00
Improve search engine indexer. Fixes #117.
It is now more liberal when deciding on word boundaries.
This commit is contained in:
parent
805cf5df53
commit
81bcef9a24
4 changed files with 6 additions and 1 deletions
|
@ -18,6 +18,7 @@
|
|||
- Always display footer message at the bottom of the page.
|
||||
- Trim the image url before short image url detection (#108)
|
||||
- Fixed huge issue with `contentonly` display mode.
|
||||
- Improved the search engine indexing algorithm. It now shouldn't choke on certain special characters (`[]{}|`) and will treat them as word boundaries.
|
||||
|
||||
## v0.12.1
|
||||
|
||||
|
|
|
@ -2529,6 +2529,7 @@ class search
|
|||
public static function tokenize($source)
|
||||
{
|
||||
$source = strtolower($source);
|
||||
$source = str_replace([ '[', ']', '|', '{', '}' ], " ", $source);
|
||||
return preg_split("/((^\p{P}+)|(\p{P}*\s+\p{P}*)|(\p{P}+$))|\|/u", $source, -1, PREG_SPLIT_NO_EMPTY);
|
||||
}
|
||||
|
||||
|
@ -2547,6 +2548,7 @@ class search
|
|||
foreach($pageindex as $pagename => $pagedetails)
|
||||
{
|
||||
echo("Adding $pagename to the new search index.\n\n");
|
||||
flush();
|
||||
$pagesource = utf8_encode(file_get_contents("$env->storage_prefix$pagename.md"));
|
||||
$index = self::index($pagesource);
|
||||
|
||||
|
|
|
@ -77,7 +77,7 @@
|
|||
"author": "Starbeamrainbowlabs",
|
||||
"description": "Adds proper search functionality to Pepperminty Wiki using an inverted index to provide a full text search engine. If pages don't show up, then you might have hit a stop word. If not, try requesting the `invindex-rebuild` action to rebuild the inverted index from scratch.",
|
||||
"id": "feature-search",
|
||||
"lastupdate": 1476906745,
|
||||
"lastupdate": 1477425835,
|
||||
"optional": false
|
||||
},
|
||||
{
|
||||
|
|
|
@ -299,6 +299,7 @@ class search
|
|||
public static function tokenize($source)
|
||||
{
|
||||
$source = strtolower($source);
|
||||
$source = str_replace([ '[', ']', '|', '{', '}' ], " ", $source);
|
||||
return preg_split("/((^\p{P}+)|(\p{P}*\s+\p{P}*)|(\p{P}+$))|\|/u", $source, -1, PREG_SPLIT_NO_EMPTY);
|
||||
}
|
||||
|
||||
|
@ -317,6 +318,7 @@ class search
|
|||
foreach($pageindex as $pagename => $pagedetails)
|
||||
{
|
||||
echo("Adding $pagename to the new search index.\n\n");
|
||||
flush();
|
||||
$pagesource = utf8_encode(file_get_contents("$env->storage_prefix$pagename.md"));
|
||||
$index = self::index($pagesource);
|
||||
|
||||
|
|
Loading…
Reference in a new issue