From 893492c5a678e508b2269504c8807484a0eec135 Mon Sep 17 00:00:00 2001 From: Starbeamrainbowlabs Date: Sun, 18 Mar 2018 16:52:55 +0000 Subject: [PATCH] Finish up enhancements to search system. I _think_ it works with utf-8 everywhere in the search system? I'm not sure - lots of testing is needed. Fortunately, I know just where to do such testing..... --- build/index.php | 14 +++++++++----- module_index.json | 2 +- modules/feature-search.php | 14 +++++++++----- 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/build/index.php b/build/index.php index 3148815..b8aabf3 100644 --- a/build/index.php +++ b/build/index.php @@ -3810,10 +3810,13 @@ register_module([ //echo("Extracting context for result " . $result["pagename"] . ".\n"); $context = $result["context"]; - if(strlen($context) === 0) - $context = substr($pagesource, 0, $settings->search_characters_context * 2); + if(mb_strlen($context) === 0) + $context = mb_substr($pagesource, 0, $settings->search_characters_context * 2); //echo("'Generated search context for " . $result["pagename"] . ": $context'\n"); - $context = search::highlight_context($_GET["query"], htmlentities($context)); + $context = search::highlight_context( + $_GET["query"], + preg_replace('/page.md", null, null, null, $settings->search_characters_context * 2)); @@ -4143,7 +4146,7 @@ class search */ public static function tokenize($source) { - $source = strtolower($source); + $source = Normalizer::normalize(strtolower($source), Normalizer::FORM_C); $source = preg_replace('/[\[\]\|\{\}\/]/u', " ", $source); return preg_split("/((^\p{P}+)|(\p{P}*\s+\p{P}*)|(\p{P}+$))|\|/u", $source, -1, PREG_SPLIT_NO_EMPTY); } @@ -4571,7 +4574,8 @@ class search if(in_array($qterm, static::$stop_words)) continue; // From http://stackoverflow.com/a/2483859/1460422 - $context = preg_replace("/" . str_replace("/", "\/", preg_quote($qterm)) . "/iu", "$0", $context); + + $context = preg_replace("/" . preg_replace('/\\//u', "\/", preg_quote($qterm)) . "/iu", "$0", $context); } return $context; diff --git a/module_index.json b/module_index.json index 38a1a1a..f49709e 100755 --- a/module_index.json +++ b/module_index.json @@ -104,7 +104,7 @@ "author": "Starbeamrainbowlabs", "description": "Adds proper search functionality to Pepperminty Wiki using an inverted index to provide a full text search engine. If pages don't show up, then you might have hit a stop word. If not, try requesting the `invindex-rebuild` action to rebuild the inverted index from scratch.", "id": "feature-search", - "lastupdate": 1521390977, + "lastupdate": 1521391886, "optional": false }, { diff --git a/modules/feature-search.php b/modules/feature-search.php index ca5c541..e39648e 100644 --- a/modules/feature-search.php +++ b/modules/feature-search.php @@ -210,10 +210,13 @@ register_module([ //echo("Extracting context for result " . $result["pagename"] . ".\n"); $context = $result["context"]; - if(strlen($context) === 0) - $context = substr($pagesource, 0, $settings->search_characters_context * 2); + if(mb_strlen($context) === 0) + $context = mb_substr($pagesource, 0, $settings->search_characters_context * 2); //echo("'Generated search context for " . $result["pagename"] . ": $context'\n"); - $context = search::highlight_context($_GET["query"], htmlentities($context)); + $context = search::highlight_context( + $_GET["query"], + preg_replace('/page.md", null, null, null, $settings->search_characters_context * 2)); @@ -543,7 +546,7 @@ class search */ public static function tokenize($source) { - $source = strtolower($source); + $source = Normalizer::normalize(strtolower($source), Normalizer::FORM_C); $source = preg_replace('/[\[\]\|\{\}\/]/u', " ", $source); return preg_split("/((^\p{P}+)|(\p{P}*\s+\p{P}*)|(\p{P}+$))|\|/u", $source, -1, PREG_SPLIT_NO_EMPTY); } @@ -971,7 +974,8 @@ class search if(in_array($qterm, static::$stop_words)) continue; // From http://stackoverflow.com/a/2483859/1460422 - $context = preg_replace("/" . str_replace("/", "\/", preg_quote($qterm)) . "/iu", "$0", $context); + + $context = preg_replace("/" . preg_replace('/\\//u', "\/", preg_quote($qterm)) . "/iu", "$0", $context); } return $context;