1
0
Fork 0
mirror of https://github.com/sbrl/Pepperminty-Wiki.git synced 2024-11-26 05:32:59 +00:00

Bugfix: Increment $i when we hit a stop word while indexing.

There's also another bug here: the offsets generated are indexes into
the array of tokens, when they need to be character offsets into the
source text!
This commit is contained in:
Starbeamrainbowlabs 2018-06-29 23:51:10 +01:00
parent c687a9b029
commit 8403ffd5c3
Signed by: sbrl
GPG key ID: 1BE5172E637709C2

View file

@ -548,14 +548,16 @@ class search
{ {
$nterm = $term; $nterm = $term;
// Skip over stop words (see https://en.wikipedia.org/wiki/Stop_words) // Skip over stop words (see https://en.wikipedia.org/wiki/Stop_words)
if(in_array($nterm, self::$stop_words)) continue; if(in_array($nterm, self::$stop_words)) { $i++; continue; }
if(!isset($index[$nterm])) if(!isset($index[$nterm]))
{ {
$index[$nterm] = [ "freq" => 0, "offsets" => [] ]; $index[$nterm] = [ "freq" => 0, "offsets" => [] ];
} }
// FIXME: Here we use the index of the token in the array, when we want the number of characters into the page!
$index[$nterm]["freq"]++; $index[$nterm]["freq"]++;
$index[$nterm]["offsets"][] = $i; $index[$nterm]["offsets"][] = $i;