2018-09-02 15:36:37 +00:00
|
|
|
|
using System;
|
2018-09-02 16:45:13 +00:00
|
|
|
|
using System.Collections;
|
2018-09-02 15:36:37 +00:00
|
|
|
|
using System.Collections.Generic;
|
|
|
|
|
using System.IO;
|
|
|
|
|
using System.Text;
|
|
|
|
|
using SBRL.Utilities;
|
|
|
|
|
|
2018-09-11 13:27:25 +00:00
|
|
|
|
namespace LibSearchBox
|
2018-09-02 15:36:37 +00:00
|
|
|
|
{
|
|
|
|
|
/// <summary>
/// Bit flags that control how an <see cref="Index"/> is built.
/// </summary>
[Flags]
public enum IndexOptions
{
    /// <summary>No options set (the zero value of the flag set).</summary>
    None = 0,

    /// <summary>Skip tokens that the stopword tester reports as stopwords.</summary>
    ExcludeStopwords = 1
}
|
|
|
|
|
|
2018-09-02 16:45:13 +00:00
|
|
|
|
/// <summary>
/// Maps each token found in a source string to the list of integer offsets
/// the tokenizer reported for it. Enumerating the index yields one
/// token / offset-list pair per distinct token.
/// </summary>
public class Index : IEnumerable<KeyValuePair<string, List<int>>>
{
    private readonly Dictionary<string, List<int>> index = new Dictionary<string, List<int>>();

    // Stays null when IndexOptions.ExcludeStopwords is not requested.
    private readonly StopwordTester stopwordTester;

    /// <summary>
    /// Tokenizes <paramref name="inSource"/> and records every token in the index.
    /// </summary>
    /// <param name="inSource">The text to tokenize and index.</param>
    /// <param name="stopwords">
    /// The stopword list handed to the stopword tester. Only used when
    /// <paramref name="options"/> includes <see cref="IndexOptions.ExcludeStopwords"/>.
    /// </param>
    /// <param name="options">Flags controlling how the index is built.</param>
    public Index(string inSource, IEnumerable<string> stopwords, IndexOptions options)
    {
        if (options.HasFlag(IndexOptions.ExcludeStopwords)) {
            stopwordTester = new StopwordTester(stopwords);
        }

        // Tokenize the input and file it in our index
        Tokenizer tokenizer = new Tokenizer(inSource);
        foreach (Tuple<int, string> token in tokenizer) {
            // No tester exists when stopword exclusion is off; calling it
            // unconditionally used to throw NullReferenceException.
            if (stopwordTester != null && stopwordTester.IsStopword(token.Item2)) {
                continue;
            }
            insert(token.Item2, token.Item1);
        }
    }

    /// <summary>
    /// Builds an index using the stopword list read from the embedded
    /// resource "SearchBox.EmbeddedFiles.Stopwords.txt".
    /// </summary>
    /// <param name="inSource">The text to tokenize and index.</param>
    /// <param name="options">Flags controlling how the index is built.</param>
    public Index(string inSource, IndexOptions options)
        : this(inSource, EmbeddedFiles.EnumerateLines("SearchBox.EmbeddedFiles.Stopwords.txt"), options)
    {
    }

    /// <summary>
    /// Builds an index with <see cref="IndexOptions.ExcludeStopwords"/> enabled
    /// and the embedded stopword list.
    /// </summary>
    /// <param name="inSource">The text to tokenize and index.</param>
    public Index(string inSource) : this(inSource, IndexOptions.ExcludeStopwords)
    {
    }

    /// <summary>
    /// Gets the list of offsets recorded for <paramref name="key"/>.
    /// </summary>
    /// <param name="key">The token to look up.</param>
    /// <exception cref="KeyNotFoundException">
    /// The token is not present in the index.
    /// </exception>
    public List<int> this[string key] {
        get {
            return index[key];
        }
    }

    /// <summary>
    /// Records one occurrence of <paramref name="token"/>, creating its
    /// offset list on first sight.
    /// </summary>
    /// <param name="token">The token to file.</param>
    /// <param name="offset">The offset to append to the token's list.</param>
    protected void insert(string token, int offset)
    {
        // A single lookup replaces ContainsKey + Add + indexer.
        List<int> offsets;
        if (!index.TryGetValue(token, out offsets)) {
            offsets = new List<int>();
            index.Add(token, offsets);
        }
        offsets.Add(offset);
    }

    /// <summary>
    /// Returns the distinct tokens currently held in the index.
    /// </summary>
    public IEnumerable<string> Tokens()
    {
        return index.Keys;
    }

    /// <summary>
    /// Lazily yields every token together with its offset list.
    /// </summary>
    public IEnumerable<KeyValuePair<string, List<int>>> IterateItems()
    {
        foreach (KeyValuePair<string, List<int>> item in index) {
            yield return item;
        }
    }

    /// <summary>
    /// Returns an enumerator over the token / offset-list pairs.
    /// </summary>
    public IEnumerator<KeyValuePair<string, List<int>>> GetEnumerator()
    {
        return IterateItems().GetEnumerator();
    }

    /// <summary>
    /// Non-generic enumerator; forwards to the generic one.
    /// </summary>
    IEnumerator IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }

    /// <summary>
    /// Renders the index as a header line followed by one tab-indented
    /// "token: offset, offset, ..." line per token.
    /// </summary>
    public override string ToString()
    {
        StringBuilder result = new StringBuilder("Index: \n");
        foreach (KeyValuePair<string, List<int>> item in index) {
            result.AppendLine($"\t{item.Key}: {string.Join(", ", item.Value)}");
        }
        return result.ToString();
    }

    // --------------------------------------------------------------------------------------

    /// <summary>
    /// Reads the whole of <paramref name="filename"/> as text and indexes it
    /// through the single-argument constructor.
    /// </summary>
    /// <param name="filename">Path of the file to read.</param>
    public static Index FromFile(string filename)
    {
        return new Index(File.ReadAllText(filename));
    }
}
|
|
|
|
|
}
|