SearchBox/SearchBox/Index.cs

95 lines
2.2 KiB
C#

using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Text;
using SBRL.Utilities;
namespace LibSearchBox
{
/// <summary>
/// Bit flags that control how an index is built from its source text.
/// Values may be combined with the bitwise OR operator.
/// </summary>
[Flags]
public enum IndexOptions
{
    /// <summary>No options set; every token produced by the tokenizer is indexed.</summary>
    None = 0,

    /// <summary>Skip tokens that the stopword tester reports as stopwords.</summary>
    ExcludeStopwords = 1
}
/// <summary>
/// An index over a piece of text that maps each token to the list of offsets
/// at which the tokenizer reported it. Enumerating the index yields one
/// token/offset-list pair per distinct token.
/// </summary>
public class Index : IEnumerable<KeyValuePair<string, List<int>>>
{
    /// <summary>Token -> offsets recorded for that token, in the order they were inserted.</summary>
    private readonly Dictionary<string, List<int>> index = new Dictionary<string, List<int>>();

    /// <summary>
    /// Stopword filter. Remains null when <see cref="IndexOptions.ExcludeStopwords"/>
    /// was not requested, in which case no tokens are filtered out.
    /// </summary>
    private readonly StopwordTester stopwordTester;

    /// <summary>
    /// Tokenizes <paramref name="inSource"/> and indexes every resulting token.
    /// </summary>
    /// <param name="inSource">The text to index.</param>
    /// <param name="stopwords">
    /// The stopword list. Only used when <paramref name="options"/> includes
    /// <see cref="IndexOptions.ExcludeStopwords"/>.
    /// </param>
    /// <param name="options">Flags controlling how the index is built.</param>
    public Index(string inSource, IEnumerable<string> stopwords, IndexOptions options)
    {
        if (options.HasFlag(IndexOptions.ExcludeStopwords))
            stopwordTester = new StopwordTester(stopwords);

        // Tokenize the input and file it in our index.
        // Each tuple is (offset, token text) - see the argument order of insert().
        Tokenizer tokenizer = new Tokenizer(inSource);
        foreach ((int, string) token in tokenizer) {
            // The tester only exists when stopword exclusion was requested;
            // without the null check this threw NullReferenceException on the
            // first token whenever ExcludeStopwords was not set.
            if (stopwordTester != null && stopwordTester.IsStopword(token.Item2))
                continue;

            insert(token.Item2, token.Item1);
        }
    }

    /// <summary>
    /// Tokenizes and indexes <paramref name="inSource"/>, taking the stopword list from
    /// the "LibSearchBox.EmbeddedFiles.Stopwords.txt" entry read via EmbeddedFiles.
    /// </summary>
    /// <param name="inSource">The text to index.</param>
    /// <param name="options">Flags controlling how the index is built.</param>
    public Index(string inSource, IndexOptions options)
        : this(inSource, EmbeddedFiles.EnumerateLines("LibSearchBox.EmbeddedFiles.Stopwords.txt"), options)
    {
    }

    /// <summary>
    /// Tokenizes and indexes <paramref name="inSource"/> with
    /// <see cref="IndexOptions.ExcludeStopwords"/> enabled.
    /// </summary>
    /// <param name="inSource">The text to index.</param>
    public Index(string inSource) : this(inSource, IndexOptions.ExcludeStopwords)
    {
    }

    /// <summary>
    /// Gets the offsets recorded for the given token.
    /// </summary>
    /// <param name="key">The token to look up.</param>
    /// <exception cref="KeyNotFoundException">The token is not present in the index.</exception>
    public List<int> this[string key] {
        get {
            return index[key];
        }
    }

    /// <summary>
    /// Records one occurrence of <paramref name="token"/> at <paramref name="offset"/>,
    /// creating the token's offset list on first sight.
    /// </summary>
    /// <param name="token">The token text.</param>
    /// <param name="offset">The offset to append to the token's list.</param>
    protected void insert(string token, int offset)
    {
        // Single lookup instead of ContainsKey + Add + indexer.
        if (!index.TryGetValue(token, out List<int> offsets)) {
            offsets = new List<int>();
            index.Add(token, offsets);
        }
        offsets.Add(offset);
    }

    /// <summary>
    /// Returns the distinct tokens currently held in the index.
    /// </summary>
    public IEnumerable<string> Tokens()
    {
        return index.Keys;
    }

    /// <summary>
    /// Lazily yields every token together with its list of offsets.
    /// </summary>
    public IEnumerable<KeyValuePair<string, List<int>>> IterateItems()
    {
        // Kept as an iterator so the backing dictionary itself is never handed out.
        foreach (KeyValuePair<string, List<int>> item in index)
            yield return item;
    }

    /// <summary>
    /// Returns an enumerator over the token/offset-list pairs of the index.
    /// </summary>
    public IEnumerator<KeyValuePair<string, List<int>>> GetEnumerator()
    {
        return IterateItems().GetEnumerator();
    }

    /// <summary>
    /// Non-generic enumerator; forwards to the generic <see cref="GetEnumerator"/>.
    /// </summary>
    IEnumerator IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }

    /// <summary>
    /// Renders the index as text: an "Index: " header followed by one
    /// tab-indented "token: offset, offset, ..." line per token.
    /// </summary>
    public override string ToString()
    {
        StringBuilder result = new StringBuilder("Index: \n");
        foreach (KeyValuePair<string, List<int>> item in index)
            result.AppendLine($"\t{item.Key}: {string.Join(", ", item.Value)}");
        return result.ToString();
    }

    // --------------------------------------------------------------------------------------

    /// <summary>
    /// Reads the whole file at <paramref name="filename"/> as text and builds an
    /// index from it using the single-argument constructor.
    /// </summary>
    /// <param name="filename">Path of the file to read.</param>
    public static Index FromFile(string filename)
    {
        return new Index(File.ReadAllText(filename));
    }
}
}