SearchBox/SearchBox/Index.cs

72 lines
1.7 KiB
C#

using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using SBRL.Utilities;
namespace SearchBox
{
/// <summary>
/// Bit flags that control how an <see cref="Index"/> is built.
/// Values may be combined with the bitwise OR operator.
/// </summary>
[Flags]
public enum IndexOptions
{
    /// <summary>No options set; the zero value of the flag set.</summary>
    None = 0,

    /// <summary>
    /// Skip tokens that the stopword tester reports as stopwords, so they
    /// never enter the index.
    /// </summary>
    ExcludeStopwords = 1
}
/// <summary>
/// An inverted index over a piece of text: maps each token produced by the
/// <c>Tokenizer</c> to the list of offsets reported for that token.
/// </summary>
public class Index
{
    // token -> every offset inserted for that token, in insertion order.
    private readonly Dictionary<string, List<int>> index = new Dictionary<string, List<int>>();

    // Remains null unless IndexOptions.ExcludeStopwords was requested.
    private readonly StopwordTester stopwordTester;

    /// <summary>
    /// Tokenizes <paramref name="inSource"/> and records every token's offset.
    /// </summary>
    /// <param name="inSource">The text to tokenize and index.</param>
    /// <param name="stopwords">
    /// Words handed to the stopword tester. Only consulted when
    /// <paramref name="options"/> includes <see cref="IndexOptions.ExcludeStopwords"/>.
    /// </param>
    /// <param name="options">Flags controlling how the index is built.</param>
    public Index(string inSource, IEnumerable<string> stopwords, IndexOptions options)
    {
        if (options.HasFlag(IndexOptions.ExcludeStopwords))
            stopwordTester = new StopwordTester(stopwords);

        // Tokenize the input and file it in our index.
        // Each token is an (offset, text) pair: Item1 is stored as the offset,
        // Item2 is the dictionary key.
        Tokenizer tokenizer = new Tokenizer(inSource);
        foreach (Tuple<int, string> token in tokenizer) {
            // Without the ExcludeStopwords flag there is no tester, so nothing
            // is filtered. Dereferencing it unconditionally would throw a
            // NullReferenceException on the first token.
            if (stopwordTester != null && stopwordTester.IsStopword(token.Item2)) continue;
            insert(token.Item2, token.Item1);
        }
    }

    /// <summary>
    /// Builds an index using the stopword list read from
    /// <c>SearchBox.EmbeddedFiles.Stopwords.txt</c>.
    /// </summary>
    /// <param name="inSource">The text to tokenize and index.</param>
    /// <param name="options">Flags controlling how the index is built.</param>
    public Index(string inSource, IndexOptions options)
        : this(inSource, EmbeddedFiles.EnumerateLines("SearchBox.EmbeddedFiles.Stopwords.txt"), options)
    {
    }

    /// <summary>
    /// Builds an index with <see cref="IndexOptions.ExcludeStopwords"/> enabled
    /// and the default stopword list.
    /// </summary>
    /// <param name="inSource">The text to tokenize and index.</param>
    public Index(string inSource) : this(inSource, IndexOptions.ExcludeStopwords)
    {
    }

    /// <summary>
    /// Gets the offsets recorded for <paramref name="key"/>.
    /// The returned list is the index's own storage, not a copy.
    /// </summary>
    /// <param name="key">The token to look up.</param>
    /// <exception cref="KeyNotFoundException">
    /// <paramref name="key"/> is not present in the index.
    /// </exception>
    public List<int> this[string key] {
        get {
            return index[key];
        }
    }

    /// <summary>
    /// Records one occurrence of <paramref name="token"/> at <paramref name="offset"/>,
    /// creating the token's offset list on first sight.
    /// </summary>
    /// <param name="token">The token text used as the dictionary key.</param>
    /// <param name="offset">The offset to append to the token's list.</param>
    protected void insert(string token, int offset)
    {
        // Single lookup instead of ContainsKey + Add + indexer.
        List<int> offsets;
        if (!index.TryGetValue(token, out offsets)) {
            offsets = new List<int>();
            index.Add(token, offsets);
        }
        offsets.Add(offset);
    }

    /// <summary>
    /// Renders the index as a header line followed by one tab-indented
    /// <c>token: offset, offset, ...</c> line per entry.
    /// </summary>
    /// <returns>A multi-line, human-readable dump of the index.</returns>
    public override string ToString()
    {
        StringBuilder result = new StringBuilder("Index: \n");
        foreach (KeyValuePair<string, List<int>> item in index)
            result.AppendLine($"\t{item.Key}: {string.Join(", ", item.Value)}");
        return result.ToString();
    }

    // --------------------------------------------------------------------------------------

    /// <summary>
    /// Reads the whole of <paramref name="filename"/> as text and indexes it
    /// with the default options and stopword list.
    /// </summary>
    /// <param name="filename">Path of the file to read.</param>
    /// <returns>A new <see cref="Index"/> over the file's contents.</returns>
    public static Index FromFile(string filename)
    {
        return new Index(File.ReadAllText(filename));
    }
}
}