using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using SBRL.Utilities;

namespace SearchBox
{
    /// <summary>
    /// Flags that control how an <see cref="Index"/> is built.
    /// </summary>
    [Flags]
    public enum IndexOptions
    {
        /// <summary>No flags set: every token produced by the tokenizer is indexed.</summary>
        None = 0,

        /// <summary>Skip tokens that the stopword tester reports as stopwords.</summary>
        ExcludeStopwords = 1
    }

    /// <summary>
    /// Maps each token found in a source string to the list of offsets
    /// reported for it by the tokenizer.
    /// </summary>
    public class Index
    {
        // token -> offsets, in the order the tokenizer yielded them.
        private readonly Dictionary<string, List<int>> index = new Dictionary<string, List<int>>();

        // Stays null unless IndexOptions.ExcludeStopwords was requested.
        private readonly StopwordTester stopwordTester;

        /// <summary>
        /// Tokenizes <paramref name="inSource"/> and records every token's offset.
        /// </summary>
        /// <param name="inSource">The text to index.</param>
        /// <param name="stopwords">
        /// The stopword list. Only used when <paramref name="options"/> contains
        /// <see cref="IndexOptions.ExcludeStopwords"/>.
        /// </param>
        /// <param name="options">Flags controlling how the index is built.</param>
        public Index(string inSource, IEnumerable<string> stopwords, IndexOptions options)
        {
            if (options.HasFlag(IndexOptions.ExcludeStopwords))
            {
                stopwordTester = new StopwordTester(stopwords);
            }

            // Tokenize the input and file it in our index.
            // NOTE(review): the tuple element types are inferred from how the items are
            // used below (Item1 -> int offset, Item2 -> string token) - confirm against Tokenizer.
            Tokenizer tokenizer = new Tokenizer(inSource);
            foreach (Tuple<int, string> token in tokenizer)
            {
                // The tester only exists when stopword exclusion was requested; without
                // this guard, building an index with the flag unset dereferenced null.
                if (stopwordTester != null && stopwordTester.IsStopword(token.Item2))
                {
                    continue;
                }

                insert(token.Item2, token.Item1);
            }
        }

        /// <summary>
        /// Builds an index using the stopword list read from the
        /// "SearchBox.EmbeddedFiles.Stopwords.txt" embedded file.
        /// </summary>
        /// <param name="inSource">The text to index.</param>
        /// <param name="options">Flags controlling how the index is built.</param>
        public Index(string inSource, IndexOptions options)
            : this(inSource, EmbeddedFiles.EnumerateLines("SearchBox.EmbeddedFiles.Stopwords.txt"), options)
        {
        }

        /// <summary>
        /// Builds an index with <see cref="IndexOptions.ExcludeStopwords"/> enabled.
        /// </summary>
        /// <param name="inSource">The text to index.</param>
        public Index(string inSource)
            : this(inSource, IndexOptions.ExcludeStopwords)
        {
        }

        /// <summary>
        /// Gets the offsets recorded for <paramref name="key"/>.
        /// </summary>
        /// <param name="key">The token to look up.</param>
        /// <exception cref="KeyNotFoundException">The token is not present in the index.</exception>
        public List<int> this[string key]
        {
            get { return index[key]; }
        }

        /// <summary>
        /// Records one occurrence of <paramref name="token"/> at <paramref name="offset"/>,
        /// creating the token's offset list on first sight.
        /// </summary>
        /// <param name="token">The token to file.</param>
        /// <param name="offset">The offset to append to the token's list.</param>
        protected void insert(string token, int offset)
        {
            // Single lookup instead of ContainsKey followed by the indexer.
            List<int> offsets;
            if (!index.TryGetValue(token, out offsets))
            {
                offsets = new List<int>();
                index.Add(token, offsets);
            }

            offsets.Add(offset);
        }

        /// <summary>
        /// Renders the index as a header line followed by one tab-indented
        /// "token: offset, offset, ..." line per entry.
        /// </summary>
        public override string ToString()
        {
            StringBuilder result = new StringBuilder("Index: \n");
            foreach (KeyValuePair<string, List<int>> item in index)
            {
                result.AppendLine($"\t{item.Key}: {string.Join(", ", item.Value)}");
            }

            return result.ToString();
        }

        // --------------------------------------------------------------------------------------

        /// <summary>
        /// Reads the whole of <paramref name="filename"/> as text and indexes it
        /// using the single-argument constructor.
        /// </summary>
        /// <param name="filename">Path of the file to read.</param>
        /// <returns>An index over the file's contents.</returns>
        public static Index FromFile(string filename)
        {
            return new Index(File.ReadAllText(filename));
        }
    }
}