using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Text;

using SBRL.Utilities;

namespace LibSearchBox
{
	/// <summary>
	/// Flags that control how an <see cref="Index"/> is built.
	/// </summary>
	[Flags]
	public enum IndexOptions
	{
		/// <summary>
		/// Tokens that the stopword tester reports as stopwords are left out of the index.
		/// </summary>
		ExcludeStopwords = 1
	}

	/// <summary>
	/// Maps each token found in a source string to the list of offsets reported for it
	/// by the tokenizer.
	/// </summary>
	public class Index : IEnumerable<KeyValuePair<string, List<int>>>
	{
		// token -> every offset at which that token was reported, in insertion order.
		private readonly Dictionary<string, List<int>> index = new Dictionary<string, List<int>>();

		// Only created when IndexOptions.ExcludeStopwords is set; null otherwise.
		private readonly StopwordTester stopwordTester;

		/// <summary>
		/// Builds an index from the given source text.
		/// </summary>
		/// <param name="inSource">The text to tokenize and index.</param>
		/// <param name="stopwords">
		/// The stopword list handed to the stopword tester. Only used when
		/// <paramref name="options"/> includes <see cref="IndexOptions.ExcludeStopwords"/>.
		/// </param>
		/// <param name="options">Flags controlling how the index is built.</param>
		public Index(string inSource, IEnumerable<string> stopwords, IndexOptions options)
		{
			if (options.HasFlag(IndexOptions.ExcludeStopwords))
				stopwordTester = new StopwordTester(stopwords);

			// Tokenize the input and file it in our index
			Tokenizer tokenizer = new Tokenizer(inSource);
			foreach ((int, string) token in tokenizer)
			{
				int offset = token.Item1;
				string text = token.Item2;

				// The tester is null when stopword exclusion is disabled; in that case
				// every token is indexed instead of dereferencing a null reference.
				if (stopwordTester != null && stopwordTester.IsStopword(text))
					continue;

				insert(text, offset);
			}
		}

		/// <summary>
		/// Builds an index from the given source text, using the stopword list read from
		/// the embedded resource "LibSearchBox.EmbeddedFiles.Stopwords.txt".
		/// </summary>
		/// <param name="inSource">The text to tokenize and index.</param>
		/// <param name="options">Flags controlling how the index is built.</param>
		public Index(string inSource, IndexOptions options)
			: this(inSource, EmbeddedFiles.EnumerateLines("LibSearchBox.EmbeddedFiles.Stopwords.txt"), options)
		{
		}

		/// <summary>
		/// Builds an index from the given source text with
		/// <see cref="IndexOptions.ExcludeStopwords"/> enabled.
		/// </summary>
		/// <param name="inSource">The text to tokenize and index.</param>
		public Index(string inSource) : this(inSource, IndexOptions.ExcludeStopwords)
		{
		}

		/// <summary>
		/// Gets the offsets recorded for the given token.
		/// </summary>
		/// <param name="key">The token to look up.</param>
		/// <returns>The internal offset list for <paramref name="key"/> (not a copy).</returns>
		/// <exception cref="KeyNotFoundException">The token is not present in the index.</exception>
		public List<int> this[string key]
		{
			get { return index[key]; }
		}

		/// <summary>
		/// Records one occurrence of a token, creating its offset list on first sight.
		/// </summary>
		/// <param name="token">The token to record.</param>
		/// <param name="offset">The offset to append to the token's list.</param>
		protected void insert(string token, int offset)
		{
			// Single lookup instead of ContainsKey followed by the indexer.
			if (!index.TryGetValue(token, out List<int> offsets))
			{
				offsets = new List<int>();
				index.Add(token, offsets);
			}
			offsets.Add(offset);
		}

		/// <summary>
		/// Returns every distinct token currently held in the index.
		/// </summary>
		public IEnumerable<string> Tokens()
		{
			return index.Keys;
		}

		/// <summary>
		/// Lazily iterates over every token together with its offset list.
		/// </summary>
		public IEnumerable<KeyValuePair<string, List<int>>> IterateItems()
		{
			foreach (KeyValuePair<string, List<int>> item in index)
				yield return item;
		}

		/// <summary>
		/// Returns an enumerator over the token / offset-list pairs.
		/// </summary>
		public IEnumerator<KeyValuePair<string, List<int>>> GetEnumerator()
		{
			return IterateItems().GetEnumerator();
		}

		/// <summary>
		/// Non-generic enumerator; forwards to <see cref="GetEnumerator"/>.
		/// </summary>
		IEnumerator IEnumerable.GetEnumerator()
		{
			return GetEnumerator();
		}

		/// <summary>
		/// Renders the index as text: a header line followed by one tab-indented line per
		/// token listing its offsets separated by ", ".
		/// </summary>
		public override string ToString()
		{
			StringBuilder result = new StringBuilder("Index: \n");
			foreach (KeyValuePair<string, List<int>> item in index)
				result.AppendLine($"\t{item.Key}: {string.Join(", ", item.Value)}");
			return result.ToString();
		}

		// --------------------------------------------------------------------------------------

		/// <summary>
		/// Reads the whole file as text and builds an index from it using the
		/// single-argument constructor.
		/// </summary>
		/// <param name="filename">Path of the file to read.</param>
		/// <returns>A new index over the file's contents.</returns>
		public static Index FromFile(string filename)
		{
			return new Index(File.ReadAllText(filename));
		}
	}
}