using System;
using System.Collections.Generic;
using System.Linq;

namespace MarkovGrams
{
    /// <summary>
    /// A collection of methods to generate various different types of n-grams.
    /// </summary>
    public static class NGrams
    {
        /// <summary>
        /// Generates a unique list of n-grams from the given list of words.
        /// </summary>
        /// <param name="words">The words to turn into n-grams.</param>
        /// <param name="order">The order (length in characters) of n-gram to generate. Must be at least 1.</param>
        /// <returns>
        /// A unique list of n-grams found in the given list of words, in order of first appearance.
        /// Words shorter than <paramref name="order"/> contribute nothing.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="words"/> or one of its elements is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="order"/> is less than 1.</exception>
        public static IEnumerable<string> GenerateFlat(IEnumerable<string> words, int order)
        {
            if (words == null)
            {
                throw new ArgumentNullException(nameof(words));
            }
            ValidateOrder(order);

            // Materialise eagerly so that a bad word fails here rather than when the caller enumerates.
            return words
                .SelectMany(word => GenerateFlat(word, order))
                .Distinct()
                .ToList();
        }

        /// <summary>
        /// Generates a unique list of n-grams from the given string.
        /// </summary>
        /// <param name="str">The string to n-gram-ise.</param>
        /// <param name="order">The order (length in characters) of n-gram to generate. Must be at least 1.</param>
        /// <returns>
        /// A unique list of n-grams found in the specified string, in order of first appearance.
        /// Empty if the string is shorter than <paramref name="order"/>.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="order"/> is less than 1.</exception>
        public static IEnumerable<string> GenerateFlat(string str, int order)
        {
            if (str == null)
            {
                throw new ArgumentNullException(nameof(str));
            }
            ValidateOrder(order);

            List<string> results = new List<string>();
            // The last valid start index is Length - order (inclusive); using '<' here
            // would silently drop the final n-gram of every string.
            for (int i = 0; i <= str.Length - order; i++)
            {
                results.Add(str.Substring(i, order));
            }
            return results.Distinct();
        }

        /// <summary>
        /// Generates a dictionary of weighted n-grams from the given list of words.
        /// The key is the n-gram itself, and the value is the linear weight of the n-gram
        /// (the total number of times it occurs across all of the words).
        /// </summary>
        /// <param name="words">The words to n-gram-ise.</param>
        /// <param name="order">The order (length in characters) of n-grams to generate. Must be at least 1.</param>
        /// <returns>The weighted dictionary of n-grams.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="words"/> or one of its elements is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="order"/> is less than 1.</exception>
        public static Dictionary<string, int> GenerateWeighted(IEnumerable<string> words, int order)
        {
            if (words == null)
            {
                throw new ArgumentNullException(nameof(words));
            }
            ValidateOrder(order);

            Dictionary<string, int> results = new Dictionary<string, int>();
            foreach (string word in words)
            {
                foreach (KeyValuePair<string, int> ngram in GenerateWeighted(word, order))
                {
                    // TryGetValue leaves 'current' at 0 when the key is absent,
                    // so a single lookup covers both the new-key and existing-key cases.
                    int current;
                    results.TryGetValue(ngram.Key, out current);
                    results[ngram.Key] = current + ngram.Value;
                }
            }
            return results;
        }

        /// <summary>
        /// Generates a dictionary of weighted n-grams from the specified string.
        /// The key is the n-gram itself, and the value is the number of times it occurs in the string.
        /// </summary>
        /// <param name="str">The string to n-gram-ise.</param>
        /// <param name="order">The order (length in characters) of n-grams to generate. Must be at least 1.</param>
        /// <returns>
        /// The weighted dictionary of n-grams. Empty if the string is shorter than <paramref name="order"/>.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="order"/> is less than 1.</exception>
        public static Dictionary<string, int> GenerateWeighted(string str, int order)
        {
            if (str == null)
            {
                throw new ArgumentNullException(nameof(str));
            }
            ValidateOrder(order);

            Dictionary<string, int> results = new Dictionary<string, int>();
            // Inclusive upper bound so that the final n-gram of the string is counted too.
            for (int i = 0; i <= str.Length - order; i++)
            {
                string ngram = str.Substring(i, order);
                int count;
                results.TryGetValue(ngram, out count);
                results[ngram] = count + 1;
            }
            return results;
        }

        /// <summary>
        /// Rejects n-gram orders that cannot produce meaningful n-grams (zero or negative).
        /// </summary>
        /// <param name="order">The n-gram order to check.</param>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="order"/> is less than 1.</exception>
        private static void ValidateOrder(int order)
        {
            if (order < 1)
            {
                throw new ArgumentOutOfRangeException(nameof(order), order, "The n-gram order must be at least 1.");
            }
        }
    }
}