using System;
using System.Collections.Generic;
using System.Linq;
namespace MarkovGrams
{
/// <summary>
/// A collection of methods to generate various different types of n-grams.
/// </summary>
public static class NGrams
{
    /// <summary>
    /// Generates a unique list of n-grams from the given list of words.
    /// </summary>
    /// <param name="words">The words to turn into n-grams.</param>
    /// <param name="order">The order (length, in characters) of n-gram to generate. Must be at least 1.</param>
    /// <returns>
    /// A unique list of n-grams found in the given list of words, in order of first appearance.
    /// Words shorter than <paramref name="order"/> contribute nothing.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="words"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="words"/> contains a null entry.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="order"/> is less than 1.</exception>
    public static IEnumerable<string> GenerateFlat(IEnumerable<string> words, int order)
    {
        if (words == null)
        {
            throw new ArgumentNullException(nameof(words));
        }
        ValidateOrder(order);

        List<string> results = new List<string>();
        foreach (string word in words)
        {
            if (word == null)
            {
                throw new ArgumentException("The word list must not contain null entries.", nameof(words));
            }
            results.AddRange(Slice(word, order));
        }
        // Materialise once so that callers enumerating the result repeatedly
        // don't re-run the de-duplication every time.
        return results.Distinct().ToList();
    }

    /// <summary>
    /// Generates a unique list of n-grams from the given string.
    /// </summary>
    /// <param name="str">The string to n-gram-ise.</param>
    /// <param name="order">The order (length, in characters) of n-gram to generate. Must be at least 1.</param>
    /// <returns>
    /// A unique list of n-grams found in the specified string, in order of first appearance.
    /// Empty if the string is shorter than <paramref name="order"/>.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="order"/> is less than 1.</exception>
    public static IEnumerable<string> GenerateFlat(string str, int order)
    {
        if (str == null)
        {
            throw new ArgumentNullException(nameof(str));
        }
        ValidateOrder(order);

        return Slice(str, order).Distinct().ToList();
    }

    /// <summary>
    /// Generates a dictionary of weighted n-grams from the given list of words.
    /// The key is the n-gram itself, and the value is the linear weight of the n-gram
    /// (the total number of times it occurs across all the words).
    /// </summary>
    /// <param name="words">The words to n-gram-ise.</param>
    /// <param name="order">The order (length, in characters) of n-grams to generate. Must be at least 1.</param>
    /// <returns>The weighted dictionary of n-grams.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="words"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="words"/> contains a null entry.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="order"/> is less than 1.</exception>
    public static Dictionary<string, int> GenerateWeighted(IEnumerable<string> words, int order)
    {
        if (words == null)
        {
            throw new ArgumentNullException(nameof(words));
        }
        ValidateOrder(order);

        Dictionary<string, int> results = new Dictionary<string, int>();
        foreach (string word in words)
        {
            if (word == null)
            {
                throw new ArgumentException("The word list must not contain null entries.", nameof(words));
            }
            // Count straight into the shared dictionary rather than building
            // and then merging a temporary dictionary per word.
            CountInto(results, word, order);
        }
        return results;
    }

    /// <summary>
    /// Generates a dictionary of weighted n-grams from the specified string.
    /// The key is the n-gram itself, and the value is the number of times it occurs in the string.
    /// </summary>
    /// <param name="str">The string to n-gram-ise.</param>
    /// <param name="order">The order (length, in characters) of n-grams to generate. Must be at least 1.</param>
    /// <returns>The weighted dictionary of n-grams.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="order"/> is less than 1.</exception>
    public static Dictionary<string, int> GenerateWeighted(string str, int order)
    {
        if (str == null)
        {
            throw new ArgumentNullException(nameof(str));
        }
        ValidateOrder(order);

        Dictionary<string, int> results = new Dictionary<string, int>();
        CountInto(results, str, order);
        return results;
    }

    /// <summary>
    /// Rejects n-gram orders that cannot produce a meaningful n-gram.
    /// </summary>
    private static void ValidateOrder(int order)
    {
        if (order < 1)
        {
            throw new ArgumentOutOfRangeException(nameof(order), order, "The n-gram order must be at least 1.");
        }
    }

    /// <summary>
    /// Yields every n-gram of the given order in the string, left to right, duplicates included.
    /// Callers are expected to have validated the arguments already.
    /// </summary>
    private static IEnumerable<string> Slice(string str, int order)
    {
        // The last valid start index is Length - order, so the bound is inclusive;
        // a strict '<' here would silently drop the final n-gram of every string.
        for (int i = 0; i <= str.Length - order; i++)
        {
            yield return str.Substring(i, order);
        }
    }

    /// <summary>
    /// Adds one to the running count of each n-gram found in the string.
    /// </summary>
    private static void CountInto(Dictionary<string, int> totals, string str, int order)
    {
        foreach (string ngram in Slice(str, order))
        {
            int count;
            // TryGetValue leaves count at 0 for an unseen n-gram, which is
            // exactly the starting value wanted, so the result can be ignored.
            totals.TryGetValue(ngram, out count);
            totals[ngram] = count + 1;
        }
    }
}
}