87 lines
2.9 KiB
C#
87 lines
2.9 KiB
C#
using System;
|
|
using System.Collections.Generic;
|
|
using System.Linq;
|
|
|
|
namespace MarkovGrams
|
|
{
|
|
/// <summary>
/// A collection of methods to generate various types of n-grams.
/// </summary>
public static class NGrams
{
    /// <summary>
    /// Generates a flat list of n-grams from the given list of words.
    /// </summary>
    /// <param name="words">The words to turn into n-grams.</param>
    /// <param name="order">The order (length in characters) of n-gram to generate. Must be at least 1.</param>
    /// <param name="distinct">
    /// When true (the default), duplicate n-grams are removed from the combined result.
    /// When false, an n-gram appears once for every word that contains it
    /// (each individual word still contributes a given n-gram at most once).
    /// </param>
    /// <returns>The n-grams found in the given list of words, in order of first appearance.</returns>
    /// <exception cref="ArgumentNullException">Thrown if <paramref name="words"/> or any word in it is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">Thrown if <paramref name="order"/> is less than 1.</exception>
    public static IEnumerable<string> GenerateFlat(IEnumerable<string> words, int order, bool distinct = true)
    {
        if (words == null)
        {
            throw new ArgumentNullException("words");
        }
        if (order < 1)
        {
            throw new ArgumentOutOfRangeException("order", order, "The n-gram order must be at least 1.");
        }

        List<string> results = new List<string>();
        foreach (string word in words)
        {
            results.AddRange(GenerateFlat(word, order));
        }

        if (distinct)
        {
            // Materialise so callers can enumerate repeatedly without re-running the de-duplication.
            return results.Distinct().ToList();
        }
        return results;
    }

    /// <summary>
    /// Generates a unique list of n-grams from the given string.
    /// </summary>
    /// <param name="str">The string to n-gram-ise.</param>
    /// <param name="order">The order (length in characters) of n-gram to generate. Must be at least 1.</param>
    /// <returns>
    /// A unique list of n-grams found in the specified string, in order of first appearance.
    /// Empty if the string is shorter than <paramref name="order"/>.
    /// </returns>
    /// <exception cref="ArgumentNullException">Thrown if <paramref name="str"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">Thrown if <paramref name="order"/> is less than 1.</exception>
    public static IEnumerable<string> GenerateFlat(string str, int order)
    {
        if (str == null)
        {
            throw new ArgumentNullException("str");
        }
        if (order < 1)
        {
            throw new ArgumentOutOfRangeException("order", order, "The n-gram order must be at least 1.");
        }

        return EnumerateNGrams(str, order).Distinct().ToList();
    }

    /// <summary>
    /// Generates a dictionary of weighted n-grams from the given list of words.
    /// The key is the n-gram itself, and the value is the linear weight of the n-gram
    /// (the total number of times it occurs across all the words).
    /// </summary>
    /// <param name="words">The words to n-gram-ise.</param>
    /// <param name="order">The order (length in characters) of n-grams to generate. Must be at least 1.</param>
    /// <returns>The weighted dictionary of n-grams.</returns>
    /// <exception cref="ArgumentNullException">Thrown if <paramref name="words"/> or any word in it is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">Thrown if <paramref name="order"/> is less than 1.</exception>
    public static Dictionary<string, int> GenerateWeighted(IEnumerable<string> words, int order)
    {
        if (words == null)
        {
            throw new ArgumentNullException("words");
        }
        if (order < 1)
        {
            throw new ArgumentOutOfRangeException("order", order, "The n-gram order must be at least 1.");
        }

        Dictionary<string, int> results = new Dictionary<string, int>();
        foreach (string word in words)
        {
            foreach (KeyValuePair<string, int> ngram in GenerateWeighted(word, order))
            {
                // TryGetValue leaves the total at 0 when the n-gram has not been seen yet.
                int runningTotal;
                results.TryGetValue(ngram.Key, out runningTotal);
                results[ngram.Key] = runningTotal + ngram.Value;
            }
        }
        return results;
    }

    /// <summary>
    /// Generates a dictionary of weighted n-grams from the specified string.
    /// The key is the n-gram itself, and the value is the number of times it occurs in the string.
    /// </summary>
    /// <param name="str">The string to n-gram-ise.</param>
    /// <param name="order">The order (length in characters) of n-grams to generate. Must be at least 1.</param>
    /// <returns>
    /// The weighted dictionary of n-grams. Empty if the string is shorter than <paramref name="order"/>.
    /// </returns>
    /// <exception cref="ArgumentNullException">Thrown if <paramref name="str"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">Thrown if <paramref name="order"/> is less than 1.</exception>
    public static Dictionary<string, int> GenerateWeighted(string str, int order)
    {
        if (str == null)
        {
            throw new ArgumentNullException("str");
        }
        if (order < 1)
        {
            throw new ArgumentOutOfRangeException("order", order, "The n-gram order must be at least 1.");
        }

        Dictionary<string, int> results = new Dictionary<string, int>();
        foreach (string ngram in EnumerateNGrams(str, order))
        {
            // TryGetValue leaves the count at 0 when the n-gram has not been seen yet.
            int count;
            results.TryGetValue(ngram, out count);
            results[ngram] = count + 1;
        }
        return results;
    }

    /// <summary>
    /// Lazily yields every substring of length <paramref name="order"/> in <paramref name="str"/>,
    /// left to right, duplicates included. Callers are expected to have validated the arguments.
    /// </summary>
    private static IEnumerable<string> EnumerateNGrams(string str, int order)
    {
        // The last valid start index is Length - order, so the bound is inclusive;
        // a strict '<' here would silently drop the final n-gram of every string.
        for (int i = 0; i <= str.Length - order; i++)
        {
            yield return str.Substring(i, order);
        }
    }
}
|
|
}
|