MarkovGrams/MarkovGrams/WeightedMarkovChain.cs

86 lines
2.7 KiB
C#

using System;
using System.Collections.Generic;
using System.Linq;
using MarkovGrams.Utilities;
using SBRL.Algorithms;
namespace MarkovGrams
{
/// <summary>
/// An unweighted character-based markov chain.
/// </summary>
public class WeightedMarkovChain
{
private WeightedRandom<string> wrandom = new WeightedRandom<string>();
/// <summary>
/// The ngrams that this markov chain currently contains.
/// </summary>
Dictionary<string, double> ngrams;
/// <summary>
/// Creates a new character-based markov chain.
/// </summary>
/// <param name="inNgrams">The ngrams to populate the new markov chain with.</param>
public WeightedMarkovChain(IEnumerable<string> inNgrams)
{
ngrams = new Dictionary<string, double>();
foreach (string ngram in inNgrams)
{
if (ngrams.ContainsKey(ngram))
ngrams[ngram]++;
else
ngrams.Add(ngram, 1);
}
}
/// <summary>
/// Returns a random ngram that's currently loaded into this WeightedMarkovChain.
/// </summary>
/// <returns>A random ngram from this UnweightMarkovChain's cache of ngrams.</returns>
public string RandomNgram()
{
if (wrandom.Count == 0)
wrandom.SetContents(ngrams);
return wrandom.Next();
}
/// <summary>
/// Generates a new random string from the currently stored ngrams.
/// </summary>
/// <param name="length">
/// The length of ngram to generate.
/// Note that this is a target, not a fixed value - e.g. passing 2 when the n-gram order is 3 will
/// result in a string of length 3. Also, depending on the current ngrams this markov chain contains,
/// it may end up being cut short.
/// </param>
/// <returns>A new random string.</returns>
public string Generate(int length)
{
string result = RandomNgram();
string lastNgram = result;
while(result.Length < length)
{
wrandom.ClearContents();
// The substring that the next ngram in the chain needs to start with
string nextStartsWith = lastNgram.Substring(1);
// Get a list of possible n-grams we could choose from next
Dictionary<string, double> convNextNgrams = new Dictionary<string, double>();
ngrams.Where(gram_data => gram_data.Key.StartsWith(nextStartsWith))
.ForEach((KeyValuePair<string, double> ngramData) => convNextNgrams.Add(ngramData.Key, ngramData.Value));
// If there aren't any choices left, we can't exactly keep adding to the new string any more :-(
if(convNextNgrams.Count() == 0)
break;
wrandom.SetContents(convNextNgrams);
// Pick a random n-gram from the list
string nextNgram = wrandom.Next();
// Add the last character from the n-gram to the string we're building
result += nextNgram[nextNgram.Length - 1];
lastNgram = nextNgram;
}
wrandom.ClearContents();
return result;
}
}
}