2018-02-17 21:19:50 +00:00
|
|
|
|
using System;
|
|
|
|
|
using System.Collections.Generic;
|
|
|
|
|
using System.Linq;
|
|
|
|
|
using MarkovGrams.Utilities;
|
|
|
|
|
using SBRL.Algorithms;
|
|
|
|
|
|
|
|
|
|
namespace MarkovGrams
|
|
|
|
|
{
|
|
|
|
|
/// <summary>
/// A weighted markov chain that builds new strings out of n-grams.
/// Operates either on characters or on space-separated tokens, depending on <see cref="Mode" />.
/// </summary>
public class WeightedMarkovChain
{
	/// <summary>
	/// The weighted random picker used to choose n-grams.
	/// It is shared between <see cref="RandomNgram" /> (which fills it with starting n-grams
	/// when it reports a count of zero) and the generation loop (which refills it on every step
	/// and clears it when finished).
	/// </summary>
	private readonly WeightedRandom<string> wrandom = new WeightedRandom<string>();

	/// <summary>
	/// The ngrams that this markov chain currently contains, mapped to their weights.
	/// </summary>
	private readonly Dictionary<string, double> ngrams;

	/// <summary>
	/// Whether to always start generating a new word from an n-gram that starts with
	/// an uppercase letter.
	/// </summary>
	/// <remarks>
	/// The starting n-grams are only (re)loaded when the internal picker is empty, so a change
	/// made after calling <see cref="RandomNgram" /> directly may not take effect until the
	/// picker has been cleared again (which every call to Generate does).
	/// </remarks>
	public bool StartOnUppercase = false;

	/// <summary>
	/// The generation mode to use when running the Markov Chain.
	/// </summary>
	/// <remarks>
	/// The input n-grams must have been generated using the same mode specified here.
	/// Any mode other than <c>GenerationMode.CharacterLevel</c> is treated as operating on
	/// tokens separated by a single space.
	/// </remarks>
	public GenerationMode Mode { get; private set; } = GenerationMode.CharacterLevel;

	/// <summary>
	/// Creates a new markov chain from n-grams with floating-point weights.
	/// </summary>
	/// <param name="inNgrams">The ngrams (and their weights) to populate the new markov chain with. The dictionary is stored by reference, not copied.</param>
	/// <param name="inMode">The generation mode the n-grams were created with.</param>
	/// <exception cref="ArgumentNullException">Thrown if <paramref name="inNgrams" /> is null.</exception>
	public WeightedMarkovChain(Dictionary<string, double> inNgrams, GenerationMode inMode) {
		if (inNgrams == null)
			throw new ArgumentNullException(nameof(inNgrams));

		ngrams = inNgrams;
		Mode = inMode;
	}

	/// <summary>
	/// Creates a new markov chain from n-grams with integer weights (e.g. occurrence counts).
	/// </summary>
	/// <param name="inNgrams">The ngrams (and their weights) to populate the new markov chain with. The weights are copied and converted to doubles.</param>
	/// <param name="inMode">The generation mode the n-grams were created with.</param>
	/// <exception cref="ArgumentNullException">Thrown if <paramref name="inNgrams" /> is null.</exception>
	public WeightedMarkovChain(Dictionary<string, int> inNgrams, GenerationMode inMode) {
		if (inNgrams == null)
			throw new ArgumentNullException(nameof(inNgrams));

		ngrams = new Dictionary<string, double>(inNgrams.Count);
		foreach (KeyValuePair<string, int> ngram in inNgrams)
			ngrams[ngram.Key] = ngram.Value;
		Mode = inMode;
	}

	/// <summary>
	/// Returns a random ngram that's currently loaded into this WeightedMarkovChain.
	/// </summary>
	/// <remarks>
	/// If the internal picker is empty it is first filled with every n-gram, or - when
	/// <see cref="StartOnUppercase" /> is set - only with those whose first character is uppercase.
	/// </remarks>
	/// <returns>A random ngram from this WeightedMarkovChain's cache of ngrams.</returns>
	/// <exception cref="InvalidOperationException">
	/// Thrown if <see cref="StartOnUppercase" /> is set but no n-gram starts with an uppercase letter.
	/// </exception>
	public string RandomNgram()
	{
		if (wrandom.Count == 0) {
			if (!StartOnUppercase)
				wrandom.SetContents(ngrams);
			else {
				Dictionary<string, double> filteredNGrams = new Dictionary<string, double>();
				foreach (KeyValuePair<string, double> pair in ngrams) {
					// An empty key has no first character to test, so it can never be a valid start
					if (pair.Key.Length > 0 && char.IsUpper(pair.Key[0]))
						filteredNGrams.Add(pair.Key, pair.Value);
				}
				if (filteredNGrams.Count == 0)
					throw new InvalidOperationException($"Error: No valid starting ngrams were found (StartOnUppercase: {StartOnUppercase}).");
				wrandom.SetContents(filteredNGrams);
			}
		}

		return wrandom.Next();
	}

	/// <summary>
	/// Generates a new random string from the currently stored ngrams.
	/// </summary>
	/// <param name="length">
	/// The target length of the string to generate, measured in characters in character-level
	/// mode and in space-separated tokens otherwise.
	/// Note that this is a target, not a fixed value - e.g. passing 2 when the n-gram order is 3 will
	/// result in a string of length 3. Also, depending on the current ngrams this markov chain contains,
	/// it may end up being cut short.
	/// </param>
	/// <returns>A new random string.</returns>
	public string Generate(int length)
	{
		return Generate(length, out _);
	}

	/// <summary>
	/// Generates a new random string from the currently stored ngrams, and reports how many
	/// candidate n-grams were available on average while doing so.
	/// </summary>
	/// <param name="length">
	/// The target length of the string to generate, measured in characters in character-level
	/// mode and in space-separated tokens otherwise.
	/// Note that this is a target, not a fixed value - e.g. passing 2 when the n-gram order is 3 will
	/// result in a string of length 3. Also, depending on the current ngrams this markov chain contains,
	/// it may end up being cut short.
	/// </param>
	/// <param name="choicePointRatio">
	/// Set to the total number of candidate n-grams found across all steps, divided by
	/// (the number of n-grams appended after the starting one, plus one).
	/// Zero if no candidates were ever found.
	/// </param>
	/// <returns>A new random string.</returns>
	public string Generate(int length, out float choicePointRatio)
	{
		bool characterLevel = Mode == GenerationMode.CharacterLevel;

		string result = RandomNgram();
		string lastNgram = result;

		// Every step appends exactly one character (or one space-prefixed token), so the length
		// can be tracked incrementally instead of re-measuring the whole string each iteration.
		int currentLength = characterLevel ? result.Length : result.Split(' ').Length;
		long totalChoices = 0;
		int appendedCount = 0;

		while (currentLength < length)
		{
			wrandom.ClearContents();

			// The substring that the next ngram in the chain needs to start with
			string nextStartsWith = characterLevel
				? lastNgram.Substring(1)
				: string.Join(" ", lastNgram.Split(' ').Skip(1));

			// When matching tokens, the overlap has to end on a token boundary - otherwise
			// "cat" would also match an n-gram such as "category is".
			string requiredPrefix = characterLevel || nextStartsWith.Length == 0
				? nextStartsWith
				: nextStartsWith + " ";

			// Get a list of possible n-grams we could choose from next.
			// Ordinal comparison: n-gram keys are raw data, not linguistic text.
			Dictionary<string, double> convNextNgrams = new Dictionary<string, double>();
			foreach (KeyValuePair<string, double> ngramData in ngrams) {
				if (ngramData.Key.StartsWith(requiredPrefix, StringComparison.Ordinal))
					convNextNgrams.Add(ngramData.Key, ngramData.Value);
			}

			totalChoices += convNextNgrams.Count;

			// If there aren't any choices left, we can't exactly keep adding to the new string any more :-(
			if (convNextNgrams.Count == 0)
				break;

			wrandom.SetContents(convNextNgrams);
			// Pick a random n-gram from the list
			string nextNgram = wrandom.Next();

			// Add the last character (or token) from the n-gram to the string we're building
			if (characterLevel)
				result += nextNgram[nextNgram.Length - 1];
			else
				result += ' ' + nextNgram.Split(' ').Last();

			lastNgram = nextNgram;
			appendedCount++;
			currentLength++;
		}
		// Leave the picker empty so that the next RandomNgram() call reloads the starting n-grams
		wrandom.ClearContents();

		choicePointRatio = (float)totalChoices / (float)(appendedCount + 1);
		return result;
	}
}
|
|
|
|
|
}
|