|
|
|
@ -1,6 +1,8 @@
|
|
|
|
|
using System;
|
|
|
|
|
using System.Collections.Concurrent;
|
|
|
|
|
using System.Collections.Generic;
|
|
|
|
|
using System.Linq;
|
|
|
|
|
using System.Threading.Tasks;
|
|
|
|
|
using MarkovGrams.Utilities;
|
|
|
|
|
using SBRL.Algorithms;
|
|
|
|
|
|
|
|
|
@ -57,9 +59,13 @@ namespace MarkovGrams
|
|
|
|
|
if (!StartOnUppercase)
|
|
|
|
|
wrandom.SetContents(ngrams);
|
|
|
|
|
else {
|
|
|
|
|
Dictionary<string, double> filteredNGrams = new Dictionary<string, double>();
|
|
|
|
|
foreach (KeyValuePair<string, double> pair in ngrams.Where((pair) => char.IsUpper(pair.Key[0])))
|
|
|
|
|
filteredNGrams.Add(pair.Key, pair.Value);
|
|
|
|
|
ConcurrentDictionary<string, double> filteredNGrams = new ConcurrentDictionary<string, double>();
|
|
|
|
|
Parallel.ForEach(ngrams, (KeyValuePair<string, double> pair) => {
|
|
|
|
|
if (!char.IsUpper(pair.Key[0])) return;
|
|
|
|
|
if (!filteredNGrams.TryAdd(pair.Key, pair.Value))
|
|
|
|
|
throw new Exception("Error: Couldn't add to uppercase staging n-gram ConcurrentDictionary!");
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
if (filteredNGrams.Count() == 0)
|
|
|
|
|
throw new Exception($"Error: No valid starting ngrams were found (StartOnUppercase: {StartOnUppercase}).");
|
|
|
|
|
wrandom.SetContents(filteredNGrams);
|
|
|
|
@ -87,20 +93,24 @@ namespace MarkovGrams
|
|
|
|
|
{
|
|
|
|
|
string result = RandomNgram();
|
|
|
|
|
string lastNgram = result;
|
|
|
|
|
List<int> choiceCounts = new List<int>(); int i = 0;
|
|
|
|
|
while((Mode == GenerationMode.CharacterLevel ? result.Length : result.Split(' ').Length) < length)
|
|
|
|
|
ConcurrentBag<int> choiceCounts = new ConcurrentBag<int>(); int i = 0;
|
|
|
|
|
while((Mode == GenerationMode.CharacterLevel ? result.Length : result.CountCharInstances(" ".ToCharArray()) + 1) < length)
|
|
|
|
|
{
|
|
|
|
|
wrandom.ClearContents();
|
|
|
|
|
// The substring that the next ngram in the chain needs to start with
|
|
|
|
|
string nextStartsWith = Mode == GenerationMode.CharacterLevel ? lastNgram.Substring(1) : string.Join(" ", lastNgram.Split(' ').Skip(1));
|
|
|
|
|
// Get a list of possible n-grams we could choose from next
|
|
|
|
|
Dictionary<string, double> convNextNgrams = new Dictionary<string, double>();
|
|
|
|
|
ngrams.Where(gram_data => gram_data.Key.StartsWith(nextStartsWith))
|
|
|
|
|
.ForEach((KeyValuePair<string, double> ngramData) => convNextNgrams.Add(ngramData.Key, ngramData.Value));
|
|
|
|
|
ConcurrentDictionary<string, double> convNextNgrams = new ConcurrentDictionary<string, double>();
|
|
|
|
|
Parallel.ForEach(ngrams, (KeyValuePair<string, double> ngramData) => {
|
|
|
|
|
if (!ngramData.Key.StartsWithFast(nextStartsWith)) return;
|
|
|
|
|
|
|
|
|
|
if (!convNextNgrams.TryAdd(ngramData.Key, ngramData.Value))
|
|
|
|
|
throw new Exception("Error: Failed to add to staging ngram concurrent dictionary");
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
choiceCounts.Add(convNextNgrams.Count);
|
|
|
|
|
choiceCounts.Add(convNextNgrams.Count());
|
|
|
|
|
// If there aren't any choices left, we can't exactly keep adding to the new string any more :-(
|
|
|
|
|
if(convNextNgrams.Count() == 0)
|
|
|
|
|
if(convNextNgrams.Count == 0)
|
|
|
|
|
break;
|
|
|
|
|
wrandom.SetContents(convNextNgrams);
|
|
|
|
|
// Pick a random n-gram from the list
|
|
|
|
@ -109,7 +119,7 @@ namespace MarkovGrams
|
|
|
|
|
if (Mode == GenerationMode.CharacterLevel)
|
|
|
|
|
result += nextNgram[nextNgram.Length - 1];
|
|
|
|
|
else
|
|
|
|
|
result += ' ' + nextNgram.Split(' ').Last();
|
|
|
|
|
result += ' ' + nextNgram.Substring(nextNgram.LastIndexOf(' ') + 1);
|
|
|
|
|
lastNgram = nextNgram; i++;
|
|
|
|
|
}
|
|
|
|
|
wrandom.ClearContents();
|
|
|
|
|