Do some optimising to make it work better with Shakespeare. The weighted generator now does things in parallel!

This commit is contained in:
Starbeamrainbowlabs 2018-09-03 22:19:57 +01:00
parent 83d27d2289
commit 1a984d538c
Signed by: sbrl
GPG key ID: 1BE5172E637709C2
4 changed files with 47 additions and 17 deletions

View file

@ -1,6 +1,7 @@
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.Linq; using System.Linq;
using MarkovGrams.Utilities;
namespace MarkovGrams namespace MarkovGrams
{ {
@ -79,7 +80,7 @@ namespace MarkovGrams
// The substring that the next ngram in the chain needs to start with // The substring that the next ngram in the chain needs to start with
string nextStartsWith = Mode == GenerationMode.CharacterLevel ? lastNgram.Substring(1) : string.Join(" ", lastNgram.Split(' ').Skip(1)); string nextStartsWith = Mode == GenerationMode.CharacterLevel ? lastNgram.Substring(1) : string.Join(" ", lastNgram.Split(' ').Skip(1));
// Get a list of possible n-grams we could choose from next // Get a list of possible n-grams we could choose from next
List<string> nextNgrams = ngrams.FindAll(gram => gram.StartsWith(nextStartsWith)); List<string> nextNgrams = ngrams.FindAll(gram => gram.StartsWithFast(nextStartsWith));
choiceCounts.Add(nextNgrams.Count); choiceCounts.Add(nextNgrams.Count);
// If there aren't any choices left, we can't exactly keep adding to the new string any more :-( // If there aren't any choices left, we can't exactly keep adding to the new string any more :-(
if(nextNgrams.Count == 0) if(nextNgrams.Count == 0)

View file

@ -20,5 +20,21 @@ namespace MarkovGrams.Utilities
list.RemoveAt(index); list.RemoveAt(index);
return item; return item;
} }
/// <summary>
/// Counts how many times any of the given target characters occur in a string.
/// </summary>
/// <remarks>
/// Every character of <paramref name="str"/> is compared against every entry of
/// <paramref name="targets"/>, so a character listed more than once in
/// <paramref name="targets"/> is counted once per listing.
/// </remarks>
/// <param name="str">The string to scan.</param>
/// <param name="targets">The characters to look for.</param>
/// <returns>The total number of (character, target) matches found.</returns>
public static int CountCharInstances(this string str, char[] targets)
{
    int total = 0;
    foreach (char current in str)
    {
        foreach (char candidate in targets)
        {
            if (current == candidate)
                total++;
        }
    }
    return total;
}
/// <summary>
/// Checks whether a string begins with the given prefix, comparing the characters
/// ordinally (by their numeric values, with no culture-specific rules).
/// </summary>
/// <param name="str">The string to test.</param>
/// <param name="target">The prefix to look for at the start of <paramref name="str"/>.</param>
/// <returns>
/// true if the first <c>target.Length</c> characters of <paramref name="str"/> equal
/// <paramref name="target"/>; false otherwise, including when <paramref name="str"/>
/// is shorter than <paramref name="target"/>.
/// </returns>
public static bool StartsWithFast(this string str, string target)
{
    // A string can never start with something longer than itself.
    if (str.Length < target.Length) return false;
    // Compare the prefix range in place instead of allocating a temporary substring
    // on every call; the result matches an ordinal == comparison of that prefix.
    return string.CompareOrdinal(str, 0, target, 0, target.Length) == 0;
}
} }
} }

View file

@ -1,6 +1,8 @@
using System; using System;
using System.Collections.Concurrent;
using System.Collections.Generic; using System.Collections.Generic;
using System.Linq; using System.Linq;
using System.Threading.Tasks;
namespace SBRL.Algorithms namespace SBRL.Algorithms
{ {
@ -26,7 +28,7 @@ namespace SBRL.Algorithms
{ {
private Random rand = new Random(); private Random rand = new Random();
protected Dictionary<ItemType, double> weights = new Dictionary<ItemType, double>(); protected ConcurrentDictionary<ItemType, double> weights = new ConcurrentDictionary<ItemType, double>();
public int Count { public int Count {
get { get {
@ -55,10 +57,11 @@ namespace SBRL.Algorithms
if (items.Count == 0) if (items.Count == 0)
throw new ArgumentException("Error: The items dictionary provided is empty!"); throw new ArgumentException("Error: The items dictionary provided is empty!");
double totalWeight = items.Values.Aggregate((double a, double b) => a + b); double totalWeight = items.Values.Sum();
foreach (KeyValuePair<ItemType, double> itemData in items) { Parallel.ForEach(items, (KeyValuePair<ItemType, double> itemData) => {
weights.Add(itemData.Key, itemData.Value / totalWeight); if (!weights.TryAdd(itemData.Key, itemData.Value / totalWeight))
} throw new Exception("WeightedRandom: Failed to add new weight definition to weights ConcurrentDictionary!");
});
} }
public void ClearContents() public void ClearContents()
{ {

View file

@ -1,6 +1,8 @@
using System; using System;
using System.Collections.Concurrent;
using System.Collections.Generic; using System.Collections.Generic;
using System.Linq; using System.Linq;
using System.Threading.Tasks;
using MarkovGrams.Utilities; using MarkovGrams.Utilities;
using SBRL.Algorithms; using SBRL.Algorithms;
@ -57,9 +59,13 @@ namespace MarkovGrams
if (!StartOnUppercase) if (!StartOnUppercase)
wrandom.SetContents(ngrams); wrandom.SetContents(ngrams);
else { else {
Dictionary<string, double> filteredNGrams = new Dictionary<string, double>(); ConcurrentDictionary<string, double> filteredNGrams = new ConcurrentDictionary<string, double>();
foreach (KeyValuePair<string, double> pair in ngrams.Where((pair) => char.IsUpper(pair.Key[0]))) Parallel.ForEach(ngrams, (KeyValuePair<string, double> pair) => {
filteredNGrams.Add(pair.Key, pair.Value); if (!char.IsUpper(pair.Key[0])) return;
if (!filteredNGrams.TryAdd(pair.Key, pair.Value))
throw new Exception("Error: Couldn't add to uppercase staging n-gram ConcurrentDictionary!");
});
if (filteredNGrams.Count() == 0) if (filteredNGrams.Count() == 0)
throw new Exception($"Error: No valid starting ngrams were found (StartOnUppercase: {StartOnUppercase})."); throw new Exception($"Error: No valid starting ngrams were found (StartOnUppercase: {StartOnUppercase}).");
wrandom.SetContents(filteredNGrams); wrandom.SetContents(filteredNGrams);
@ -87,20 +93,24 @@ namespace MarkovGrams
{ {
string result = RandomNgram(); string result = RandomNgram();
string lastNgram = result; string lastNgram = result;
List<int> choiceCounts = new List<int>(); int i = 0; ConcurrentBag<int> choiceCounts = new ConcurrentBag<int>(); int i = 0;
while((Mode == GenerationMode.CharacterLevel ? result.Length : result.Split(' ').Length) < length) while((Mode == GenerationMode.CharacterLevel ? result.Length : result.CountCharInstances(" ".ToCharArray()) + 1) < length)
{ {
wrandom.ClearContents(); wrandom.ClearContents();
// The substring that the next ngram in the chain needs to start with // The substring that the next ngram in the chain needs to start with
string nextStartsWith = Mode == GenerationMode.CharacterLevel ? lastNgram.Substring(1) : string.Join(" ", lastNgram.Split(' ').Skip(1)); string nextStartsWith = Mode == GenerationMode.CharacterLevel ? lastNgram.Substring(1) : string.Join(" ", lastNgram.Split(' ').Skip(1));
// Get a list of possible n-grams we could choose from next // Get a list of possible n-grams we could choose from next
Dictionary<string, double> convNextNgrams = new Dictionary<string, double>(); ConcurrentDictionary<string, double> convNextNgrams = new ConcurrentDictionary<string, double>();
ngrams.Where(gram_data => gram_data.Key.StartsWith(nextStartsWith)) Parallel.ForEach(ngrams, (KeyValuePair<string, double> ngramData) => {
.ForEach((KeyValuePair<string, double> ngramData) => convNextNgrams.Add(ngramData.Key, ngramData.Value)); if (!ngramData.Key.StartsWithFast(nextStartsWith)) return;
choiceCounts.Add(convNextNgrams.Count); if (!convNextNgrams.TryAdd(ngramData.Key, ngramData.Value))
throw new Exception("Error: Failed to add to staging ngram concurrent dictionary");
});
choiceCounts.Add(convNextNgrams.Count());
// If there aren't any choices left, we can't exactly keep adding to the new string any more :-( // If there aren't any choices left, we can't exactly keep adding to the new string any more :-(
if(convNextNgrams.Count() == 0) if(convNextNgrams.Count == 0)
break; break;
wrandom.SetContents(convNextNgrams); wrandom.SetContents(convNextNgrams);
// Pick a random n-gram from the list // Pick a random n-gram from the list
@ -109,7 +119,7 @@ namespace MarkovGrams
if (Mode == GenerationMode.CharacterLevel) if (Mode == GenerationMode.CharacterLevel)
result += nextNgram[nextNgram.Length - 1]; result += nextNgram[nextNgram.Length - 1];
else else
result += ' ' + nextNgram.Split(' ').Last(); result += ' ' + nextNgram.Substring(nextNgram.LastIndexOf(' ') + 1);
lastNgram = nextNgram; i++; lastNgram = nextNgram; i++;
} }
wrandom.ClearContents(); wrandom.ClearContents();