From 1a984d538cba5215cf3c157dbaab9165641b7f71 Mon Sep 17 00:00:00 2001 From: Starbeamrainbowlabs Date: Mon, 3 Sep 2018 22:19:57 +0100 Subject: [PATCH] Do some optimising to make it work better with Shakespeare. The weighted generator now does things in parallel! --- MarkovGrams/UnweightedMarkovChain.cs | 3 ++- MarkovGrams/Utilities/LinqExtensions.cs | 16 +++++++++++++ MarkovGrams/Utilities/WeightedRandom.cs | 13 ++++++---- MarkovGrams/WeightedMarkovChain.cs | 32 ++++++++++++++++--------- 4 files changed, 47 insertions(+), 17 deletions(-) diff --git a/MarkovGrams/UnweightedMarkovChain.cs b/MarkovGrams/UnweightedMarkovChain.cs index 726f47f..f15c3a4 100644 --- a/MarkovGrams/UnweightedMarkovChain.cs +++ b/MarkovGrams/UnweightedMarkovChain.cs @@ -1,6 +1,7 @@ using System; using System.Collections.Generic; using System.Linq; +using MarkovGrams.Utilities; namespace MarkovGrams { @@ -79,7 +80,7 @@ namespace MarkovGrams // The substring that the next ngram in the chain needs to start with string nextStartsWith = Mode == GenerationMode.CharacterLevel ? 
lastNgram.Substring(1) : string.Join(" ", lastNgram.Split(' ').Skip(1)); // Get a list of possible n-grams we could choose from next - List nextNgrams = ngrams.FindAll(gram => gram.StartsWith(nextStartsWith)); + List nextNgrams = ngrams.FindAll(gram => gram.StartsWithFast(nextStartsWith)); choiceCounts.Add(nextNgrams.Count); // If there aren't any choices left, we can't exactly keep adding to the new string any more :-( if(nextNgrams.Count == 0) diff --git a/MarkovGrams/Utilities/LinqExtensions.cs b/MarkovGrams/Utilities/LinqExtensions.cs index 15e6ca3..88464f2 100644 --- a/MarkovGrams/Utilities/LinqExtensions.cs +++ b/MarkovGrams/Utilities/LinqExtensions.cs @@ -20,5 +20,21 @@ namespace MarkovGrams.Utilities list.RemoveAt(index); return item; } + + public static int CountCharInstances(this string str, char[] targets) + { + int result = 0; + for (int i = 0; i < str.Length; i++) { + for (int t = 0; t < targets.Length; t++) + if (str[i] == targets[t]) result++; + } + return result; + } + + public static bool StartsWithFast(this string str, string target) + { + if (str.Length < target.Length) return false; + return str.Substring(0, target.Length) == target; + } } } diff --git a/MarkovGrams/Utilities/WeightedRandom.cs b/MarkovGrams/Utilities/WeightedRandom.cs index 3d1b211..0b0f115 100644 --- a/MarkovGrams/Utilities/WeightedRandom.cs +++ b/MarkovGrams/Utilities/WeightedRandom.cs @@ -1,6 +1,8 @@ using System; +using System.Collections.Concurrent; using System.Collections.Generic; using System.Linq; +using System.Threading.Tasks; namespace SBRL.Algorithms { @@ -26,7 +28,7 @@ namespace SBRL.Algorithms { private Random rand = new Random(); - protected Dictionary weights = new Dictionary(); + protected ConcurrentDictionary weights = new ConcurrentDictionary(); public int Count { get { @@ -55,10 +57,11 @@ namespace SBRL.Algorithms if (items.Count == 0) throw new ArgumentException("Error: The items dictionary provided is empty!"); - double totalWeight = 
items.Values.Aggregate((double a, double b) => a + b); - foreach (KeyValuePair itemData in items) { - weights.Add(itemData.Key, itemData.Value / totalWeight); - } + double totalWeight = items.Values.Sum(); + Parallel.ForEach(items, (KeyValuePair itemData) => { + if (!weights.TryAdd(itemData.Key, itemData.Value / totalWeight)) + throw new Exception("WeightedRandom: Failed to add new weight definition to weights ConcurrentDictionary!"); + }); } public void ClearContents() { diff --git a/MarkovGrams/WeightedMarkovChain.cs b/MarkovGrams/WeightedMarkovChain.cs index 34616e5..032618e 100644 --- a/MarkovGrams/WeightedMarkovChain.cs +++ b/MarkovGrams/WeightedMarkovChain.cs @@ -1,6 +1,8 @@ using System; +using System.Collections.Concurrent; using System.Collections.Generic; using System.Linq; +using System.Threading.Tasks; using MarkovGrams.Utilities; using SBRL.Algorithms; @@ -57,9 +59,13 @@ namespace MarkovGrams if (!StartOnUppercase) wrandom.SetContents(ngrams); else { - Dictionary filteredNGrams = new Dictionary(); - foreach (KeyValuePair pair in ngrams.Where((pair) => char.IsUpper(pair.Key[0]))) - filteredNGrams.Add(pair.Key, pair.Value); + ConcurrentDictionary filteredNGrams = new ConcurrentDictionary(); + Parallel.ForEach(ngrams, (KeyValuePair pair) => { + if (!char.IsUpper(pair.Key[0])) return; + if (!filteredNGrams.TryAdd(pair.Key, pair.Value)) + throw new Exception("Error: Couldn't add to uppercase staging n-gram ConcurrentDictionary!"); + }); + if (filteredNGrams.Count() == 0) throw new Exception($"Error: No valid starting ngrams were found (StartOnUppercase: {StartOnUppercase})."); wrandom.SetContents(filteredNGrams); @@ -87,20 +93,24 @@ namespace MarkovGrams { string result = RandomNgram(); string lastNgram = result; - List choiceCounts = new List(); int i = 0; - while((Mode == GenerationMode.CharacterLevel ? 
result.Length : result.Split(' ').Length) < length) + ConcurrentBag choiceCounts = new ConcurrentBag(); int i = 0; + while((Mode == GenerationMode.CharacterLevel ? result.Length : result.CountCharInstances(" ".ToCharArray()) + 1) < length) { wrandom.ClearContents(); // The substring that the next ngram in the chain needs to start with string nextStartsWith = Mode == GenerationMode.CharacterLevel ? lastNgram.Substring(1) : string.Join(" ", lastNgram.Split(' ').Skip(1)); // Get a list of possible n-grams we could choose from next - Dictionary convNextNgrams = new Dictionary(); - ngrams.Where(gram_data => gram_data.Key.StartsWith(nextStartsWith)) - .ForEach((KeyValuePair ngramData) => convNextNgrams.Add(ngramData.Key, ngramData.Value)); + ConcurrentDictionary convNextNgrams = new ConcurrentDictionary(); + Parallel.ForEach(ngrams, (KeyValuePair ngramData) => { + if (!ngramData.Key.StartsWithFast(nextStartsWith)) return; + + if (!convNextNgrams.TryAdd(ngramData.Key, ngramData.Value)) + throw new Exception("Error: Failed to add to staging ngram concurrent dictionary"); + }); - choiceCounts.Add(convNextNgrams.Count); + choiceCounts.Add(convNextNgrams.Count()); // If there aren't any choices left, we can't exactly keep adding to the new string any more :-( - if(convNextNgrams.Count() == 0) + if(convNextNgrams.Count == 0) break; wrandom.SetContents(convNextNgrams); // Pick a random n-gram from the list @@ -109,7 +119,7 @@ namespace MarkovGrams if (Mode == GenerationMode.CharacterLevel) result += nextNgram[nextNgram.Length - 1]; else - result += ' ' + nextNgram.Split(' ').Last(); + result += ' ' + nextNgram.Substring(nextNgram.LastIndexOf(' ') + 1); lastNgram = nextNgram; i++; } wrandom.ClearContents();