From b5210030bd387a8c1722b51d6b7f18bcf98235fd Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs
Date: Fri, 14 Sep 2018 15:54:55 +0100
Subject: [PATCH] Bugfix: Correct word-level flat n-gram generation

---
 MarkovGrams/NGrams.cs                | 3 ++-
 MarkovGrams/UnweightedMarkovChain.cs | 6 +++---
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/MarkovGrams/NGrams.cs b/MarkovGrams/NGrams.cs
index e17967c..c4284ee 100644
--- a/MarkovGrams/NGrams.cs
+++ b/MarkovGrams/NGrams.cs
@@ -50,7 +50,8 @@ namespace MarkovGrams
 			else {
 				string[] parts = str.Split(" ".ToCharArray());
 				for (int i = 0; i < parts.Length; i++)
-					results.Add(string.Join(" ", str.Skip(i).Take(order)).Trim());
+					results.Add(string.Join(" ", parts.Skip(i).Take(order)).Trim());
+
 			}
 			return results.Distinct();
 		}
diff --git a/MarkovGrams/UnweightedMarkovChain.cs b/MarkovGrams/UnweightedMarkovChain.cs
index f15c3a4..9803223 100644
--- a/MarkovGrams/UnweightedMarkovChain.cs
+++ b/MarkovGrams/UnweightedMarkovChain.cs
@@ -75,7 +75,7 @@ namespace MarkovGrams
 			string result = RandomNgram();
 			string lastNgram = result;
 			List choiceCounts = new List(); int i = 0;
-			while((Mode == GenerationMode.CharacterLevel ? result.Length : result.Split(' ').Length) < length)
+			while((Mode == GenerationMode.CharacterLevel ? result.Length : result.CountCharInstances(" ".ToCharArray()) + 1) < length)
 			{
 				// The substring that the next ngram in the chain needs to start with
 				string nextStartsWith = Mode == GenerationMode.CharacterLevel ? lastNgram.Substring(1) : string.Join(" ", lastNgram.Split(' ').Skip(1));
@@ -91,8 +91,8 @@ namespace MarkovGrams
 				if (Mode == GenerationMode.CharacterLevel)
 					result += nextNgram[nextNgram.Length - 1];
 				else
-					result += ' ' + nextNgram.Split(' ').Last();
-
+					result += ' ' + nextNgram.Substring(nextNgram.LastIndexOf(' ') + 1);
+
 				lastNgram = nextNgram;
 				i++;
 			}