Bugfix: Correct word-level flat n-gram generation

This commit is contained in:
Starbeamrainbowlabs 2018-09-14 15:54:55 +01:00
parent fd23a8bb69
commit b5210030bd
Signed by: sbrl
GPG key ID: 1BE5172E637709C2
2 changed files with 5 additions and 4 deletions

View file

@@ -50,7 +50,8 @@ namespace MarkovGrams
else { else {
string[] parts = str.Split(" ".ToCharArray()); string[] parts = str.Split(" ".ToCharArray());
for (int i = 0; i < parts.Length; i++) for (int i = 0; i < parts.Length; i++)
results.Add(string.Join(" ", str.Skip(i).Take(order)).Trim()); results.Add(string.Join(" ", parts.Skip(i).Take(order)).Trim());
} }
return results.Distinct(); return results.Distinct();
} }

View file

@@ -75,7 +75,7 @@ namespace MarkovGrams
string result = RandomNgram(); string result = RandomNgram();
string lastNgram = result; string lastNgram = result;
List<int> choiceCounts = new List<int>(); int i = 0; List<int> choiceCounts = new List<int>(); int i = 0;
while((Mode == GenerationMode.CharacterLevel ? result.Length : result.Split(' ').Length) < length) while((Mode == GenerationMode.CharacterLevel ? result.Length : result.CountCharInstances(" ".ToCharArray()) + 1) < length)
{ {
// The substring that the next ngram in the chain needs to start with // The substring that the next ngram in the chain needs to start with
string nextStartsWith = Mode == GenerationMode.CharacterLevel ? lastNgram.Substring(1) : string.Join(" ", lastNgram.Split(' ').Skip(1)); string nextStartsWith = Mode == GenerationMode.CharacterLevel ? lastNgram.Substring(1) : string.Join(" ", lastNgram.Split(' ').Skip(1));
@@ -91,8 +91,8 @@ namespace MarkovGrams
if (Mode == GenerationMode.CharacterLevel) if (Mode == GenerationMode.CharacterLevel)
result += nextNgram[nextNgram.Length - 1]; result += nextNgram[nextNgram.Length - 1];
else else
result += ' ' + nextNgram.Split(' ').Last(); result += ' ' + nextNgram.Substring(nextNgram.LastIndexOf(' ') + 1);
lastNgram = nextNgram; i++; lastNgram = nextNgram; i++;
} }