Compare commits
No commits in common. "1a984d538cba5215cf3c157dbaab9165641b7f71" and "7285459604efe15ab2927074355e9348471fb5a4" have entirely different histories.
1a984d538c
...
7285459604
7 changed files with 17 additions and 149740 deletions
|
@ -94,7 +94,6 @@ namespace MarkovGrams
|
||||||
string[] parts = str.Split(" ".ToCharArray());
|
string[] parts = str.Split(" ".ToCharArray());
|
||||||
for (int i = 0; i < parts.Length - order; i++) {
|
for (int i = 0; i < parts.Length - order; i++) {
|
||||||
string ngram = string.Join(" ", parts.Skip(i).Take(order)).Trim();
|
string ngram = string.Join(" ", parts.Skip(i).Take(order)).Trim();
|
||||||
if (ngram.Trim().Length == 0) continue;
|
|
||||||
if (!results.ContainsKey(ngram))
|
if (!results.ContainsKey(ngram))
|
||||||
results[ngram] = 0;
|
results[ngram] = 0;
|
||||||
results[ngram]++;
|
results[ngram]++;
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
using System;
|
using System;
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
using System.Linq;
|
using System.Linq;
|
||||||
using MarkovGrams.Utilities;
|
|
||||||
|
|
||||||
namespace MarkovGrams
|
namespace MarkovGrams
|
||||||
{
|
{
|
||||||
|
@ -80,7 +79,7 @@ namespace MarkovGrams
|
||||||
// The substring that the next ngram in the chain needs to start with
|
// The substring that the next ngram in the chain needs to start with
|
||||||
string nextStartsWith = Mode == GenerationMode.CharacterLevel ? lastNgram.Substring(1) : string.Join(" ", lastNgram.Split(' ').Skip(1));
|
string nextStartsWith = Mode == GenerationMode.CharacterLevel ? lastNgram.Substring(1) : string.Join(" ", lastNgram.Split(' ').Skip(1));
|
||||||
// Get a list of possible n-grams we could choose from next
|
// Get a list of possible n-grams we could choose from next
|
||||||
List<string> nextNgrams = ngrams.FindAll(gram => gram.StartsWithFast(nextStartsWith));
|
List<string> nextNgrams = ngrams.FindAll(gram => gram.StartsWith(nextStartsWith));
|
||||||
choiceCounts.Add(nextNgrams.Count);
|
choiceCounts.Add(nextNgrams.Count);
|
||||||
// If there aren't any choices left, we can't exactly keep adding to the new string any more :-(
|
// If there aren't any choices left, we can't exactly keep adding to the new string any more :-(
|
||||||
if(nextNgrams.Count == 0)
|
if(nextNgrams.Count == 0)
|
||||||
|
|
|
@ -20,21 +20,5 @@ namespace MarkovGrams.Utilities
|
||||||
list.RemoveAt(index);
|
list.RemoveAt(index);
|
||||||
return item;
|
return item;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static int CountCharInstances(this string str, char[] targets)
|
|
||||||
{
|
|
||||||
int result = 0;
|
|
||||||
for (int i = 0; i < str.Length; i++) {
|
|
||||||
for (int t = 0; t < targets.Length; t++)
|
|
||||||
if (str[i] == targets[t]) result++;
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static bool StartsWithFast(this string str, string target)
|
|
||||||
{
|
|
||||||
if (str.Length < target.Length) return false;
|
|
||||||
return str.Substring(0, target.Length) == target;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,8 +1,6 @@
|
||||||
using System;
|
using System;
|
||||||
using System.Collections.Concurrent;
|
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
using System.Linq;
|
using System.Linq;
|
||||||
using System.Threading.Tasks;
|
|
||||||
|
|
||||||
namespace SBRL.Algorithms
|
namespace SBRL.Algorithms
|
||||||
{
|
{
|
||||||
|
@ -28,7 +26,7 @@ namespace SBRL.Algorithms
|
||||||
{
|
{
|
||||||
private Random rand = new Random();
|
private Random rand = new Random();
|
||||||
|
|
||||||
protected ConcurrentDictionary<ItemType, double> weights = new ConcurrentDictionary<ItemType, double>();
|
protected Dictionary<ItemType, double> weights = new Dictionary<ItemType, double>();
|
||||||
|
|
||||||
public int Count {
|
public int Count {
|
||||||
get {
|
get {
|
||||||
|
@ -57,11 +55,10 @@ namespace SBRL.Algorithms
|
||||||
if (items.Count == 0)
|
if (items.Count == 0)
|
||||||
throw new ArgumentException("Error: The items dictionary provided is empty!");
|
throw new ArgumentException("Error: The items dictionary provided is empty!");
|
||||||
|
|
||||||
double totalWeight = items.Values.Sum();
|
double totalWeight = items.Values.Aggregate((double a, double b) => a + b);
|
||||||
Parallel.ForEach(items, (KeyValuePair<ItemType, double> itemData) => {
|
foreach (KeyValuePair<ItemType, double> itemData in items) {
|
||||||
if (!weights.TryAdd(itemData.Key, itemData.Value / totalWeight))
|
weights.Add(itemData.Key, itemData.Value / totalWeight);
|
||||||
throw new Exception("WeightedRandom: Failed to add new weight definition to weights ConcurrentDictionary!");
|
}
|
||||||
});
|
|
||||||
}
|
}
|
||||||
public void ClearContents()
|
public void ClearContents()
|
||||||
{
|
{
|
||||||
|
|
|
@ -1,8 +1,6 @@
|
||||||
using System;
|
using System;
|
||||||
using System.Collections.Concurrent;
|
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
using System.Linq;
|
using System.Linq;
|
||||||
using System.Threading.Tasks;
|
|
||||||
using MarkovGrams.Utilities;
|
using MarkovGrams.Utilities;
|
||||||
using SBRL.Algorithms;
|
using SBRL.Algorithms;
|
||||||
|
|
||||||
|
@ -59,13 +57,9 @@ namespace MarkovGrams
|
||||||
if (!StartOnUppercase)
|
if (!StartOnUppercase)
|
||||||
wrandom.SetContents(ngrams);
|
wrandom.SetContents(ngrams);
|
||||||
else {
|
else {
|
||||||
ConcurrentDictionary<string, double> filteredNGrams = new ConcurrentDictionary<string, double>();
|
Dictionary<string, double> filteredNGrams = new Dictionary<string, double>();
|
||||||
Parallel.ForEach(ngrams, (KeyValuePair<string, double> pair) => {
|
foreach (KeyValuePair<string, double> pair in ngrams.Where((pair) => char.IsUpper(pair.Key[0])))
|
||||||
if (!char.IsUpper(pair.Key[0])) return;
|
filteredNGrams.Add(pair.Key, pair.Value);
|
||||||
if (!filteredNGrams.TryAdd(pair.Key, pair.Value))
|
|
||||||
throw new Exception("Error: Couldn't add to uppercase staging n-gram ConcurrentDictionary!");
|
|
||||||
});
|
|
||||||
|
|
||||||
if (filteredNGrams.Count() == 0)
|
if (filteredNGrams.Count() == 0)
|
||||||
throw new Exception($"Error: No valid starting ngrams were found (StartOnUppercase: {StartOnUppercase}).");
|
throw new Exception($"Error: No valid starting ngrams were found (StartOnUppercase: {StartOnUppercase}).");
|
||||||
wrandom.SetContents(filteredNGrams);
|
wrandom.SetContents(filteredNGrams);
|
||||||
|
@ -93,24 +87,20 @@ namespace MarkovGrams
|
||||||
{
|
{
|
||||||
string result = RandomNgram();
|
string result = RandomNgram();
|
||||||
string lastNgram = result;
|
string lastNgram = result;
|
||||||
ConcurrentBag<int> choiceCounts = new ConcurrentBag<int>(); int i = 0;
|
List<int> choiceCounts = new List<int>(); int i = 0;
|
||||||
while((Mode == GenerationMode.CharacterLevel ? result.Length : result.CountCharInstances(" ".ToCharArray()) + 1) < length)
|
while((Mode == GenerationMode.CharacterLevel ? result.Length : result.Split(' ').Length) < length)
|
||||||
{
|
{
|
||||||
wrandom.ClearContents();
|
wrandom.ClearContents();
|
||||||
// The substring that the next ngram in the chain needs to start with
|
// The substring that the next ngram in the chain needs to start with
|
||||||
string nextStartsWith = Mode == GenerationMode.CharacterLevel ? lastNgram.Substring(1) : string.Join(" ", lastNgram.Split(' ').Skip(1));
|
string nextStartsWith = Mode == GenerationMode.CharacterLevel ? lastNgram.Substring(1) : string.Join(" ", lastNgram.Split(' ').Skip(1));
|
||||||
// Get a list of possible n-grams we could choose from next
|
// Get a list of possible n-grams we could choose from next
|
||||||
ConcurrentDictionary<string, double> convNextNgrams = new ConcurrentDictionary<string, double>();
|
Dictionary<string, double> convNextNgrams = new Dictionary<string, double>();
|
||||||
Parallel.ForEach(ngrams, (KeyValuePair<string, double> ngramData) => {
|
ngrams.Where(gram_data => gram_data.Key.StartsWith(nextStartsWith))
|
||||||
if (!ngramData.Key.StartsWithFast(nextStartsWith)) return;
|
.ForEach((KeyValuePair<string, double> ngramData) => convNextNgrams.Add(ngramData.Key, ngramData.Value));
|
||||||
|
|
||||||
if (!convNextNgrams.TryAdd(ngramData.Key, ngramData.Value))
|
choiceCounts.Add(convNextNgrams.Count);
|
||||||
throw new Exception("Error: Failed to add to staging ngram concurrent dictionary");
|
|
||||||
});
|
|
||||||
|
|
||||||
choiceCounts.Add(convNextNgrams.Count());
|
|
||||||
// If there aren't any choices left, we can't exactly keep adding to the new string any more :-(
|
// If there aren't any choices left, we can't exactly keep adding to the new string any more :-(
|
||||||
if(convNextNgrams.Count == 0)
|
if(convNextNgrams.Count() == 0)
|
||||||
break;
|
break;
|
||||||
wrandom.SetContents(convNextNgrams);
|
wrandom.SetContents(convNextNgrams);
|
||||||
// Pick a random n-gram from the list
|
// Pick a random n-gram from the list
|
||||||
|
@ -119,7 +109,7 @@ namespace MarkovGrams
|
||||||
if (Mode == GenerationMode.CharacterLevel)
|
if (Mode == GenerationMode.CharacterLevel)
|
||||||
result += nextNgram[nextNgram.Length - 1];
|
result += nextNgram[nextNgram.Length - 1];
|
||||||
else
|
else
|
||||||
result += ' ' + nextNgram.Substring(nextNgram.LastIndexOf(' ') + 1);
|
result += ' ' + nextNgram.Split(' ').Last();
|
||||||
lastNgram = nextNgram; i++;
|
lastNgram = nextNgram; i++;
|
||||||
}
|
}
|
||||||
wrandom.ClearContents();
|
wrandom.ClearContents();
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -25,6 +25,3 @@ curl https://stardewids.com/ | xidel --data - --css "td.ts a" | sort >Stardew-Va
|
||||||
|
|
||||||
### Recipes Wikia ###
|
### Recipes Wikia ###
|
||||||
curl http://recipes.wikia.com/sitemap-newsitemapxml-index.xml | xidel --data - --css "loc" | grep -i NS_0 | xargs -n1 -I{} sh -c 'curl {} | xidel --data - --css "loc"' | sed -e 's/^.*\///g' -e 's/_/ /g' | python -c "import urllib, sys; print urllib.unquote(sys.argv[1] if len(sys.argv) > 1 else sys.stdin.read()[0:-1])" | sort >Dishes.txt
|
curl http://recipes.wikia.com/sitemap-newsitemapxml-index.xml | xidel --data - --css "loc" | grep -i NS_0 | xargs -n1 -I{} sh -c 'curl {} | xidel --data - --css "loc"' | sed -e 's/^.*\///g' -e 's/_/ /g' | python -c "import urllib, sys; print urllib.unquote(sys.argv[1] if len(sys.argv) > 1 else sys.stdin.read()[0:-1])" | sort >Dishes.txt
|
||||||
|
|
||||||
### Shakespeare's Complete Works ###
|
|
||||||
curl https://www.gutenberg.org/files/100/100-0.txt >Shakespeares-Works.txt
|
|
||||||
|
|
Loading…
Reference in a new issue