Compare commits
2 commits
a98ad0a1a5
...
b2bfa0c6e4
Author | SHA1 | Date | |
---|---|---|---|
b2bfa0c6e4 | |||
a08be7f61d |
5 changed files with 224 additions and 1 deletions
|
@ -36,6 +36,12 @@
|
||||||
<Compile Include="Properties\AssemblyInfo.cs" />
|
<Compile Include="Properties\AssemblyInfo.cs" />
|
||||||
<Compile Include="NGrams.cs" />
|
<Compile Include="NGrams.cs" />
|
||||||
<Compile Include="UnweightedMarkovChain.cs" />
|
<Compile Include="UnweightedMarkovChain.cs" />
|
||||||
|
<Compile Include="Utilities\WeightedRandom.cs" />
|
||||||
|
<Compile Include="WeightedMarkovChain.cs" />
|
||||||
|
<Compile Include="Utilities\LinqExtensions.cs" />
|
||||||
|
</ItemGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<Folder Include="Utilities\" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
|
<Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
|
||||||
</Project>
|
</Project>
|
|
@ -16,7 +16,9 @@ namespace MarkovGrams
|
||||||
Console.WriteLine();
|
Console.WriteLine();
|
||||||
Console.WriteLine("Available commands:");
|
Console.WriteLine("Available commands:");
|
||||||
Console.WriteLine(" markov:");
|
Console.WriteLine(" markov:");
|
||||||
Console.WriteLine(" Generate new words using a markov chain.");
|
Console.WriteLine(" Generate new words using an unweighted markov chain.");
|
||||||
|
Console.WriteLine(" markov-w:");
|
||||||
|
Console.WriteLine(" Generate new words using a weighted markov chain.");
|
||||||
Console.WriteLine(" ngrams:");
|
Console.WriteLine(" ngrams:");
|
||||||
Console.WriteLine(" Generate raw unique n-grams");
|
Console.WriteLine(" Generate raw unique n-grams");
|
||||||
Console.WriteLine();
|
Console.WriteLine();
|
||||||
|
@ -57,6 +59,33 @@ namespace MarkovGrams
|
||||||
for(int i = 0; i < count; i++)
|
for(int i = 0; i < count; i++)
|
||||||
Console.WriteLine(chain.Generate(desiredStringLength));
|
Console.WriteLine(chain.Generate(desiredStringLength));
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case "markov-w":
|
||||||
|
if (args.Length != 5)
|
||||||
|
{
|
||||||
|
Console.WriteLine("markov-w command usage:");
|
||||||
|
Console.WriteLine(" ./MarkovGrams.exe markov-w <wordlist.txt> <order> <length> <count>");
|
||||||
|
Console.WriteLine();
|
||||||
|
Console.WriteLine("<wordlist.txt> The path to the wordlist to read from.");
|
||||||
|
Console.WriteLine("<order> The order of the n-grams to use.");
|
||||||
|
Console.WriteLine("<length> The length of word to generate.");
|
||||||
|
Console.WriteLine("<count> The number of words to generate.");
|
||||||
|
Console.WriteLine();
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
wordlistFilename = args[1];
|
||||||
|
order = int.Parse(args[2]);
|
||||||
|
int weightedDesiredStringLength = int.Parse(args[3]);
|
||||||
|
int weightedCount = int.Parse(args[4]);
|
||||||
|
|
||||||
|
words = File.ReadLines(wordlistFilename).SelectMany(word => word.Trim().Split(' '));
|
||||||
|
ngrams = NGrams.GenerateFlat(words, order);
|
||||||
|
|
||||||
|
WeightedMarkovChain weightedChain = new WeightedMarkovChain(ngrams);
|
||||||
|
|
||||||
|
for (int i = 0; i < weightedCount; i++)
|
||||||
|
Console.WriteLine(weightedChain.Generate(weightedDesiredStringLength));
|
||||||
|
break;
|
||||||
|
|
||||||
case "ngrams":
|
case "ngrams":
|
||||||
if(args.Length != 3)
|
if(args.Length != 3)
|
||||||
|
|
16
MarkovGrams/Utilities/LinqExtensions.cs
Normal file
16
MarkovGrams/Utilities/LinqExtensions.cs
Normal file
|
@ -0,0 +1,16 @@
|
||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
|
||||||
|
namespace MarkovGrams.Utilities
|
||||||
|
{
|
||||||
|
/// <summary>
/// Extension methods that complement the standard LINQ operators.
/// </summary>
public static class LinqExtensions
{
    /// <summary>
    /// Invokes <paramref name="action"/> once for every item in
    /// <paramref name="enumerable"/>, in enumeration order.
    /// </summary>
    /// <typeparam name="T">The type of item in the sequence.</typeparam>
    /// <param name="enumerable">The sequence to iterate over.</param>
    /// <param name="action">The callback to run for each item.</param>
    /// <exception cref="ArgumentNullException">
    /// Thrown if <paramref name="enumerable"/> or <paramref name="action"/> is null.
    /// </exception>
    public static void ForEach<T>(this IEnumerable<T> enumerable, Action<T> action)
    {
        // Fail fast with a descriptive exception instead of a NullReferenceException
        // raised part-way through (or before) the iteration.
        if (enumerable == null)
            throw new ArgumentNullException(nameof(enumerable));
        if (action == null)
            throw new ArgumentNullException(nameof(action));

        foreach (T item in enumerable)
        {
            action(item);
        }
    }
}
|
||||||
|
}
|
87
MarkovGrams/Utilities/WeightedRandom.cs
Normal file
87
MarkovGrams/Utilities/WeightedRandom.cs
Normal file
|
@ -0,0 +1,87 @@
|
||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
|
||||||
|
namespace SBRL.Algorithms
|
||||||
|
{
|
||||||
|
/// <summary>
/// Picks random items from a list, according to weights assigned to them.
/// </summary>
/// <remarks>
/// Higher weights mean that an item is picked more frequently than the other items.
/// Weights are normalised by <see cref="SetContents"/> so that they sum to 1, which
/// means callers may pass raw counts rather than probabilities.
/// </remarks>
/// <license>Mozilla Public License version 2.0</license>
/// <origin>https://gist.github.com/sbrl/9090a8c646b8d34b6e0170ddfd197d09</origin>
/// <author>Starbeamrainbowlabs (https://starbeamrainbowlabs.com/)</author>
/// <changelog>
/// v0.1 - 20th May 2017:
///     - Creation! :D
/// </changelog>
public class WeightedRandom<ItemType>
{
    private readonly Random rand = new Random();

    /// <summary>
    /// The items that can be picked, mapped to their normalised weights.
    /// </summary>
    protected Dictionary<ItemType, double> weights = new Dictionary<ItemType, double>();

    /// <summary>
    /// The number of items currently loaded.
    /// </summary>
    public int Count {
        get {
            return weights.Count;
        }
    }

    /// <summary>
    /// Creates a new weighted random number generator.
    /// </summary>
    /// <param name="items">The dictionary of items and their corresponding weights.</param>
    public WeightedRandom(IDictionary<ItemType, double> items)
    {
        SetContents(items);
    }

    /// <summary>
    /// Creates a new empty weighted random number generator.
    /// Remember to populate it before using!
    /// </summary>
    public WeightedRandom()
    {
    }

    /// <summary>
    /// Replaces the current contents with the given items, normalising their
    /// weights so that they sum to 1.
    /// </summary>
    /// <param name="items">The items to pick from, mapped to their relative weights.</param>
    /// <exception cref="ArgumentNullException">Thrown if <paramref name="items"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// Thrown if <paramref name="items"/> is empty, contains a negative or non-finite
    /// weight, or if the weights do not add up to a positive, finite total.
    /// </exception>
    public void SetContents(IDictionary<ItemType, double> items)
    {
        if (items == null)
            throw new ArgumentNullException(nameof(items));
        if (items.Count == 0)
            throw new ArgumentException("Error: The items dictionary provided is empty!");

        double totalWeight = 0;
        foreach (double weight in items.Values)
        {
            if (double.IsNaN(weight) || double.IsInfinity(weight) || weight < 0)
                throw new ArgumentException("Error: Weights must be finite and non-negative.", nameof(items));
            totalWeight += weight;
        }
        // A zero total would divide by zero below; an infinite one would flatten every weight to 0.
        if (totalWeight <= 0 || double.IsInfinity(totalWeight))
            throw new ArgumentException("Error: The weights must add up to a positive, finite total.", nameof(items));

        // Build the replacement separately and swap it in at the end, so that the old
        // contents are discarded rather than appended to, and a validation failure
        // above leaves the previous contents untouched.
        Dictionary<ItemType, double> normalised = new Dictionary<ItemType, double>(items.Count);
        foreach (KeyValuePair<ItemType, double> itemData in items) {
            normalised.Add(itemData.Key, itemData.Value / totalWeight);
        }
        weights = normalised;
    }

    /// <summary>
    /// Removes every item, leaving this generator empty.
    /// </summary>
    public void ClearContents()
    {
        weights.Clear();
    }

    /// <summary>
    /// Picks a new random item from the currently loaded items, based
    /// on the weights assigned to them.
    /// </summary>
    /// <returns>A random item, picked according to the assigned weights.</returns>
    /// <exception cref="InvalidOperationException">
    /// Thrown if no items are loaded, or if none of the loaded items has a positive weight.
    /// </exception>
    public ItemType Next()
    {
        if (weights.Count == 0)
            throw new InvalidOperationException("Error: No weights specified! Add some with SetContents() before generating random numbers.");

        double target = rand.NextDouble();

        double cumulative = 0;
        bool haveCandidate = false;
        ItemType lastCandidate = default(ItemType);
        foreach (KeyValuePair<ItemType, double> weightData in weights)
        {
            // Items without a positive weight occupy no part of the range, so they can never be picked.
            // Written as !(x > 0) so that a NaN weight is skipped as well.
            if (!(weightData.Value > 0))
                continue;

            cumulative += weightData.Value;
            if (target < cumulative)
                return weightData.Key;

            lastCandidate = weightData.Key;
            haveCandidate = true;
        }

        // The normalised weights may sum to fractionally less than 1 due to floating-point
        // rounding. A target that lands in that gap belongs to the final item.
        if (haveCandidate)
            return lastCandidate;

        throw new InvalidOperationException($"Error: Unable to find the weight that matches {target}");
    }
}
|
||||||
|
}
|
85
MarkovGrams/WeightedMarkovChain.cs
Normal file
85
MarkovGrams/WeightedMarkovChain.cs
Normal file
|
@ -0,0 +1,85 @@
|
||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
using MarkovGrams.Utilities;
|
||||||
|
using SBRL.Algorithms;
|
||||||
|
|
||||||
|
namespace MarkovGrams
|
||||||
|
{
|
||||||
|
/// <summary>
/// A weighted character-based markov chain.
/// </summary>
/// <remarks>
/// Every distinct n-gram is weighted by the number of times it occurs in the input,
/// and that count is passed as the weight whenever an n-gram is picked at random.
/// </remarks>
public class WeightedMarkovChain
{
    private WeightedRandom<string> wrandom = new WeightedRandom<string>();

    /// <summary>
    /// The ngrams that this markov chain currently contains, mapped to their occurrence counts.
    /// </summary>
    private readonly Dictionary<string, double> ngrams;

    /// <summary>
    /// Creates a new weighted character-based markov chain.
    /// </summary>
    /// <param name="inNgrams">
    /// The ngrams to populate the new markov chain with. Duplicates are expected: each
    /// repeat raises that ngram's weight by one. Null and empty entries are ignored.
    /// </param>
    /// <exception cref="ArgumentNullException">Thrown if <paramref name="inNgrams"/> is null.</exception>
    public WeightedMarkovChain(IEnumerable<string> inNgrams)
    {
        if (inNgrams == null)
            throw new ArgumentNullException(nameof(inNgrams));

        ngrams = new Dictionary<string, double>();
        foreach (string ngram in inNgrams)
        {
            // An empty n-gram has no last character to append and no tail to chain from,
            // so it would only crash Generate() later on.
            if (string.IsNullOrEmpty(ngram))
                continue;

            // TryGetValue leaves the count at 0 for an ngram we haven't seen yet.
            double occurrences;
            ngrams.TryGetValue(ngram, out occurrences);
            ngrams[ngram] = occurrences + 1;
        }
    }

    /// <summary>
    /// Returns a random ngram that's currently loaded into this WeightedMarkovChain.
    /// </summary>
    /// <returns>A random ngram from this WeightedMarkovChain's cache of ngrams.</returns>
    public string RandomNgram()
    {
        // Only (re)load the full ngram set when the picker is empty; Generate() always
        // leaves it empty when it finishes.
        if (wrandom.Count == 0)
            wrandom.SetContents(ngrams);
        return wrandom.Next();
    }

    /// <summary>
    /// Generates a new random string from the currently stored ngrams.
    /// </summary>
    /// <param name="length">
    /// The target length of the string to generate.
    /// Note that this is a target, not a fixed value - e.g. passing 2 when the n-gram order is 3 will
    /// result in a string of length 3. Also, depending on the current ngrams this markov chain contains,
    /// it may end up being cut short.
    /// </param>
    /// <returns>A new random string.</returns>
    public string Generate(int length)
    {
        string result = RandomNgram();
        string lastNgram = result;
        try
        {
            while (result.Length < length)
            {
                wrandom.ClearContents();
                // The substring that the next ngram in the chain needs to start with
                string nextStartsWith = lastNgram.Substring(1);
                // Get a list of possible n-grams we could choose from next.
                // Ordinal comparison: n-grams are raw character sequences, so culture-specific
                // matching rules (e.g. ignorable characters) must not influence the result.
                Dictionary<string, double> convNextNgrams = new Dictionary<string, double>();
                foreach (KeyValuePair<string, double> ngramData in ngrams)
                {
                    if (ngramData.Key.StartsWith(nextStartsWith, StringComparison.Ordinal))
                        convNextNgrams.Add(ngramData.Key, ngramData.Value);
                }
                // If there aren't any choices left, we can't exactly keep adding to the new string any more :-(
                if (convNextNgrams.Count == 0)
                    break;
                wrandom.SetContents(convNextNgrams);
                // Pick a random n-gram from the list
                string nextNgram = wrandom.Next();
                // Add the last character from the n-gram to the string we're building
                result += nextNgram[nextNgram.Length - 1];
                lastNgram = nextNgram;
            }
        }
        finally
        {
            // Always leave the picker empty - even if something above threw - so that the
            // next RandomNgram() call reloads the full ngram set rather than a filtered one.
            wrandom.ClearContents();
        }
        return result;
    }
}
|
||||||
|
}
|
Loading…
Reference in a new issue