|
|
|
@ -3,122 +3,138 @@ using System.Collections.Generic;
|
|
|
|
|
using System.Diagnostics;
|
|
|
|
|
using System.IO;
|
|
|
|
|
using System.Linq;
|
|
|
|
|
using MarkovGrams.Utilities;
|
|
|
|
|
|
|
|
|
|
namespace MarkovGrams
|
|
|
|
|
{
|
|
|
|
|
/// <summary>
/// The operating modes that the command-line entry point can run in.
/// </summary>
/// <remarks>
/// The entry point converts the first positional command-line argument into a
/// member of this enum by name, ignoring case, so the member names double as
/// the accepted command-line keywords. Renaming a member therefore changes
/// the command-line interface.
/// NOTE(review): the usage text advertises a "markov-w" mode, which does not
/// match the name <see cref="WeightedMarkov"/> - confirm how that keyword is
/// meant to be mapped.
/// </remarks>
public enum Mode
{
    /// <summary>
    /// Print the usage text. Selected when no positional argument is given.
    /// </summary>
    Help,

    /// <summary>
    /// Print the n-grams generated from the input wordlist.
    /// </summary>
    NGrams,

    /// <summary>
    /// Generate words using an unweighted markov chain.
    /// </summary>
    Markov,

    /// <summary>
    /// Generate words using a weighted markov chain.
    /// </summary>
    WeightedMarkov
}
|
|
|
|
|
|
|
|
|
|
class MainClass
|
|
|
|
|
{
|
|
|
|
|
public static int Main(string[] args)
|
|
|
|
|
{
|
|
|
|
|
if(args.Length < 1)
|
|
|
|
|
List<string> extras = new List<string>();
|
|
|
|
|
StreamReader wordlistSource = new StreamReader(Console.OpenStandardInput());
|
|
|
|
|
int order = 3, length = 8, count = 10;
|
|
|
|
|
bool splitOnWhitespace = true,
|
|
|
|
|
ngramsUnique = true,
|
|
|
|
|
convertLowercase = false,
|
|
|
|
|
startOnUppercase = false;
|
|
|
|
|
for (int i = 0; i < args.Length; i++)
|
|
|
|
|
{
|
|
|
|
|
Console.WriteLine("Usage:");
|
|
|
|
|
Console.WriteLine(" ./MarkovGrams.exe <command>");
|
|
|
|
|
Console.WriteLine();
|
|
|
|
|
Console.WriteLine("Available commands:");
|
|
|
|
|
Console.WriteLine(" markov:");
|
|
|
|
|
Console.WriteLine(" Generate new words using an unweighted markov chain.");
|
|
|
|
|
Console.WriteLine(" markov-w:");
|
|
|
|
|
Console.WriteLine(" Generate new words using a weighted markov chain.");
|
|
|
|
|
Console.WriteLine(" ngrams:");
|
|
|
|
|
Console.WriteLine(" Generate raw unique n-grams");
|
|
|
|
|
Console.WriteLine();
|
|
|
|
|
Console.WriteLine("Type just ./MarovGrams.exe <command> to see command-specific help.");
|
|
|
|
|
return 1;
|
|
|
|
|
if (!args[i].StartsWith("-"))
|
|
|
|
|
{
|
|
|
|
|
extras.Add(args[i]);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
switch (args[i].TrimStart("-".ToCharArray()))
|
|
|
|
|
{
|
|
|
|
|
case "wordlist":
|
|
|
|
|
wordlistSource = new StreamReader(args[++i]);
|
|
|
|
|
break;
|
|
|
|
|
case "order":
|
|
|
|
|
order = int.Parse(args[++i]);
|
|
|
|
|
break;
|
|
|
|
|
case "length":
|
|
|
|
|
length = int.Parse(args[++i]);
|
|
|
|
|
break;
|
|
|
|
|
case "count":
|
|
|
|
|
count = int.Parse(args[++i]);
|
|
|
|
|
break;
|
|
|
|
|
case "no-split":
|
|
|
|
|
splitOnWhitespace = false;
|
|
|
|
|
break;
|
|
|
|
|
case "no-unique":
|
|
|
|
|
ngramsUnique = false;
|
|
|
|
|
break;
|
|
|
|
|
case "lowercase":
|
|
|
|
|
convertLowercase = true;
|
|
|
|
|
break;
|
|
|
|
|
case "start-uppercase":
|
|
|
|
|
startOnUppercase = true;
|
|
|
|
|
break;
|
|
|
|
|
default:
|
|
|
|
|
Console.Error.WriteLine($"Error: Unknown option '{args[i]}'.");
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
string mode = args[0];
|
|
|
|
|
string wordlistFilename;
|
|
|
|
|
int order;
|
|
|
|
|
IEnumerable<string> words, ngrams;
|
|
|
|
|
Mode mode = extras.Count > 0 ? (Mode)Enum.Parse(typeof(Mode), extras.ShiftAt(0), true) : Mode.Help;
|
|
|
|
|
|
|
|
|
|
switch(mode)
|
|
|
|
|
{
|
|
|
|
|
case "markov":
|
|
|
|
|
if(args.Length != 5)
|
|
|
|
|
{
|
|
|
|
|
Console.WriteLine("markov command usage:");
|
|
|
|
|
Console.WriteLine(" ./MarkovGrams.exe markov <wordlist.txt> <order> <length> <count>");
|
|
|
|
|
Console.WriteLine();
|
|
|
|
|
Console.WriteLine("<wordlist.txt> The path to the wordlist to read from.");
|
|
|
|
|
Console.WriteLine("<order> The order of the n-grams to use.");
|
|
|
|
|
Console.WriteLine("<length> The length of word to generate.");
|
|
|
|
|
Console.WriteLine("<count> The number of words to generate.");
|
|
|
|
|
Console.WriteLine();
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
wordlistFilename = args[1];
|
|
|
|
|
order = int.Parse(args[2]);
|
|
|
|
|
int desiredStringLength = int.Parse(args[3]);
|
|
|
|
|
int count = int.Parse(args[4]);
|
|
|
|
|
|
|
|
|
|
words = File.ReadLines(wordlistFilename).SelectMany(word => word.Trim().Split(' '));
|
|
|
|
|
ngrams = NGrams.GenerateFlat(words, order);
|
|
|
|
|
// ------------------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
IEnumerable<string> words = wordlistSource.ReadAllLines().SelectMany((string word) => {
|
|
|
|
|
word = word.Trim();
|
|
|
|
|
if (convertLowercase)
|
|
|
|
|
word = word.ToLower();
|
|
|
|
|
if (splitOnWhitespace)
|
|
|
|
|
return word.Split(' ');
|
|
|
|
|
return new string[] { word.Trim() };
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
switch (mode)
|
|
|
|
|
{
|
|
|
|
|
case Mode.Markov:
|
|
|
|
|
Stopwatch utimer = Stopwatch.StartNew();
|
|
|
|
|
UnweightedMarkovChain chain = new UnweightedMarkovChain(ngrams);
|
|
|
|
|
UnweightedMarkovChain unweightedChain = new UnweightedMarkovChain(
|
|
|
|
|
NGrams.GenerateFlat(words, order)
|
|
|
|
|
);
|
|
|
|
|
unweightedChain.StartOnUppercase = startOnUppercase;
|
|
|
|
|
|
|
|
|
|
for(int i = 0; i < count; i++)
|
|
|
|
|
Console.WriteLine(chain.Generate(desiredStringLength));
|
|
|
|
|
for (int i = 0; i < count; i++)
|
|
|
|
|
Console.WriteLine(unweightedChain.Generate(length));
|
|
|
|
|
Console.Error.WriteLine($"{count} words in {utimer.ElapsedMilliseconds}ms");
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case "markov-w":
|
|
|
|
|
if (args.Length != 5)
|
|
|
|
|
{
|
|
|
|
|
Console.WriteLine("markov-w command usage:");
|
|
|
|
|
Console.WriteLine(" ./MarkovGrams.exe markov-w <wordlist.txt> <order> <length> <count>");
|
|
|
|
|
Console.WriteLine();
|
|
|
|
|
Console.WriteLine("<wordlist.txt> The path to the wordlist to read from.");
|
|
|
|
|
Console.WriteLine("<order> The order of the n-grams to use.");
|
|
|
|
|
Console.WriteLine("<length> The length of word to generate.");
|
|
|
|
|
Console.WriteLine("<count> The number of words to generate.");
|
|
|
|
|
Console.WriteLine();
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
wordlistFilename = args[1];
|
|
|
|
|
order = int.Parse(args[2]);
|
|
|
|
|
int weightedDesiredStringLength = int.Parse(args[3]);
|
|
|
|
|
int weightedCount = int.Parse(args[4]);
|
|
|
|
|
|
|
|
|
|
words = File.ReadLines(wordlistFilename).SelectMany(word => word.Trim().Split(' '));
|
|
|
|
|
ngrams = NGrams.GenerateFlat(words, order);
|
|
|
|
|
|
|
|
|
|
case Mode.WeightedMarkov:
|
|
|
|
|
Stopwatch wtimer = Stopwatch.StartNew();
|
|
|
|
|
WeightedMarkovChain weightedChain = new WeightedMarkovChain(ngrams);
|
|
|
|
|
WeightedMarkovChain weightedChain = new WeightedMarkovChain(
|
|
|
|
|
NGrams.GenerateWeighted(words, order)
|
|
|
|
|
);
|
|
|
|
|
weightedChain.StartOnUppercase = startOnUppercase;
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < weightedCount; i++)
|
|
|
|
|
Console.WriteLine(weightedChain.Generate(weightedDesiredStringLength));
|
|
|
|
|
Console.Error.WriteLine($"{weightedCount} words in {wtimer.ElapsedMilliseconds}ms");
|
|
|
|
|
for (int i = 0; i < count; i++)
|
|
|
|
|
Console.WriteLine(weightedChain.Generate(length));
|
|
|
|
|
Console.Error.WriteLine($"{count} words in {wtimer.ElapsedMilliseconds}ms");
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case "ngrams":
|
|
|
|
|
if(args.Length != 3)
|
|
|
|
|
{
|
|
|
|
|
Console.WriteLine("ngrams command usage:");
|
|
|
|
|
Console.WriteLine(" ./MarkovGrams.exe <wordlist.txt> <order>");
|
|
|
|
|
Console.WriteLine();
|
|
|
|
|
Console.WriteLine("<wordlist.txt> The path to the wordlist to read from.");
|
|
|
|
|
Console.WriteLine("<order> The order of n-grams to generate.");
|
|
|
|
|
Console.WriteLine();
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
wordlistFilename = args[1];
|
|
|
|
|
order = int.Parse(args[2]);
|
|
|
|
|
words = File.ReadLines(wordlistFilename).SelectMany(word => word.Trim().Split(' '));
|
|
|
|
|
ngrams = NGrams.GenerateFlat(words, order);
|
|
|
|
|
|
|
|
|
|
foreach(string ngram in ngrams)
|
|
|
|
|
case Mode.NGrams:
|
|
|
|
|
foreach (string ngram in NGrams.GenerateFlat(words, order, ngramsUnique))
|
|
|
|
|
Console.WriteLine(ngram);
|
|
|
|
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case Mode.Help:
|
|
|
|
|
default:
|
|
|
|
|
Console.WriteLine("Unknown command {0}.");
|
|
|
|
|
Console.WriteLine("Available commands:");
|
|
|
|
|
Console.WriteLine(" markov Generate words with a markov chain");
|
|
|
|
|
Console.WriteLine(" ngrams Generate unique ngrams from wordlists");
|
|
|
|
|
Console.WriteLine("Usage:");
|
|
|
|
|
Console.WriteLine(" ./MarkovGrams.exe <mode> [options]");
|
|
|
|
|
Console.WriteLine();
|
|
|
|
|
Console.WriteLine("Available modes:");
|
|
|
|
|
Console.WriteLine(" markov:");
|
|
|
|
|
Console.WriteLine(" Generate new words using an unweighted markov chain.");
|
|
|
|
|
Console.WriteLine(" markov-w:");
|
|
|
|
|
Console.WriteLine(" Generate new words using a weighted markov chain.");
|
|
|
|
|
Console.WriteLine(" ngrams:");
|
|
|
|
|
Console.WriteLine(" Generate raw unique n-grams");
|
|
|
|
|
Console.WriteLine();
|
|
|
|
|
Console.WriteLine("Available options:");
|
|
|
|
|
Console.WriteLine(" --wordlist {filename} Read the wordlist from the specified filename instead of stdin");
|
|
|
|
|
Console.WriteLine(" --order {number} Use the specified order when generating n-grams (default: 3)");
|
|
|
|
|
Console.WriteLine(" --length {number} The target length of word to generate (Not available in ngrams mode)");
|
|
|
|
|
Console.WriteLine(" --count {number} The number of words to generate (Not valid in ngrams mode)");
|
|
|
|
|
Console.WriteLine(" --no-split Don't split input words on whitespace - treat each line as a single word");
|
|
|
|
|
Console.WriteLine(" --lowercase Convert the input to lowercase before processing");
|
|
|
|
|
Console.WriteLine(" --start-uppercase Start the generating a word only with n-grams that start with a capital letter");
|
|
|
|
|
Console.WriteLine(" --no-unique Don't remove duplicates from the list of ngrams (Only valid in ngrams mode)");
|
|
|
|
|
Console.WriteLine("Type just ./MarkovGrams.exe <mode> to see mode-specific help.");
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|