|
|
@ -3,122 +3,138 @@ using System.Collections.Generic; |
|
|
|
using System.Diagnostics; |
|
|
|
using System.IO; |
|
|
|
using System.Linq; |
|
|
|
using MarkovGrams.Utilities; |
|
|
|
|
|
|
|
namespace MarkovGrams |
|
|
|
{ |
|
|
|
/// <summary>
/// The operating mode of the program, chosen by the first non-option
/// command-line argument.
/// </summary>
/// <remarks>
/// Main resolves that argument to a member by name, ignoring case, and uses
/// <see cref="Help"/> when no such argument was supplied.
/// NOTE(review): the usage text advertises a "markov-w" mode, which is not the
/// name of any member declared here - confirm how that spelling is expected to
/// reach <see cref="WeightedMarkov"/>.
/// </remarks>
public enum Mode
{
    /// <summary>Print the usage text and exit with status 1.</summary>
    Help = 0,

    /// <summary>Write the n-grams generated from the word list to standard output, one per line.</summary>
    NGrams = 1,

    /// <summary>Generate words with an UnweightedMarkovChain built from flat n-grams.</summary>
    Markov = 2,

    /// <summary>Generate words with a WeightedMarkovChain built from weighted n-grams.</summary>
    WeightedMarkov = 3
}
|
|
|
|
|
|
|
class MainClass |
|
|
|
{ |
|
|
|
public static int Main(string[] args) |
|
|
|
{ |
|
|
|
if(args.Length < 1) |
|
|
|
List<string> extras = new List<string>(); |
|
|
|
StreamReader wordlistSource = new StreamReader(Console.OpenStandardInput()); |
|
|
|
int order = 3, length = 8, count = 10; |
|
|
|
bool splitOnWhitespace = true, |
|
|
|
ngramsUnique = true, |
|
|
|
convertLowercase = false, |
|
|
|
startOnUppercase = false; |
|
|
|
for (int i = 0; i < args.Length; i++) |
|
|
|
{ |
|
|
|
Console.WriteLine("Usage:"); |
|
|
|
Console.WriteLine(" ./MarkovGrams.exe <command>"); |
|
|
|
Console.WriteLine(); |
|
|
|
Console.WriteLine("Available commands:"); |
|
|
|
Console.WriteLine(" markov:"); |
|
|
|
Console.WriteLine(" Generate new words using an unweighted markov chain."); |
|
|
|
Console.WriteLine(" markov-w:"); |
|
|
|
Console.WriteLine(" Generate new words using a weighted markov chain."); |
|
|
|
Console.WriteLine(" ngrams:"); |
|
|
|
Console.WriteLine(" Generate raw unique n-grams"); |
|
|
|
Console.WriteLine(); |
|
|
|
Console.WriteLine("Type just ./MarovGrams.exe <command> to see command-specific help."); |
|
|
|
return 1; |
|
|
|
if (!args[i].StartsWith("-")) |
|
|
|
{ |
|
|
|
extras.Add(args[i]); |
|
|
|
continue; |
|
|
|
} |
|
|
|
|
|
|
|
switch (args[i].TrimStart("-".ToCharArray())) |
|
|
|
{ |
|
|
|
case "wordlist": |
|
|
|
wordlistSource = new StreamReader(args[++i]); |
|
|
|
break; |
|
|
|
case "order": |
|
|
|
order = int.Parse(args[++i]); |
|
|
|
break; |
|
|
|
case "length": |
|
|
|
length = int.Parse(args[++i]); |
|
|
|
break; |
|
|
|
case "count": |
|
|
|
count = int.Parse(args[++i]); |
|
|
|
break; |
|
|
|
case "no-split": |
|
|
|
splitOnWhitespace = false; |
|
|
|
break; |
|
|
|
case "no-unique": |
|
|
|
ngramsUnique = false; |
|
|
|
break; |
|
|
|
case "lowercase": |
|
|
|
convertLowercase = true; |
|
|
|
break; |
|
|
|
case "start-uppercase": |
|
|
|
startOnUppercase = true; |
|
|
|
break; |
|
|
|
default: |
|
|
|
Console.Error.WriteLine($"Error: Unknown option '{args[i]}'."); |
|
|
|
return 1; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
string mode = args[0]; |
|
|
|
string wordlistFilename; |
|
|
|
int order; |
|
|
|
IEnumerable<string> words, ngrams; |
|
|
|
Mode mode = extras.Count > 0 ? (Mode)Enum.Parse(typeof(Mode), extras.ShiftAt(0), true) : Mode.Help; |
|
|
|
|
|
|
|
switch(mode) |
|
|
|
{ |
|
|
|
case "markov": |
|
|
|
if(args.Length != 5) |
|
|
|
{ |
|
|
|
Console.WriteLine("markov command usage:"); |
|
|
|
Console.WriteLine(" ./MarkovGrams.exe markov <wordlist.txt> <order> <length> <count>"); |
|
|
|
Console.WriteLine(); |
|
|
|
Console.WriteLine("<wordlist.txt> The path to the wordlist to read from."); |
|
|
|
Console.WriteLine("<order> The order of the n-grams to use."); |
|
|
|
Console.WriteLine("<length> The length of word to generate."); |
|
|
|
Console.WriteLine("<count> The number of words to generate."); |
|
|
|
Console.WriteLine(); |
|
|
|
return 1; |
|
|
|
} |
|
|
|
wordlistFilename = args[1]; |
|
|
|
order = int.Parse(args[2]); |
|
|
|
int desiredStringLength = int.Parse(args[3]); |
|
|
|
int count = int.Parse(args[4]); |
|
|
|
|
|
|
|
words = File.ReadLines(wordlistFilename).SelectMany(word => word.Trim().Split(' ')); |
|
|
|
ngrams = NGrams.GenerateFlat(words, order); |
|
|
|
// ------------------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
IEnumerable<string> words = wordlistSource.ReadAllLines().SelectMany((string word) => { |
|
|
|
word = word.Trim(); |
|
|
|
if (convertLowercase) |
|
|
|
word = word.ToLower(); |
|
|
|
if (splitOnWhitespace) |
|
|
|
return word.Split(' '); |
|
|
|
return new string[] { word.Trim() }; |
|
|
|
}); |
|
|
|
|
|
|
|
switch (mode) |
|
|
|
{ |
|
|
|
case Mode.Markov: |
|
|
|
Stopwatch utimer = Stopwatch.StartNew(); |
|
|
|
UnweightedMarkovChain chain = new UnweightedMarkovChain(ngrams); |
|
|
|
UnweightedMarkovChain unweightedChain = new UnweightedMarkovChain( |
|
|
|
NGrams.GenerateFlat(words, order) |
|
|
|
); |
|
|
|
unweightedChain.StartOnUppercase = startOnUppercase; |
|
|
|
|
|
|
|
for(int i = 0; i < count; i++) |
|
|
|
Console.WriteLine(chain.Generate(desiredStringLength)); |
|
|
|
for (int i = 0; i < count; i++) |
|
|
|
Console.WriteLine(unweightedChain.Generate(length)); |
|
|
|
Console.Error.WriteLine($"{count} words in {utimer.ElapsedMilliseconds}ms"); |
|
|
|
break; |
|
|
|
|
|
|
|
case "markov-w": |
|
|
|
if (args.Length != 5) |
|
|
|
{ |
|
|
|
Console.WriteLine("markov-w command usage:"); |
|
|
|
Console.WriteLine(" ./MarkovGrams.exe markov-w <wordlist.txt> <order> <length> <count>"); |
|
|
|
Console.WriteLine(); |
|
|
|
Console.WriteLine("<wordlist.txt> The path to the wordlist to read from."); |
|
|
|
Console.WriteLine("<order> The order of the n-grams to use."); |
|
|
|
Console.WriteLine("<length> The length of word to generate."); |
|
|
|
Console.WriteLine("<count> The number of words to generate."); |
|
|
|
Console.WriteLine(); |
|
|
|
return 1; |
|
|
|
} |
|
|
|
wordlistFilename = args[1]; |
|
|
|
order = int.Parse(args[2]); |
|
|
|
int weightedDesiredStringLength = int.Parse(args[3]); |
|
|
|
int weightedCount = int.Parse(args[4]); |
|
|
|
|
|
|
|
words = File.ReadLines(wordlistFilename).SelectMany(word => word.Trim().Split(' ')); |
|
|
|
ngrams = NGrams.GenerateFlat(words, order); |
|
|
|
|
|
|
|
case Mode.WeightedMarkov: |
|
|
|
Stopwatch wtimer = Stopwatch.StartNew(); |
|
|
|
WeightedMarkovChain weightedChain = new WeightedMarkovChain(ngrams); |
|
|
|
WeightedMarkovChain weightedChain = new WeightedMarkovChain( |
|
|
|
NGrams.GenerateWeighted(words, order) |
|
|
|
); |
|
|
|
weightedChain.StartOnUppercase = startOnUppercase; |
|
|
|
|
|
|
|
for (int i = 0; i < weightedCount; i++) |
|
|
|
Console.WriteLine(weightedChain.Generate(weightedDesiredStringLength)); |
|
|
|
Console.Error.WriteLine($"{weightedCount} words in {wtimer.ElapsedMilliseconds}ms"); |
|
|
|
for (int i = 0; i < count; i++) |
|
|
|
Console.WriteLine(weightedChain.Generate(length)); |
|
|
|
Console.Error.WriteLine($"{count} words in {wtimer.ElapsedMilliseconds}ms"); |
|
|
|
break; |
|
|
|
|
|
|
|
case "ngrams": |
|
|
|
if(args.Length != 3) |
|
|
|
{ |
|
|
|
Console.WriteLine("ngrams command usage:"); |
|
|
|
Console.WriteLine(" ./MarkovGrams.exe <wordlist.txt> <order>"); |
|
|
|
Console.WriteLine(); |
|
|
|
Console.WriteLine("<wordlist.txt> The path to the wordlist to read from."); |
|
|
|
Console.WriteLine("<order> The order of n-grams to generate."); |
|
|
|
Console.WriteLine(); |
|
|
|
return 1; |
|
|
|
} |
|
|
|
|
|
|
|
wordlistFilename = args[1]; |
|
|
|
order = int.Parse(args[2]); |
|
|
|
words = File.ReadLines(wordlistFilename).SelectMany(word => word.Trim().Split(' ')); |
|
|
|
ngrams = NGrams.GenerateFlat(words, order); |
|
|
|
|
|
|
|
foreach(string ngram in ngrams) |
|
|
|
case Mode.NGrams: |
|
|
|
foreach (string ngram in NGrams.GenerateFlat(words, order, ngramsUnique)) |
|
|
|
Console.WriteLine(ngram); |
|
|
|
|
|
|
|
break; |
|
|
|
|
|
|
|
case Mode.Help: |
|
|
|
default: |
|
|
|
Console.WriteLine("Unknown command {0}."); |
|
|
|
Console.WriteLine("Available commands:"); |
|
|
|
Console.WriteLine(" markov Generate words with a markov chain"); |
|
|
|
Console.WriteLine(" ngrams Generate unique ngrams from wordlists"); |
|
|
|
Console.WriteLine("Usage:"); |
|
|
|
Console.WriteLine(" ./MarkovGrams.exe <mode> [options]"); |
|
|
|
Console.WriteLine(); |
|
|
|
Console.WriteLine("Available modes:"); |
|
|
|
Console.WriteLine(" markov:"); |
|
|
|
Console.WriteLine(" Generate new words using an unweighted markov chain."); |
|
|
|
Console.WriteLine(" markov-w:"); |
|
|
|
Console.WriteLine(" Generate new words using a weighted markov chain."); |
|
|
|
Console.WriteLine(" ngrams:"); |
|
|
|
Console.WriteLine(" Generate raw unique n-grams"); |
|
|
|
Console.WriteLine(); |
|
|
|
Console.WriteLine("Available options:"); |
|
|
|
Console.WriteLine(" --wordlist {filename} Read the wordlist from the specified filename instead of stdin"); |
|
|
|
Console.WriteLine(" --order {number} Use the specified order when generating n-grams (default: 3)"); |
|
|
|
Console.WriteLine(" --length {number} The target length of word to generate (Not available in ngrams mode)"); |
|
|
|
Console.WriteLine(" --count {number} The number of words to generate (Not valid in ngrams mode)"); |
|
|
|
Console.WriteLine(" --no-split Don't split input words on whitespace - treat each line as a single word"); |
|
|
|
Console.WriteLine(" --lowercase Convert the input to lowercase before processing"); |
|
|
|
Console.WriteLine(" --start-uppercase Start the generating a word only with n-grams that start with a capital letter"); |
|
|
|
Console.WriteLine(" --no-unique Don't remove duplicates from the list of ngrams (Only valid in ngrams mode)"); |
|
|
|
Console.WriteLine("Type just ./MarkovGrams.exe <mode> to see mode-specific help."); |
|
|
|
return 1; |
|
|
|
} |
|
|
|
|
|
|
|