// MarkovGrams/MarkovGrams/Program.cs (168 lines, 5.8 KiB, C#)

using System;
using System.Collections.Generic;
2018-02-17 21:26:27 +00:00
using System.Diagnostics;
using System.IO;
using System.Linq;
using MarkovGrams.Utilities;
namespace MarkovGrams
{
/// <summary>
/// The top-level operation the program performs, chosen from the command line.
/// </summary>
public enum Mode
{
    /// <summary>No mode was selected; the program prints the usage text.</summary>
    None,
    /// <summary>Usage text was requested explicitly (the <c>--help</c> option).</summary>
    Help,
    /// <summary>Print the n-grams generated from the wordlist.</summary>
    NGrams,
    /// <summary>Generate new words using an unweighted markov chain.</summary>
    Markov,
    /// <summary>Generate new words using a weighted markov chain (the <c>markov-w</c> mode name).</summary>
    WeightedMarkov
}
/// <summary>
/// Command-line entry point: parses the arguments, loads the wordlist and then
/// either prints n-grams, generates words with a markov chain, or shows the usage text.
/// </summary>
class MainClass
{
    /// <summary>
    /// Runs the program.
    /// </summary>
    /// <param name="args">
    /// The raw command-line arguments. Arguments starting with <c>-</c> are options;
    /// the first remaining argument is interpreted as the mode name.
    /// </param>
    /// <returns>
    /// 0 when n-grams or words were generated; 1 when the usage text was shown or
    /// the command line was invalid.
    /// </returns>
    public static int Main(string[] args)
    {
        Mode operationMode = Mode.None;
        GenerationMode generationMode = GenerationMode.CharacterLevel;
        List<string> extras = new List<string>();
        // null means "read the wordlist from standard input".
        string wordlistFilename = null;
        int order = 3, length = 8, count = 10;
        bool splitOnWhitespace = true,
            ngramsUnique = true,
            convertLowercase = false,
            startOnUppercase = false;

        for (int i = 0; i < args.Length; i++)
        {
            string option = args[i];
            if (!option.StartsWith("-", StringComparison.Ordinal))
            {
                extras.Add(option);
                continue;
            }

            switch (option.TrimStart('-'))
            {
                case "wordlist":
                    if (!TryReadValue(args, ref i, option, out wordlistFilename))
                        return 1;
                    break;
                case "order":
                    if (!TryReadInt(args, ref i, option, out order))
                        return 1;
                    break;
                case "length":
                    if (!TryReadInt(args, ref i, option, out length))
                        return 1;
                    break;
                case "count":
                    if (!TryReadInt(args, ref i, option, out count))
                        return 1;
                    break;
                case "no-split":
                    splitOnWhitespace = false;
                    break;
                case "no-unique":
                    ngramsUnique = false;
                    break;
                case "lowercase":
                    convertLowercase = true;
                    break;
                case "start-uppercase":
                    startOnUppercase = true;
                    break;
                case "words":
                    generationMode = GenerationMode.WordLevel;
                    // Word-level generation treats each line as one unit, so it implies --no-split.
                    splitOnWhitespace = false;
                    break;
                case "help":
                    operationMode = Mode.Help;
                    break;
                default:
                    Console.Error.WriteLine($"Error: Unknown option '{args[i]}'.");
                    return 1;
            }
        }

        if (operationMode != Mode.Help && extras.Count > 0)
        {
            // NOTE(review): ShiftAt is a project extension; presumably it removes and returns the item at the index.
            string modeName = extras.ShiftAt(0);
            // "markov-w" is the command-line spelling of Mode.WeightedMarkov.
            if (!Enum.TryParse(modeName.Replace("markov-w", "weightedmarkov"), true, out operationMode))
            {
                Console.Error.WriteLine($"Error: Unknown mode '{modeName}'.");
                return 1;
            }
        }

        // Anything that is not a generating mode (None, Help, out-of-range numeric values)
        // only shows the usage text, so answer before touching the wordlist at all.
        bool generatesOutput = operationMode == Mode.Markov
            || operationMode == Mode.WeightedMarkov
            || operationMode == Mode.NGrams;
        if (!generatesOutput)
        {
            PrintHelp();
            return 1;
        }

        StreamReader wordlistSource;
        try
        {
            wordlistSource = wordlistFilename == null
                ? new StreamReader(Console.OpenStandardInput())
                : new StreamReader(wordlistFilename);
        }
        catch (Exception error) when (error is IOException || error is UnauthorizedAccessException || error is ArgumentException)
        {
            Console.Error.WriteLine($"Error: Unable to open wordlist '{wordlistFilename}': {error.Message}");
            return 1;
        }

        // ------------------------------------------------------------------------------------------
        // The reader must stay open until generation has finished, in case the word
        // sequence below is evaluated lazily.
        using (wordlistSource)
        {
            IEnumerable<string> words = wordlistSource.ReadAllLines().SelectMany((string word) =>
            {
                word = word.Trim();
                // No blank lines here!
                if (string.IsNullOrWhiteSpace(word))
                    return new string[0];
                if (convertLowercase)
                    word = word.ToLower();
                if (splitOnWhitespace)
                {
                    // A null separator splits on every whitespace character; dropping empty
                    // entries stops runs of spaces from producing empty "words".
                    return word.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
                }
                return new string[] { word };
            });
            List<float> choicePointRatios = new List<float>();

            switch (operationMode)
            {
                case Mode.Markov:
                    Stopwatch unweightedTimer = Stopwatch.StartNew();
                    UnweightedMarkovChain unweightedChain = new UnweightedMarkovChain(
                        NGrams.GenerateFlat(words, order, generationMode),
                        generationMode
                    );
                    unweightedChain.StartOnUppercase = startOnUppercase;
                    for (int i = 0; i < count; i++)
                    {
                        Console.WriteLine(unweightedChain.Generate(length, out float nextChoicePointRatio));
                        choicePointRatios.Add(nextChoicePointRatio);
                    }
                    PrintSummary(count, unweightedTimer.ElapsedMilliseconds, choicePointRatios);
                    break;

                case Mode.WeightedMarkov:
                    Stopwatch weightedTimer = Stopwatch.StartNew();
                    WeightedMarkovChain weightedChain = new WeightedMarkovChain(
                        NGrams.GenerateWeighted(words, order, generationMode),
                        generationMode
                    );
                    weightedChain.StartOnUppercase = startOnUppercase;
                    for (int i = 0; i < count; i++)
                    {
                        Console.WriteLine(weightedChain.Generate(length, out float nextChoicePointRatio));
                        choicePointRatios.Add(nextChoicePointRatio);
                    }
                    PrintSummary(count, weightedTimer.ElapsedMilliseconds, choicePointRatios);
                    break;

                case Mode.NGrams:
                    foreach (string ngram in NGrams.GenerateFlat(words, order, generationMode, ngramsUnique))
                    {
                        Console.WriteLine(ngram);
                    }
                    break;
            }
        }

        return 0;
    }

    /// <summary>
    /// Consumes the argument following an option as that option's value.
    /// Reports an error on stderr when the option is the last argument.
    /// </summary>
    /// <param name="args">The full argument list.</param>
    /// <param name="index">Index of the option; advanced to the value on success.</param>
    /// <param name="option">The option as typed, used in the error message.</param>
    /// <param name="value">The value that was read, or null on failure.</param>
    /// <returns>True if a value was available; false otherwise.</returns>
    private static bool TryReadValue(string[] args, ref int index, string option, out string value)
    {
        if (index + 1 >= args.Length)
        {
            Console.Error.WriteLine($"Error: Option '{option}' requires a value.");
            value = null;
            return false;
        }

        value = args[++index];
        return true;
    }

    /// <summary>
    /// Consumes the argument following an option and parses it as an integer.
    /// Reports an error on stderr when the value is missing or not an integer.
    /// </summary>
    /// <param name="args">The full argument list.</param>
    /// <param name="index">Index of the option; advanced to the value when one is present.</param>
    /// <param name="option">The option as typed, used in the error message.</param>
    /// <param name="value">The parsed integer, or 0 on failure.</param>
    /// <returns>True if an integer was read; false otherwise.</returns>
    private static bool TryReadInt(string[] args, ref int index, string option, out int value)
    {
        value = 0;
        if (!TryReadValue(args, ref index, option, out string rawValue))
            return false;

        if (!int.TryParse(rawValue, out value))
        {
            Console.Error.WriteLine($"Error: Option '{option}' expects an integer, but got '{rawValue}'.");
            return false;
        }

        return true;
    }

    /// <summary>
    /// Writes the generation statistics line to stderr.
    /// </summary>
    /// <param name="count">The number of words that were requested.</param>
    /// <param name="elapsedMilliseconds">Time spent building the chain and generating.</param>
    /// <param name="choicePointRatios">The per-word ratios reported by the chain's Generate call.</param>
    private static void PrintSummary(int count, long elapsedMilliseconds, List<float> choicePointRatios)
    {
        Console.Error.WriteLine($"{count} words in {elapsedMilliseconds}ms (average choice-point-ratio: {Math.Round(choicePointRatios.Sum() / count, 2)})");
    }

    /// <summary>
    /// Writes the usage text to stdout.
    /// </summary>
    private static void PrintHelp()
    {
        Console.WriteLine("Usage:");
        Console.WriteLine(" ./MarkovGrams.exe <mode> [options]");
        Console.WriteLine();
        Console.WriteLine("Available modes:");
        Console.WriteLine(" markov:");
        Console.WriteLine(" Generate new words using an unweighted markov chain.");
        Console.WriteLine(" markov-w:");
        Console.WriteLine(" Generate new words using a weighted markov chain.");
        Console.WriteLine(" ngrams:");
        Console.WriteLine(" Generate raw unique n-grams");
        Console.WriteLine();
        Console.WriteLine("Available options:");
        Console.WriteLine(" --help Show this message");
        Console.WriteLine(" --wordlist {filename} Read the wordlist from the specified filename instead of stdin");
        Console.WriteLine(" --order {number} Use the specified order when generating n-grams (default: 3)");
        Console.WriteLine(" --length {number} The target length of word to generate (Not available in ngrams mode; instead specifies the number of words to generate with --words)");
        Console.WriteLine(" --count {number} The number of words to generate (Not valid in ngrams mode)");
        Console.WriteLine(" --words Generate ngrams on word-level instead of character-level (Applies to all modes; implies --no-split)");
        Console.WriteLine(" --no-split Don't split input words on whitespace - treat each line as a single word");
        Console.WriteLine(" --lowercase Convert the input to lowercase before processing");
        Console.WriteLine(" --start-uppercase Start the generating a word only with n-grams that start with a capital letter");
        Console.WriteLine(" --no-unique Don't remove duplicates from the list of ngrams (Only valid in ngrams mode)");
        Console.WriteLine("Type just ./MarkovGrams.exe <mode> to see mode-specific help.");
    }
}
}