Experiments into markov chains, n-grams, and text generation.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

158 lines
5.2 KiB

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using MarkovGrams.Utilities;
namespace MarkovGrams
{
/// <summary>
/// The top-level operation selected on the command line.
/// </summary>
public enum Mode
{
    /// <summary>No mode was chosen; the program falls back to printing the usage text.</summary>
    None = 0,

    /// <summary>The usage text was explicitly requested.</summary>
    Help = 1,

    /// <summary>Print the n-grams generated from the wordlist.</summary>
    NGrams = 2,

    /// <summary>Generate words with an unweighted markov chain.</summary>
    Markov = 3,

    /// <summary>Generate words with a weighted markov chain.</summary>
    WeightedMarkov = 4
}
/// <summary>
/// Command-line entry point: parses the arguments, reads the wordlist and runs the selected mode.
/// </summary>
class MainClass
{
    /// <summary>
    /// Parses the command line and runs the requested mode.
    /// </summary>
    /// <param name="args">
    /// The raw command-line arguments. Arguments starting with "-" are options; the first
    /// remaining argument is taken as the mode name.
    /// </param>
    /// <returns>
    /// 0 when a mode ran to completion; 1 when the usage text was printed or the arguments were invalid.
    /// </returns>
    public static int Main(string[] args)
    {
        Mode operationMode = Mode.None;
        GenerationMode generationMode = GenerationMode.CharacterLevel;
        List<string> extras = new List<string>();
        // null means "read the wordlist from stdin". The reader is opened only once a mode
        // that consumes it has been selected, so it is always disposed and never abandoned.
        string wordlistFilename = null;
        int order = 3, length = 8, count = 10;
        bool splitOnWhitespace = true,
            ngramsUnique = true,
            convertLowercase = false,
            startOnUppercase = false;

        for (int i = 0; i < args.Length; i++)
        {
            if (!args[i].StartsWith("-", StringComparison.Ordinal))
            {
                extras.Add(args[i]);
                continue;
            }

            switch (args[i].TrimStart('-'))
            {
                case "wordlist":
                    if (!TryReadValue(args, ref i, out wordlistFilename))
                        return 1;
                    break;
                case "order":
                    if (!TryReadInt(args, ref i, out order))
                        return 1;
                    break;
                case "length":
                    if (!TryReadInt(args, ref i, out length))
                        return 1;
                    break;
                case "count":
                    if (!TryReadInt(args, ref i, out count))
                        return 1;
                    break;
                case "no-split":
                    splitOnWhitespace = false;
                    break;
                case "no-unique":
                    ngramsUnique = false;
                    break;
                case "lowercase":
                    convertLowercase = true;
                    break;
                case "start-uppercase":
                    startOnUppercase = true;
                    break;
                case "words":
                    generationMode = GenerationMode.WordLevel;
                    break;
                case "help":
                    operationMode = Mode.Help;
                    break;
                default:
                    Console.Error.WriteLine($"Error: Unknown option '{args[i]}'.");
                    return 1;
            }
        }

        // An explicit --help wins over any mode name given on the command line.
        if (operationMode != Mode.Help && extras.Count > 0)
        {
            // ShiftAt is a project extension method (not visible here); presumably it removes
            // and returns the element at the given index - confirm against MarkovGrams.Utilities.
            string modeName = extras.ShiftAt(0);
            // "markov-w" is the user-facing alias for the WeightedMarkov enum member.
            if (!Enum.TryParse<Mode>(modeName.Replace("markov-w", "weightedmarkov"), true, out operationMode))
            {
                Console.Error.WriteLine($"Error: Unknown mode '{modeName}'.");
                return 1;
            }
        }

        // Decide on the help path before touching the input, so that printing the usage
        // text never depends on (or waits for) the wordlist source.
        bool consumesWordlist = operationMode == Mode.Markov
            || operationMode == Mode.WeightedMarkov
            || operationMode == Mode.NGrams;
        if (!consumesWordlist)
        {
            PrintHelp();
            return 1;
        }

        // ------------------------------------------------------------------------------------------

        using (StreamReader wordlistSource = wordlistFilename == null
            ? new StreamReader(Console.OpenStandardInput())
            : new StreamReader(wordlistFilename))
        {
            // ReadAllLines is a project extension method on StreamReader (not visible here).
            // Everything that enumerates `words` runs inside this using block, so the reader
            // is still open even if that sequence turns out to be lazily evaluated.
            IEnumerable<string> words = wordlistSource.ReadAllLines().SelectMany((string word) => {
                word = word.Trim();
                if (convertLowercase)
                    word = word.ToLower();
                if (splitOnWhitespace)
                    return word.Split(' ');
                return new string[] { word };
            });

            switch (operationMode)
            {
                case Mode.Markov:
                    Stopwatch utimer = Stopwatch.StartNew();
                    UnweightedMarkovChain unweightedChain = new UnweightedMarkovChain(
                        NGrams.GenerateFlat(words, order, generationMode),
                        generationMode
                    );
                    unweightedChain.StartOnUppercase = startOnUppercase;
                    for (int i = 0; i < count; i++)
                        Console.WriteLine(unweightedChain.Generate(length));
                    // Timing goes to stderr so that stdout carries only the generated words.
                    Console.Error.WriteLine($"{count} words in {utimer.ElapsedMilliseconds}ms");
                    break;
                case Mode.WeightedMarkov:
                    Stopwatch wtimer = Stopwatch.StartNew();
                    WeightedMarkovChain weightedChain = new WeightedMarkovChain(
                        NGrams.GenerateWeighted(words, order, generationMode),
                        generationMode
                    );
                    weightedChain.StartOnUppercase = startOnUppercase;
                    for (int i = 0; i < count; i++)
                        Console.WriteLine(weightedChain.Generate(length));
                    Console.Error.WriteLine($"{count} words in {wtimer.ElapsedMilliseconds}ms");
                    break;
                case Mode.NGrams:
                    foreach (string ngram in NGrams.GenerateFlat(words, order, generationMode, ngramsUnique))
                        Console.WriteLine(ngram);
                    break;
            }
        }

        return 0;
    }

    /// <summary>
    /// Reads the value that follows the option at <paramref name="index"/>, advancing the index onto it.
    /// Prints an error to stderr when the option is the last argument and so has no value.
    /// </summary>
    /// <param name="args">The full argument list.</param>
    /// <param name="index">Index of the option; moved to the value's index on success.</param>
    /// <param name="value">The value on success; null otherwise.</param>
    /// <returns>true when a value was present; false otherwise.</returns>
    private static bool TryReadValue(string[] args, ref int index, out string value)
    {
        if (index + 1 >= args.Length)
        {
            Console.Error.WriteLine($"Error: Option '{args[index]}' requires a value.");
            value = null;
            return false;
        }

        value = args[++index];
        return true;
    }

    /// <summary>
    /// Reads the value that follows the option at <paramref name="index"/> and parses it as an integer.
    /// Prints an error to stderr when the value is missing or is not a valid integer.
    /// </summary>
    /// <param name="args">The full argument list.</param>
    /// <param name="index">Index of the option; moved to the value's index when a value is present.</param>
    /// <param name="value">The parsed number on success; 0 otherwise.</param>
    /// <returns>true when a valid integer was read; false otherwise.</returns>
    private static bool TryReadInt(string[] args, ref int index, out int value)
    {
        string option = args[index];
        string raw;
        if (!TryReadValue(args, ref index, out raw))
        {
            value = 0;
            return false;
        }

        if (!int.TryParse(raw, out value))
        {
            Console.Error.WriteLine($"Error: Invalid number '{raw}' for option '{option}'.");
            return false;
        }

        return true;
    }

    /// <summary>
    /// Writes the usage text to stdout.
    /// </summary>
    private static void PrintHelp()
    {
        Console.WriteLine("Usage:");
        Console.WriteLine(" ./MarkovGrams.exe <mode> [options]");
        Console.WriteLine();
        Console.WriteLine("Available modes:");
        Console.WriteLine(" markov:");
        Console.WriteLine(" Generate new words using an unweighted markov chain.");
        Console.WriteLine(" markov-w:");
        Console.WriteLine(" Generate new words using a weighted markov chain.");
        Console.WriteLine(" ngrams:");
        Console.WriteLine(" Generate raw unique n-grams");
        Console.WriteLine();
        Console.WriteLine("Available options:");
        Console.WriteLine(" --help Show this message");
        Console.WriteLine(" --wordlist {filename} Read the wordlist from the specified filename instead of stdin");
        Console.WriteLine(" --order {number} Use the specified order when generating n-grams (default: 3)");
        Console.WriteLine(" --length {number} The target length of word to generate (Not available in ngrams mode)");
        Console.WriteLine(" --count {number} The number of words to generate (Not valid in ngrams mode)");
        Console.WriteLine(" --words Generate ngrams on word-level instead of character-level (Applies to all modes)");
        Console.WriteLine(" --no-split Don't split input words on whitespace - treat each line as a single word");
        Console.WriteLine(" --lowercase Convert the input to lowercase before processing");
        Console.WriteLine(" --start-uppercase Start the generating a word only with n-grams that start with a capital letter");
        Console.WriteLine(" --no-unique Don't remove duplicates from the list of ngrams (Only valid in ngrams mode)");
        Console.WriteLine("Type just ./MarkovGrams.exe <mode> to see mode-specific help.");
    }
}
}