2017-04-26 20:35:00 +00:00
using System ;
using System.Collections.Generic ;
2018-02-17 21:26:27 +00:00
using System.Diagnostics ;
2017-04-26 20:35:00 +00:00
using System.IO ;
2017-04-26 20:43:45 +00:00
using System.Linq ;
2018-09-02 20:52:22 +00:00
using MarkovGrams.Utilities ;
2017-04-26 20:35:00 +00:00
namespace MarkovGrams
{
2018-09-02 20:52:22 +00:00
/// <summary>
/// The top-level operation the program performs, as selected on the command line.
/// </summary>
public enum Mode
{
    /// <summary>No mode has been selected; the value the program starts with before parsing arguments.</summary>
    None = 0,

    /// <summary>Print the usage message.</summary>
    Help = 1,

    /// <summary>Print the n-grams generated from the word list.</summary>
    NGrams = 2,

    /// <summary>Generate output with an unweighted markov chain.</summary>
    Markov = 3,

    /// <summary>Generate output with a weighted markov chain.</summary>
    WeightedMarkov = 4
}
2017-04-26 20:35:00 +00:00
/// <summary>
/// Command-line front end: parses the arguments, loads the word list and runs the
/// selected <see cref="Mode"/>.
/// </summary>
class MainClass
{
    /// <summary>
    /// Program entry point.
    /// </summary>
    /// <param name="args">
    /// Raw command-line arguments. Anything starting with '-' is treated as an option;
    /// the first remaining argument is taken as the mode name.
    /// </param>
    /// <returns>
    /// 0 when a generation mode ran to completion; 1 when the help text was shown or
    /// the arguments / word list could not be used.
    /// </returns>
    public static int Main(string[] args)
    {
        Mode operationMode = Mode.None;
        GenerationMode generationMode = GenerationMode.CharacterLevel;
        List<string> extras = new List<string>();
        // null means "read the word list from standard input".
        string wordlistFilename = null;
        int order = 3, length = 8, count = 10;
        bool splitOnWhitespace = true,
             ngramsUnique = true,
             convertLowercase = false,
             startOnUppercase = false;

        for (int i = 0; i < args.Length; i++)
        {
            if (!args[i].StartsWith("-", StringComparison.Ordinal))
            {
                extras.Add(args[i]);
                continue;
            }

            switch (args[i].TrimStart('-'))
            {
                case "wordlist":
                    // Only remember the path here; the file is opened once we know
                    // that a mode which actually needs it is going to run.
                    if (!TryReadOptionValue(args, ref i, out wordlistFilename))
                        return 1;
                    break;
                case "order":
                    if (!TryReadIntOption(args, ref i, out order))
                        return 1;
                    break;
                case "length":
                    if (!TryReadIntOption(args, ref i, out length))
                        return 1;
                    break;
                case "count":
                    if (!TryReadIntOption(args, ref i, out count))
                        return 1;
                    break;
                case "no-split":
                    splitOnWhitespace = false;
                    break;
                case "no-unique":
                    ngramsUnique = false;
                    break;
                case "lowercase":
                    convertLowercase = true;
                    break;
                case "start-uppercase":
                    startOnUppercase = true;
                    break;
                case "words":
                    generationMode = GenerationMode.WordLevel;
                    splitOnWhitespace = false;
                    break;
                case "help":
                    operationMode = Mode.Help;
                    break;
                default:
                    Console.Error.WriteLine($"Error: Unknown option '{args[i]}'.");
                    return 1;
            }
        }

        if (operationMode != Mode.Help && extras.Count > 0)
        {
            string modeName = extras.ShiftAt(0);
            if (!TryParseMode(modeName, out operationMode))
            {
                Console.Error.WriteLine($"Error: Unknown mode '{modeName}'.");
                return 1;
            }
        }

        // Show the help before touching the word list, so that asking for help
        // never depends on a file existing or on anything arriving on stdin.
        if (operationMode == Mode.Help || operationMode == Mode.None)
        {
            PrintHelp();
            return 1;
        }

        // ------------------------------------------------------------------------------------------
        StreamReader wordlistSource;
        try
        {
            wordlistSource = wordlistFilename == null
                ? new StreamReader(Console.OpenStandardInput())
                : new StreamReader(wordlistFilename);
        }
        catch (Exception error) when (error is IOException || error is UnauthorizedAccessException || error is ArgumentException)
        {
            Console.Error.WriteLine($"Error: Unable to open wordlist '{wordlistFilename}': {error.Message}");
            return 1;
        }

        // The reader has to stay open until generation has finished, because the
        // word sequence below is evaluated lazily.
        using (wordlistSource)
        {
            IEnumerable<string> words = wordlistSource.ReadAllLines()
                .SelectMany((string line) => SplitLine(line, convertLowercase, splitOnWhitespace));
            List<float> choicePointRatios = new List<float>();

            switch (operationMode)
            {
                case Mode.Markov:
                    Stopwatch utimer = Stopwatch.StartNew();
                    UnweightedMarkovChain unweightedChain = new UnweightedMarkovChain(
                        NGrams.GenerateFlat(words, order, generationMode),
                        generationMode
                    );
                    unweightedChain.StartOnUppercase = startOnUppercase;

                    for (int i = 0; i < count; i++)
                    {
                        Console.WriteLine(unweightedChain.Generate(length, out float nextChoicePointRatio));
                        choicePointRatios.Add(nextChoicePointRatio);
                    }
                    ReportGeneration(count, utimer.ElapsedMilliseconds, choicePointRatios);
                    break;

                case Mode.WeightedMarkov:
                    Stopwatch wtimer = Stopwatch.StartNew();
                    WeightedMarkovChain weightedChain = new WeightedMarkovChain(
                        NGrams.GenerateWeighted(words, order, generationMode),
                        generationMode
                    );
                    weightedChain.StartOnUppercase = startOnUppercase;

                    for (int i = 0; i < count; i++)
                    {
                        Console.WriteLine(weightedChain.Generate(length, out float nextChoicePointRatio));
                        choicePointRatios.Add(nextChoicePointRatio);
                    }
                    ReportGeneration(count, wtimer.ElapsedMilliseconds, choicePointRatios);
                    break;

                case Mode.NGrams:
                    foreach (string ngram in NGrams.GenerateFlat(words, order, generationMode, ngramsUnique))
                        Console.WriteLine(ngram);
                    break;
            }
        }

        return 0;
    }

    /// <summary>
    /// Consumes the argument that follows the option at <paramref name="index"/>.
    /// Prints an error to stderr when the option is the last argument.
    /// </summary>
    /// <param name="args">The full argument list.</param>
    /// <param name="index">Index of the option; advanced onto its value on success.</param>
    /// <param name="value">The option's value, or null when it is missing.</param>
    /// <returns>true when a value was available; false otherwise.</returns>
    private static bool TryReadOptionValue(string[] args, ref int index, out string value)
    {
        if (index + 1 >= args.Length)
        {
            value = null;
            Console.Error.WriteLine($"Error: Option '{args[index]}' requires a value.");
            return false;
        }

        value = args[++index];
        return true;
    }

    /// <summary>
    /// Consumes the argument that follows the option at <paramref name="index"/> and
    /// parses it as an integer. Prints an error to stderr when the value is missing
    /// or is not an integer.
    /// </summary>
    /// <param name="args">The full argument list.</param>
    /// <param name="index">Index of the option; advanced onto its value when one is present.</param>
    /// <param name="value">The parsed number, or 0 on failure.</param>
    /// <returns>true when a valid integer was read; false otherwise.</returns>
    private static bool TryReadIntOption(string[] args, ref int index, out int value)
    {
        string optionName = args[index];
        value = 0;

        string rawValue;
        if (!TryReadOptionValue(args, ref index, out rawValue))
            return false;

        if (!int.TryParse(rawValue, out value))
        {
            Console.Error.WriteLine($"Error: Option '{optionName}' expects an integer, but got '{rawValue}'.");
            return false;
        }
        return true;
    }

    /// <summary>
    /// Maps a mode name from the command line onto a <see cref="Mode"/> member,
    /// ignoring case and accepting "markov-w" as an alias for the weighted markov mode.
    /// </summary>
    /// <param name="modeName">The mode name as typed by the user.</param>
    /// <param name="mode">The matching mode, or <see cref="Mode.None"/> when nothing matches.</param>
    /// <returns>true when <paramref name="modeName"/> names a known mode; false otherwise.</returns>
    private static bool TryParseMode(string modeName, out Mode mode)
    {
        string normalisedName = modeName.Replace("markov-w", "weightedmarkov");

        // Compare against the declared names only, so that numeric or comma-separated
        // input is rejected instead of being silently converted to some mode.
        foreach (string candidate in Enum.GetNames(typeof(Mode)))
        {
            if (string.Equals(candidate, normalisedName, StringComparison.OrdinalIgnoreCase))
            {
                mode = (Mode)Enum.Parse(typeof(Mode), candidate);
                return true;
            }
        }

        mode = Mode.None;
        return false;
    }

    /// <summary>
    /// Turns one line of the word list into the word(s) it contributes.
    /// </summary>
    /// <param name="line">A raw input line.</param>
    /// <param name="convertLowercase">Whether to lowercase the line first.</param>
    /// <param name="splitOnWhitespace">
    /// Whether to break the line into separate words at whitespace; when false the
    /// trimmed line is returned as a single word.
    /// </param>
    /// <returns>The words found on the line; empty for a blank line.</returns>
    private static string[] SplitLine(string line, bool convertLowercase, bool splitOnWhitespace)
    {
        // No blank lines here!
        if (string.IsNullOrWhiteSpace(line))
            return new string[] { };

        string word = line.Trim();
        if (convertLowercase)
            word = word.ToLower();

        if (splitOnWhitespace)
        {
            // A null separator splits on every whitespace character; dropping empty
            // entries stops runs of spaces from yielding zero-length words.
            return word.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
        }
        return new string[] { word };
    }

    /// <summary>
    /// Writes the generation summary line to stderr.
    /// </summary>
    /// <param name="count">The number of words that were requested.</param>
    /// <param name="elapsedMilliseconds">How long generation took.</param>
    /// <param name="choicePointRatios">The choice-point ratio reported for each generated word.</param>
    private static void ReportGeneration(int count, long elapsedMilliseconds, List<float> choicePointRatios)
    {
        // Nothing generated means there is nothing to average; report 0 rather than NaN.
        double averageRatio = choicePointRatios.Count > 0
            ? Math.Round(choicePointRatios.Sum() / count, 2)
            : 0;
        Console.Error.WriteLine($"{count} words in {elapsedMilliseconds}ms (average choice-point-ratio: {averageRatio})");
    }

    /// <summary>
    /// Writes the usage message to stdout.
    /// </summary>
    private static void PrintHelp()
    {
        Console.WriteLine("Usage:");
        Console.WriteLine(" ./MarkovGrams.exe <mode> [options]");
        Console.WriteLine();
        Console.WriteLine("Available modes:");
        Console.WriteLine(" markov:");
        Console.WriteLine(" Generate new words using an unweighted markov chain.");
        Console.WriteLine(" markov-w:");
        Console.WriteLine(" Generate new words using a weighted markov chain.");
        Console.WriteLine(" ngrams:");
        Console.WriteLine(" Generate raw unique n-grams");
        Console.WriteLine();
        Console.WriteLine("Available options:");
        Console.WriteLine(" --help Show this message");
        Console.WriteLine(" --wordlist {filename} Read the wordlist from the specified filename instead of stdin");
        Console.WriteLine(" --order {number} Use the specified order when generating n-grams (default: 3)");
        Console.WriteLine(" --length {number} The target length of word to generate (Not available in ngrams mode; instead specifies the number of words to generate with --words)");
        Console.WriteLine(" --count {number} The number of words to generate (Not valid in ngrams mode)");
        Console.WriteLine(" --words Generate ngrams on word-level instead of character-level (Applies to all modes; implies --no-split)");
        Console.WriteLine(" --no-split Don't split input words on whitespace - treat each line as a single word");
        Console.WriteLine(" --lowercase Convert the input to lowercase before processing");
        Console.WriteLine(" --start-uppercase Start the generating a word only with n-grams that start with a capital letter");
        Console.WriteLine(" --no-unique Don't remove duplicates from the list of ngrams (Only valid in ngrams mode)");
        Console.WriteLine("Type just ./MarkovGrams.exe <mode> to see mode-specific help.");
    }
}
}