Initial commit. I think this is worth keeping in its entirety!

This commit is contained in:
Starbeamrainbowlabs 2017-04-26 21:35:00 +01:00
commit 647b4cc0b1
7 changed files with 705 additions and 0 deletions

.gitignore vendored Normal file
View file

@ -0,0 +1,434 @@
# Created by https://www.gitignore.io/api/monodevelop,csharp
### Csharp ###
## Ignore Visual Studio temporary files, build results, and
## files generated by popular Visual Studio add-ons.
## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
# User-specific files
# User-specific files (MonoDevelop/Xamarin Studio)
# Build results
# Visual Studio 2015 cache/options directory
# Uncomment if you have tasks that create the project's static files in wwwroot
# MSTest test Results
# Build Results of an ATL Project
# .NET Core
# Chutzpah Test files
# Visual C++ cache files
# Visual Studio profiler
# TFS 2012 Local Workspace
# Guidance Automation Toolkit
# ReSharper is a .NET coding add-in
# JustCode is a .NET coding add-in
# TeamCity is a build add-in
# DotCover is a Code Coverage Tool
# Visual Studio code coverage results
# NCrunch
# MightyMoose
# Web workbench (sass)
# Installshield output folder
# DocProject is a documentation generator add-in
# Click-Once directory
# Publish Web Output
# TODO: Comment the next line if you want to checkin your web deploy settings
# but database connection strings (with potential passwords) will be unencrypted
# Microsoft Azure Web App publish settings. Comment the next line if you want to
# checkin your Azure Web App publish settings, but sensitive information contained
# in these scripts will be unencrypted
# NuGet Packages
# The packages folder can be ignored because of Package Restore
# except build/, which is used as an MSBuild target.
# Uncomment if necessary however generally it will be regenerated when needed
# NuGet v3's project.json files produces more ignorable files
# Microsoft Azure Build Output
# Microsoft Azure Emulator
# Windows Store app package directories and files
# Visual Studio cache files
# files ending in .cache can be ignored
# but keep track of directories ending in .cache
# Others
# Since there are multiple workflows, uncomment next line to ignore bower_components
# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
# RIA/Silverlight projects
# Backup & report files from converting an old project file
# to a newer Visual Studio version. Backup files are not needed,
# because we have git ;-)
# SQL Server files
# Business Intelligence projects
# Microsoft Fakes
# GhostDoc plugin setting file
# Node.js Tools for Visual Studio
# Typescript v1 declaration files
# Visual Studio 6 build log
# Visual Studio 6 workspace options file
# Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
# Visual Studio LightSwitch build output
# Paket dependency manager
# FAKE - F# Make
# JetBrains Rider
# CodeRush
# Python Tools for Visual Studio (PTVS)
# Cake - Uncomment if you are using it
# tools/**
# !tools/packages.config
# Telerik's JustMock configuration file
# BizTalk build output
### MonoDevelop ###
#User Specific
#Mono Project Files
### VisualStudio ###
## Ignore Visual Studio temporary files, build results, and
## files generated by popular Visual Studio add-ons.
## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
# User-specific files
# User-specific files (MonoDevelop/Xamarin Studio)
# Build results
# Visual Studio 2015 cache/options directory
# Uncomment if you have tasks that create the project's static files in wwwroot
# MSTest test Results
# Build Results of an ATL Project
# .NET Core
# Chutzpah Test files
# Visual C++ cache files
# Visual Studio profiler
# TFS 2012 Local Workspace
# Guidance Automation Toolkit
# ReSharper is a .NET coding add-in
# JustCode is a .NET coding add-in
# TeamCity is a build add-in
# DotCover is a Code Coverage Tool
# Visual Studio code coverage results
# NCrunch
# MightyMoose
# Web workbench (sass)
# Installshield output folder
# DocProject is a documentation generator add-in
# Click-Once directory
# Publish Web Output
# TODO: Comment the next line if you want to checkin your web deploy settings
# but database connection strings (with potential passwords) will be unencrypted
# Microsoft Azure Web App publish settings. Comment the next line if you want to
# checkin your Azure Web App publish settings, but sensitive information contained
# in these scripts will be unencrypted
# NuGet Packages
# The packages folder can be ignored because of Package Restore
# except build/, which is used as an MSBuild target.
# Uncomment if necessary however generally it will be regenerated when needed
# NuGet v3's project.json files produces more ignorable files
# Microsoft Azure Build Output
# Microsoft Azure Emulator
# Windows Store app package directories and files
# Visual Studio cache files
# files ending in .cache can be ignored
# but keep track of directories ending in .cache
# Others
# Since there are multiple workflows, uncomment next line to ignore bower_components
# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
# RIA/Silverlight projects
# Backup & report files from converting an old project file
# to a newer Visual Studio version. Backup files are not needed,
# because we have git ;-)
# SQL Server files
# Business Intelligence projects
# Microsoft Fakes
# GhostDoc plugin setting file
# Node.js Tools for Visual Studio
# Typescript v1 declaration files
# Visual Studio 6 build log
# Visual Studio 6 workspace options file
# Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
# Visual Studio LightSwitch build output
# Paket dependency manager
# FAKE - F# Make
# JetBrains Rider
# CodeRush
# Python Tools for Visual Studio (PTVS)
# Cake - Uncomment if you are using it
# tools/**
# !tools/packages.config
# Telerik's JustMock configuration file
# BizTalk build output
# End of https://www.gitignore.io/api/monodevelop,csharp

MarkovGrams.sln Normal file
View file

@ -0,0 +1,17 @@
Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 2012
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MarkovGrams", "MarkovGrams\MarkovGrams.csproj", "{14743F58-9418-4147-9C2C-0626AD7185D3}"
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x86 = Debug|x86
Release|x86 = Release|x86
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{14743F58-9418-4147-9C2C-0626AD7185D3}.Debug|x86.ActiveCfg = Debug|x86
{14743F58-9418-4147-9C2C-0626AD7185D3}.Debug|x86.Build.0 = Debug|x86
{14743F58-9418-4147-9C2C-0626AD7185D3}.Release|x86.ActiveCfg = Release|x86
{14743F58-9418-4147-9C2C-0626AD7185D3}.Release|x86.Build.0 = Release|x86

View file

@ -0,0 +1,41 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">x86</Platform>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|x86' ">
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|x86' ">
<Reference Include="System" />
<Compile Include="Program.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="NGrams.cs" />
<Compile Include="UnweightedMarkovChain.cs" />
<Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />

MarkovGrams/NGrams.cs Normal file
View file

@ -0,0 +1,85 @@
using System;
using System.Collections.Generic;
using System.Linq;
namespace MarkovGrams
{
    /// <summary>
    /// A collection of methods to generate various different types of n-grams.
    /// </summary>
    public static class NGrams
    {
        /// <summary>
        /// Generates a unique list of n-grams from the given list of words.
        /// </summary>
        /// <param name="words">The words to turn into n-grams.</param>
        /// <param name="order">The order of n-gram to generate. Must be at least 1.</param>
        /// <returns>A unique list of n-grams found in the given list of words.</returns>
        /// <exception cref="ArgumentNullException">Thrown if <paramref name="words"/> (or any word in it) is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">Thrown if <paramref name="order"/> is less than 1.</exception>
        public static IEnumerable<string> GenerateFlat(IEnumerable<string> words, int order)
        {
            if(words == null)
            {
                throw new ArgumentNullException("words");
            }
            if(order < 1)
            {
                throw new ArgumentOutOfRangeException("order", order, "The n-gram order must be at least 1.");
            }

            List<string> results = new List<string>();
            foreach(string word in words)
            {
                results.AddRange(GenerateFlat(word, order));
            }
            // Each word is de-duplicated on its own; this removes duplicates across words.
            return results.Distinct();
        }

        /// <summary>
        /// Generates a unique list of n-grams from the given string.
        /// </summary>
        /// <param name="str">The string to n-gram-ise.</param>
        /// <param name="order">The order of n-gram to generate. Must be at least 1.</param>
        /// <returns>
        /// A unique list of n-grams found in the specified string, in order of first appearance.
        /// Empty if the string is shorter than <paramref name="order"/>.
        /// </returns>
        /// <exception cref="ArgumentNullException">Thrown if <paramref name="str"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">Thrown if <paramref name="order"/> is less than 1.</exception>
        public static IEnumerable<string> GenerateFlat(string str, int order)
        {
            if(str == null)
            {
                throw new ArgumentNullException("str");
            }
            if(order < 1)
            {
                throw new ArgumentOutOfRangeException("order", order, "The n-gram order must be at least 1.");
            }

            List<string> results = new List<string>();
            // A string of length L holds (L - order + 1) n-grams, so the bound is inclusive:
            // the last n-gram starts at index (L - order).
            for(int i = 0; i <= str.Length - order; i++)
            {
                results.Add(str.Substring(i, order));
            }
            return results.Distinct();
        }

        /// <summary>
        /// Generates a dictionary of weighted n-grams from the given list of words.
        /// The key is the ngram itself, and the value is the linear weight of the ngram
        /// (the total number of times it occurs across all the words).
        /// </summary>
        /// <param name="words">The words to n-gram-ise.</param>
        /// <param name="order">The order of ngrams to generate. Must be at least 1.</param>
        /// <returns>The weighted dictionary of ngrams.</returns>
        /// <exception cref="ArgumentNullException">Thrown if <paramref name="words"/> (or any word in it) is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">Thrown if <paramref name="order"/> is less than 1.</exception>
        public static Dictionary<string, int> GenerateWeighted(IEnumerable<string> words, int order)
        {
            if(words == null)
            {
                throw new ArgumentNullException("words");
            }
            if(order < 1)
            {
                throw new ArgumentOutOfRangeException("order", order, "The n-gram order must be at least 1.");
            }

            Dictionary<string, int> results = new Dictionary<string, int>();
            foreach(string word in words)
            {
                foreach(KeyValuePair<string, int> ngram in GenerateWeighted(word, order))
                {
                    // TryGetValue leaves runningTotal at 0 for an unseen n-gram, so existing
                    // totals are accumulated rather than overwritten.
                    int runningTotal;
                    results.TryGetValue(ngram.Key, out runningTotal);
                    results[ngram.Key] = runningTotal + ngram.Value;
                }
            }
            return results;
        }

        /// <summary>
        /// Generates a dictionary of weighted n-grams from the specified string.
        /// The key is the ngram itself, and the value is the number of times it occurs in the string.
        /// </summary>
        /// <param name="str">The string to n-gram-ise.</param>
        /// <param name="order">The order of n-grams to generate. Must be at least 1.</param>
        /// <returns>The weighted dictionary of ngrams.</returns>
        /// <exception cref="ArgumentNullException">Thrown if <paramref name="str"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">Thrown if <paramref name="order"/> is less than 1.</exception>
        public static Dictionary<string, int> GenerateWeighted(string str, int order)
        {
            if(str == null)
            {
                throw new ArgumentNullException("str");
            }
            if(order < 1)
            {
                throw new ArgumentOutOfRangeException("order", order, "The n-gram order must be at least 1.");
            }

            Dictionary<string, int> results = new Dictionary<string, int>();
            // Inclusive bound so that the n-gram ending on the final character is counted too.
            for(int i = 0; i <= str.Length - order; i++)
            {
                string ngram = str.Substring(i, order);
                int occurrences;
                results.TryGetValue(ngram, out occurrences);
                results[ngram] = occurrences + 1;
            }
            return results;
        }
    }
}

MarkovGrams/Program.cs Normal file
View file

@ -0,0 +1,30 @@
using System;
using System.Collections.Generic;
using System.IO;
namespace MarkovGrams
/// <summary>
/// Command-line entry point: builds an unweighted markov chain from the n-grams of a word list.
/// </summary>
class MainClass
/// <summary>
/// Reads the word list named by the first argument, generates flat n-grams of the requested order,
/// and constructs an <c>UnweightedMarkovChain</c> from them.
/// </summary>
/// <param name="args">
/// Expected to hold exactly three values: the word list filename, the n-gram order,
/// and the desired length of the generated string.
/// </param>
public static void Main(string[] args)
// Anything other than exactly three arguments triggers the usage message.
if(args.Length != 3)
Console.WriteLine(" ./MarkovGrams.exe <wordlist.txt> <order> <length>");
// NOTE(review): no return/exit is visible after the usage message in this view; without one,
// the args[...] accesses below would run on a wrong-length array - confirm against the full file.
string wordlistFilename = args[0];
// Both numeric arguments are parsed with int.Parse, with no handling of non-numeric input visible here.
int order = int.Parse(args[1]);
int desiredStringLength = int.Parse(args[2]);
// One word per line of the word list file.
IEnumerable<string> words = File.ReadLines(wordlistFilename);
IEnumerable<string> ngrams = NGrams.GenerateFlat(words, order);
UnweightedMarkovChain chain = new UnweightedMarkovChain(ngrams);
// NOTE(review): neither chain nor desiredStringLength is used in the visible portion; the rest of
// Main (presumably the generation/output step) appears to be missing from this view - TODO confirm.

View file

@ -0,0 +1,26 @@
using System.Reflection;
using System.Runtime.CompilerServices;
// Information about this assembly is defined by the following attributes.
// Change them to the values specific to your project.
[assembly: AssemblyTitle("MarkovGrams")]
[assembly: AssemblyDescription("")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("")]
[assembly: AssemblyProduct("")]
[assembly: AssemblyCopyright("sbrl")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]
// The assembly version has the format "{Major}.{Minor}.{Build}.{Revision}".
// The form "{Major}.{Minor}.*" will automatically update the build and revision,
// and "{Major}.{Minor}.{Build}.*" will update just the revision.
[assembly: AssemblyVersion("1.0.*")]
// The following attributes are used to specify the signing key for the assembly,
// if desired. See the Mono documentation for more information about signing.
//[assembly: AssemblyDelaySign(false)]
//[assembly: AssemblyKeyFile("")]

View file

@ -0,0 +1,72 @@
using System;
using System.Collections.Generic;
using System.Linq;
namespace MarkovGrams
{
    /// <summary>
    /// An unweighted character-based markov chain.
    /// Every stored n-gram is equally likely to be chosen at each step.
    /// </summary>
    public class UnweightedMarkovChain
    {
        /// <summary>
        /// The random number generator used to choose n-grams.
        /// </summary>
        private readonly Random rand = new Random();

        /// <summary>
        /// The ngrams that this markov chain currently contains.
        /// </summary>
        private readonly List<string> ngrams;

        /// <summary>
        /// Creates a new character-based markov chain.
        /// </summary>
        /// <param name="inNgrams">The ngrams to populate the new markov chain with.</param>
        /// <exception cref="ArgumentNullException">Thrown if <paramref name="inNgrams"/> is null.</exception>
        public UnweightedMarkovChain(IEnumerable<string> inNgrams)
        {
            if(inNgrams == null)
            {
                throw new ArgumentNullException("inNgrams");
            }
            // Copy the sequence so that later changes to the caller's collection don't affect the chain.
            ngrams = new List<string>(inNgrams);
        }

        /// <summary>
        /// Returns a random ngram that's currently loaded into this UnweightedMarkovChain.
        /// </summary>
        /// <returns>A random ngram from this UnweightedMarkovChain's cache of ngrams.</returns>
        /// <exception cref="ArgumentOutOfRangeException">Thrown if this chain contains no ngrams.</exception>
        public string RandomNgram()
        {
            // The upper bound of Random.Next is exclusive, so this covers every valid index.
            return ngrams[rand.Next(0, ngrams.Count)];
        }

        /// <summary>
        /// Generates a new random string from the currently stored ngrams.
        /// </summary>
        /// <param name="length">
        /// The length of string to generate.
        /// Note that this is a target, not a fixed value - e.g. passing 2 when the n-gram order is 3 will
        /// result in a string of length 3. Also, depending on the current ngrams this markov chain contains,
        /// it may end up being cut short.
        /// </param>
        /// <returns>A new random string.</returns>
        /// <exception cref="ArgumentOutOfRangeException">Thrown if this chain contains no ngrams.</exception>
        public string Generate(int length)
        {
            string result = RandomNgram();
            string lastNgram = result;
            while(result.Length < length)
            {
                // The substring that the next ngram in the chain needs to start with
                string nextStartsWith = lastNgram.Substring(1);
                // Get a list of possible n-grams we could choose from next.
                // Ordinal comparison: n-grams are raw character sequences, not linguistic text.
                List<string> nextNgrams = ngrams.FindAll(
                    gram => gram.StartsWith(nextStartsWith, StringComparison.Ordinal)
                );
                // If there aren't any choices left, we can't exactly keep adding to the new string any more :-(
                if(nextNgrams.Count == 0)
                {
                    break;
                }
                // Pick a random n-gram from the list
                string nextNgram = nextNgrams[rand.Next(0, nextNgrams.Count)];
                // Add the last character from the n-gram to the string we're building
                result += nextNgram[nextNgram.Length - 1];
                // Advance the chain so that the next step continues from the n-gram we just used
                lastNgram = nextNgram;
            }
            return result;
        }
    }
}