Initial commit. I think this si worth keeping in it's entirety!

This commit is contained in:
Starbeamrainbowlabs 2017-04-26 21:35:00 +01:00
commit 647b4cc0b1
7 changed files with 705 additions and 0 deletions

434
.gitignore vendored Normal file
View file

@ -0,0 +1,434 @@
# Created by https://www.gitignore.io/api/visualstudio,monodevelop,csharp
### Csharp ###
## Ignore Visual Studio temporary files, build results, and
## files generated by popular Visual Studio add-ons.
##
## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
# User-specific files
*.suo
*.user
*.userosscache
*.sln.docstates
# User-specific files (MonoDevelop/Xamarin Studio)
*.userprefs
# Build results
[Dd]ebug/
[Dd]ebugPublic/
[Rr]elease/
[Rr]eleases/
x64/
x86/
bld/
[Bb]in/
[Oo]bj/
[Ll]og/
# Visual Studio 2015 cache/options directory
.vs/
# Uncomment if you have tasks that create the project's static files in wwwroot
#wwwroot/
# MSTest test Results
[Tt]est[Rr]esult*/
[Bb]uild[Ll]og.*
# NUNIT
*.VisualState.xml
TestResult.xml
# Build Results of an ATL Project
[Dd]ebugPS/
[Rr]eleasePS/
dlldata.c
# .NET Core
project.lock.json
project.fragment.lock.json
artifacts/
**/Properties/launchSettings.json
*_i.c
*_p.c
*_i.h
*.ilk
*.meta
*.obj
*.pch
*.pdb
*.pgc
*.pgd
*.rsp
*.sbr
*.tlb
*.tli
*.tlh
*.tmp
*.tmp_proj
*.log
*.vspscc
*.vssscc
.builds
*.pidb
*.svclog
*.scc
# Chutzpah Test files
_Chutzpah*
# Visual C++ cache files
ipch/
*.aps
*.ncb
*.opendb
*.opensdf
*.sdf
*.cachefile
*.VC.db
*.VC.VC.opendb
# Visual Studio profiler
*.psess
*.vsp
*.vspx
*.sap
# TFS 2012 Local Workspace
$tf/
# Guidance Automation Toolkit
*.gpState
# ReSharper is a .NET coding add-in
_ReSharper*/
*.[Rr]e[Ss]harper
*.DotSettings.user
# JustCode is a .NET coding add-in
.JustCode
# TeamCity is a build add-in
_TeamCity*
# DotCover is a Code Coverage Tool
*.dotCover
# Visual Studio code coverage results
*.coverage
*.coveragexml
# NCrunch
_NCrunch_*
.*crunch*.local.xml
nCrunchTemp_*
# MightyMoose
*.mm.*
AutoTest.Net/
# Web workbench (sass)
.sass-cache/
# Installshield output folder
[Ee]xpress/
# DocProject is a documentation generator add-in
DocProject/buildhelp/
DocProject/Help/*.HxT
DocProject/Help/*.HxC
DocProject/Help/*.hhc
DocProject/Help/*.hhk
DocProject/Help/*.hhp
DocProject/Help/Html2
DocProject/Help/html
# Click-Once directory
publish/
# Publish Web Output
*.[Pp]ublish.xml
*.azurePubxml
# TODO: Comment the next line if you want to checkin your web deploy settings
# but database connection strings (with potential passwords) will be unencrypted
*.pubxml
*.publishproj
# Microsoft Azure Web App publish settings. Comment the next line if you want to
# checkin your Azure Web App publish settings, but sensitive information contained
# in these scripts will be unencrypted
PublishScripts/
# NuGet Packages
*.nupkg
# The packages folder can be ignored because of Package Restore
**/packages/*
# except build/, which is used as an MSBuild target.
!**/packages/build/
# Uncomment if necessary however generally it will be regenerated when needed
#!**/packages/repositories.config
# NuGet v3's project.json files produces more ignorable files
*.nuget.props
*.nuget.targets
# Microsoft Azure Build Output
csx/
*.build.csdef
# Microsoft Azure Emulator
ecf/
rcf/
# Windows Store app package directories and files
AppPackages/
BundleArtifacts/
Package.StoreAssociation.xml
_pkginfo.txt
# Visual Studio cache files
# files ending in .cache can be ignored
*.[Cc]ache
# but keep track of directories ending in .cache
!*.[Cc]ache/
# Others
ClientBin/
~$*
*~
*.dbmdl
*.dbproj.schemaview
*.jfm
*.pfx
*.publishsettings
orleans.codegen.cs
# Since there are multiple workflows, uncomment next line to ignore bower_components
# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
#bower_components/
# RIA/Silverlight projects
Generated_Code/
# Backup & report files from converting an old project file
# to a newer Visual Studio version. Backup files are not needed,
# because we have git ;-)
_UpgradeReport_Files/
Backup*/
UpgradeLog*.XML
UpgradeLog*.htm
# SQL Server files
*.mdf
*.ldf
*.ndf
# Business Intelligence projects
*.rdl.data
*.bim.layout
*.bim_*.settings
# Microsoft Fakes
FakesAssemblies/
# GhostDoc plugin setting file
*.GhostDoc.xml
# Node.js Tools for Visual Studio
.ntvs_analysis.dat
node_modules/
# Typescript v1 declaration files
typings/
# Visual Studio 6 build log
*.plg
# Visual Studio 6 workspace options file
*.opt
# Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
*.vbw
# Visual Studio LightSwitch build output
**/*.HTMLClient/GeneratedArtifacts
**/*.DesktopClient/GeneratedArtifacts
**/*.DesktopClient/ModelManifest.xml
**/*.Server/GeneratedArtifacts
**/*.Server/ModelManifest.xml
_Pvt_Extensions
# Paket dependency manager
.paket/paket.exe
paket-files/
# FAKE - F# Make
.fake/
# JetBrains Rider
.idea/
*.sln.iml
# CodeRush
.cr/
# Python Tools for Visual Studio (PTVS)
__pycache__/
*.pyc
# Cake - Uncomment if you are using it
# tools/**
# !tools/packages.config
# Telerik's JustMock configuration file
*.jmconfig
# BizTalk build output
*.btp.cs
*.btm.cs
*.odx.cs
*.xsd.cs
### MonoDevelop ###
#User Specific
*.usertasks
#Mono Project Files
*.resources
test-results/
### VisualStudio ###
## Ignore Visual Studio temporary files, build results, and
## files generated by popular Visual Studio add-ons.
##
## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
# User-specific files
# User-specific files (MonoDevelop/Xamarin Studio)
# Build results
# Visual Studio 2015 cache/options directory
# Uncomment if you have tasks that create the project's static files in wwwroot
#wwwroot/
# MSTest test Results
# NUNIT
# Build Results of an ATL Project
# .NET Core
# Chutzpah Test files
# Visual C++ cache files
# Visual Studio profiler
# TFS 2012 Local Workspace
# Guidance Automation Toolkit
# ReSharper is a .NET coding add-in
# JustCode is a .NET coding add-in
# TeamCity is a build add-in
# DotCover is a Code Coverage Tool
# Visual Studio code coverage results
# NCrunch
# MightyMoose
# Web workbench (sass)
# Installshield output folder
# DocProject is a documentation generator add-in
# Click-Once directory
# Publish Web Output
# TODO: Comment the next line if you want to checkin your web deploy settings
# but database connection strings (with potential passwords) will be unencrypted
# Microsoft Azure Web App publish settings. Comment the next line if you want to
# checkin your Azure Web App publish settings, but sensitive information contained
# in these scripts will be unencrypted
# NuGet Packages
# The packages folder can be ignored because of Package Restore
# except build/, which is used as an MSBuild target.
# Uncomment if necessary however generally it will be regenerated when needed
#!**/packages/repositories.config
# NuGet v3's project.json files produces more ignorable files
# Microsoft Azure Build Output
# Microsoft Azure Emulator
# Windows Store app package directories and files
# Visual Studio cache files
# files ending in .cache can be ignored
# but keep track of directories ending in .cache
# Others
# Since there are multiple workflows, uncomment next line to ignore bower_components
# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
#bower_components/
# RIA/Silverlight projects
# Backup & report files from converting an old project file
# to a newer Visual Studio version. Backup files are not needed,
# because we have git ;-)
# SQL Server files
# Business Intelligence projects
# Microsoft Fakes
# GhostDoc plugin setting file
# Node.js Tools for Visual Studio
# Typescript v1 declaration files
# Visual Studio 6 build log
# Visual Studio 6 workspace options file
# Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
# Visual Studio LightSwitch build output
# Paket dependency manager
# FAKE - F# Make
# JetBrains Rider
# CodeRush
# Python Tools for Visual Studio (PTVS)
# Cake - Uncomment if you are using it
# tools/**
# !tools/packages.config
# Telerik's JustMock configuration file
# BizTalk build output
# End of https://www.gitignore.io/api/visualstudio,monodevelop,csharp

17
MarkovGrams.sln Normal file
View file

@ -0,0 +1,17 @@
Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 2012
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MarkovGrams", "MarkovGrams\MarkovGrams.csproj", "{14743F58-9418-4147-9C2C-0626AD7185D3}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x86 = Debug|x86
Release|x86 = Release|x86
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{14743F58-9418-4147-9C2C-0626AD7185D3}.Debug|x86.ActiveCfg = Debug|x86
{14743F58-9418-4147-9C2C-0626AD7185D3}.Debug|x86.Build.0 = Debug|x86
{14743F58-9418-4147-9C2C-0626AD7185D3}.Release|x86.ActiveCfg = Release|x86
{14743F58-9418-4147-9C2C-0626AD7185D3}.Release|x86.Build.0 = Release|x86
EndGlobalSection
EndGlobal

View file

@ -0,0 +1,41 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">x86</Platform>
<ProjectGuid>{14743F58-9418-4147-9C2C-0626AD7185D3}</ProjectGuid>
<OutputType>Exe</OutputType>
<RootNamespace>MarkovGrams</RootNamespace>
<AssemblyName>MarkovGrams</AssemblyName>
<TargetFrameworkVersion>v4.5</TargetFrameworkVersion>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|x86' ">
<DebugSymbols>true</DebugSymbols>
<DebugType>full</DebugType>
<Optimize>false</Optimize>
<OutputPath>bin\Debug</OutputPath>
<DefineConstants>DEBUG;</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
<ExternalConsole>true</ExternalConsole>
<PlatformTarget>x86</PlatformTarget>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|x86' ">
<Optimize>true</Optimize>
<OutputPath>bin\Release</OutputPath>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
<ExternalConsole>true</ExternalConsole>
<PlatformTarget>x86</PlatformTarget>
</PropertyGroup>
<ItemGroup>
<Reference Include="System" />
</ItemGroup>
<ItemGroup>
<Compile Include="Program.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="NGrams.cs" />
<Compile Include="UnweightedMarkovChain.cs" />
</ItemGroup>
<Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
</Project>

85
MarkovGrams/NGrams.cs Normal file
View file

@ -0,0 +1,85 @@
using System;
using System.Collections.Generic;
using System.Linq;
namespace MarkovGrams
{
/// <summary>
/// A collection of methods to generate various different types of n-grams.
/// </summary>
public static class NGrams
{
/// <summary>
/// Generates a unique list of n-grams that the given list of words.
/// </summary>
/// <param name="words">The words to turn into n-grams.</param>
/// <param name="order">The order of n-gram to generate..</param>
/// <returns>A unique list of n-grams found in the given list of words.</returns>
public static IEnumerable<string> GenerateFlat(IEnumerable<string> words, int order)
{
List<string> results = new List<string>();
foreach(string word in words)
{
results.AddRange(GenerateFlat(word, order));
}
return results.Distinct();
}
/// <summary>
/// Generates a unique list of n-grams from the given string.
/// </summary>
/// <param name="str">The string to n-gram-ise.</param>
/// <param name="order">The order of n-gram to generate.</param>
/// <returns>A unique list of n-grams found in the specified string.</returns>
public static IEnumerable<string> GenerateFlat(string str, int order)
{
List<string> results = new List<string>();
for(int i = 0; i < str.Length - order; i++)
{
results.Add(str.Substring(i, order));
}
return results.Distinct();
}
/// <summary>
/// Generates a dictionary of weighted n-grams from the given list of words.
/// The key is the ngram itself, and the value is the linear weight of the ngram.
/// </summary>
/// <param name="words">The words to n-gram-ise.</param>
/// <param name="order">The order of ngrams to generate.</param>
/// <returns>The weighted dictionary of ngrams.</returns>
public static Dictionary<string, int> GenerateWeighted(IEnumerable<string> words, int order)
{
Dictionary<string, int> results = new Dictionary<string, int>();
foreach(string word in words)
{
Dictionary<string, int> wordNgrams = GenerateWeighted(word, order);
foreach(KeyValuePair<string, int> ngram in wordNgrams)
{
if(!results.ContainsKey(ngram.Key))
results[ngram.Key] = 0;
results[ngram.Key] += ngram.Value;
}
}
return results;
}
/// <summary>
/// Generates a dictionary of weighted n-grams from the specified string.
/// </summary>
/// <param name="str">The string to n-gram-ise.</param>
/// <param name="order">The order of n-grams to generate.</param>
/// <returns>The weighted dictionary of ngrams.</returns>
public static Dictionary<string, int> GenerateWeighted(string str, int order)
{
Dictionary<string, int> results = new Dictionary<string, int>();
for(int i = 0; i < str.Length - order; i++)
{
string ngram = str.Substring(i, order);
if(!results.ContainsKey(ngram))
results[ngram] = 0;
results[ngram]++;
}
return results;
}
}
}

30
MarkovGrams/Program.cs Normal file
View file

@ -0,0 +1,30 @@
using System;
using System.Collections.Generic;
using System.IO;
namespace MarkovGrams
{
class MainClass
{
public static void Main(string[] args)
{
if(args.Length != 3)
{
Console.WriteLine("Usage:");
Console.WriteLine(" ./MarkovGrams.exe <wordlist.txt> <order> <length>");
return;
}
string wordlistFilename = args[0];
int order = int.Parse(args[1]);
int desiredStringLength = int.Parse(args[2]);
IEnumerable<string> words = File.ReadLines(wordlistFilename);
IEnumerable<string> ngrams = NGrams.GenerateFlat(words, order);
UnweightedMarkovChain chain = new UnweightedMarkovChain(ngrams);
Console.WriteLine(chain.Generate(desiredStringLength));
}
}
}

View file

@ -0,0 +1,26 @@
using System.Reflection;
using System.Runtime.CompilerServices;
// Information about this assembly is defined by the following attributes.
// Change them to the values specific to your project.
[assembly: AssemblyTitle("MarkovGrams")]
[assembly: AssemblyDescription("")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("")]
[assembly: AssemblyProduct("")]
[assembly: AssemblyCopyright("sbrl")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]
// The assembly version has the format "{Major}.{Minor}.{Build}.{Revision}".
// The form "{Major}.{Minor}.*" will automatically update the build and revision,
// and "{Major}.{Minor}.{Build}.*" will update just the revision.
[assembly: AssemblyVersion("1.0.*")]
// The following attributes are used to specify the signing key for the assembly,
// if desired. See the Mono documentation for more information about signing.
//[assembly: AssemblyDelaySign(false)]
//[assembly: AssemblyKeyFile("")]

View file

@ -0,0 +1,72 @@
using System;
using System.Collections.Generic;
using System.Linq;
namespace MarkovGrams
{
/// <summary>
/// An unweighted character-based markov chain.
/// </summary>
public class UnweightedMarkovChain
{
/// <summary>
/// The random number generator
/// </summary>
Random rand = new Random();
/// <summary>
/// The ngrams that this markov chain currently contains.
/// </summary>
List<string> ngrams;
/// <summary>
/// Creates a new character-based markov chain.
/// </summary>
/// <param name="inNgrams">The ngrams to populate the new markov chain with.</param>
public UnweightedMarkovChain(IEnumerable<string> inNgrams)
{
ngrams = new List<string>(inNgrams);
}
/// <summary>
/// Returns a random ngram that's currently loaded into this UnweightedMarkovChain.
/// </summary>
/// <returns>A random ngram from this UnweightMarkovChain's cache of ngrams.</returns>
public string RandomNgram()
{
return ngrams[rand.Next(0, ngrams.Count)];
}
/// <summary>
/// Generates a new random string from the currently stored ngrams.
/// </summary>
/// <param name="length">
/// The length of ngram to generate.
/// Note that this is a target, not a fixed value - e.g. passing 2 when the n-gram order is 3 will
/// result in a string of length 3. Also, depending on the current ngrams this markov chain contains,
/// it may end up being cut short.
/// </param>
/// <returns>A new random string.</returns>
public string Generate(int length)
{
string result = RandomNgram();
string lastNgram = result;
while(result.Length < length)
{
// The substring that the next ngram in the chain needs to start with
string nextStartsWith = lastNgram.Substring(1);
// Get a list of possible n-grams we could choose from next
List<string> nextNgrams = ngrams.FindAll(gram => gram.StartsWith(nextStartsWith));
// If there aren't any choices left, we can't exactly keep adding to the new string any more :-(
if(nextNgrams.Count == 0)
break;
// Pick a random n-gram from the list
string nextNgram = nextNgrams.ElementAt(rand.Next(0, nextNgrams.Count));
// Add the last character from the n-gram to the string we're building
result += nextNgram[nextNgram.Length - 1];
}
return result;
}
}
}