Initial commit. I think this is worth keeping in its entirety!
This commit is contained in:
commit
647b4cc0b1
434
.gitignore
vendored
Normal file
434
.gitignore
vendored
Normal file
|
@ -0,0 +1,434 @@
|
|||
|
||||
# Created by https://www.gitignore.io/api/visualstudio,monodevelop,csharp
|
||||
|
||||
### Csharp ###
|
||||
## Ignore Visual Studio temporary files, build results, and
|
||||
## files generated by popular Visual Studio add-ons.
|
||||
##
|
||||
## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
|
||||
|
||||
# User-specific files
|
||||
*.suo
|
||||
*.user
|
||||
*.userosscache
|
||||
*.sln.docstates
|
||||
|
||||
# User-specific files (MonoDevelop/Xamarin Studio)
|
||||
*.userprefs
|
||||
|
||||
# Build results
|
||||
[Dd]ebug/
|
||||
[Dd]ebugPublic/
|
||||
[Rr]elease/
|
||||
[Rr]eleases/
|
||||
x64/
|
||||
x86/
|
||||
bld/
|
||||
[Bb]in/
|
||||
[Oo]bj/
|
||||
[Ll]og/
|
||||
|
||||
# Visual Studio 2015 cache/options directory
|
||||
.vs/
|
||||
# Uncomment if you have tasks that create the project's static files in wwwroot
|
||||
#wwwroot/
|
||||
|
||||
# MSTest test Results
|
||||
[Tt]est[Rr]esult*/
|
||||
[Bb]uild[Ll]og.*
|
||||
|
||||
# NUNIT
|
||||
*.VisualState.xml
|
||||
TestResult.xml
|
||||
|
||||
# Build Results of an ATL Project
|
||||
[Dd]ebugPS/
|
||||
[Rr]eleasePS/
|
||||
dlldata.c
|
||||
|
||||
# .NET Core
|
||||
project.lock.json
|
||||
project.fragment.lock.json
|
||||
artifacts/
|
||||
**/Properties/launchSettings.json
|
||||
|
||||
*_i.c
|
||||
*_p.c
|
||||
*_i.h
|
||||
*.ilk
|
||||
*.meta
|
||||
*.obj
|
||||
*.pch
|
||||
*.pdb
|
||||
*.pgc
|
||||
*.pgd
|
||||
*.rsp
|
||||
*.sbr
|
||||
*.tlb
|
||||
*.tli
|
||||
*.tlh
|
||||
*.tmp
|
||||
*.tmp_proj
|
||||
*.log
|
||||
*.vspscc
|
||||
*.vssscc
|
||||
.builds
|
||||
*.pidb
|
||||
*.svclog
|
||||
*.scc
|
||||
|
||||
# Chutzpah Test files
|
||||
_Chutzpah*
|
||||
|
||||
# Visual C++ cache files
|
||||
ipch/
|
||||
*.aps
|
||||
*.ncb
|
||||
*.opendb
|
||||
*.opensdf
|
||||
*.sdf
|
||||
*.cachefile
|
||||
*.VC.db
|
||||
*.VC.VC.opendb
|
||||
|
||||
# Visual Studio profiler
|
||||
*.psess
|
||||
*.vsp
|
||||
*.vspx
|
||||
*.sap
|
||||
|
||||
# TFS 2012 Local Workspace
|
||||
$tf/
|
||||
|
||||
# Guidance Automation Toolkit
|
||||
*.gpState
|
||||
|
||||
# ReSharper is a .NET coding add-in
|
||||
_ReSharper*/
|
||||
*.[Rr]e[Ss]harper
|
||||
*.DotSettings.user
|
||||
|
||||
# JustCode is a .NET coding add-in
|
||||
.JustCode
|
||||
|
||||
# TeamCity is a build add-in
|
||||
_TeamCity*
|
||||
|
||||
# DotCover is a Code Coverage Tool
|
||||
*.dotCover
|
||||
|
||||
# Visual Studio code coverage results
|
||||
*.coverage
|
||||
*.coveragexml
|
||||
|
||||
# NCrunch
|
||||
_NCrunch_*
|
||||
.*crunch*.local.xml
|
||||
nCrunchTemp_*
|
||||
|
||||
# MightyMoose
|
||||
*.mm.*
|
||||
AutoTest.Net/
|
||||
|
||||
# Web workbench (sass)
|
||||
.sass-cache/
|
||||
|
||||
# Installshield output folder
|
||||
[Ee]xpress/
|
||||
|
||||
# DocProject is a documentation generator add-in
|
||||
DocProject/buildhelp/
|
||||
DocProject/Help/*.HxT
|
||||
DocProject/Help/*.HxC
|
||||
DocProject/Help/*.hhc
|
||||
DocProject/Help/*.hhk
|
||||
DocProject/Help/*.hhp
|
||||
DocProject/Help/Html2
|
||||
DocProject/Help/html
|
||||
|
||||
# Click-Once directory
|
||||
publish/
|
||||
|
||||
# Publish Web Output
|
||||
*.[Pp]ublish.xml
|
||||
*.azurePubxml
|
||||
# TODO: Comment the next line if you want to checkin your web deploy settings
|
||||
# but database connection strings (with potential passwords) will be unencrypted
|
||||
*.pubxml
|
||||
*.publishproj
|
||||
|
||||
# Microsoft Azure Web App publish settings. Comment the next line if you want to
|
||||
# checkin your Azure Web App publish settings, but sensitive information contained
|
||||
# in these scripts will be unencrypted
|
||||
PublishScripts/
|
||||
|
||||
# NuGet Packages
|
||||
*.nupkg
|
||||
# The packages folder can be ignored because of Package Restore
|
||||
**/packages/*
|
||||
# except build/, which is used as an MSBuild target.
|
||||
!**/packages/build/
|
||||
# Uncomment if necessary however generally it will be regenerated when needed
|
||||
#!**/packages/repositories.config
|
||||
# NuGet v3's project.json files produce more ignorable files
|
||||
*.nuget.props
|
||||
*.nuget.targets
|
||||
|
||||
# Microsoft Azure Build Output
|
||||
csx/
|
||||
*.build.csdef
|
||||
|
||||
# Microsoft Azure Emulator
|
||||
ecf/
|
||||
rcf/
|
||||
|
||||
# Windows Store app package directories and files
|
||||
AppPackages/
|
||||
BundleArtifacts/
|
||||
Package.StoreAssociation.xml
|
||||
_pkginfo.txt
|
||||
|
||||
# Visual Studio cache files
|
||||
# files ending in .cache can be ignored
|
||||
*.[Cc]ache
|
||||
# but keep track of directories ending in .cache
|
||||
!*.[Cc]ache/
|
||||
|
||||
# Others
|
||||
ClientBin/
|
||||
~$*
|
||||
*~
|
||||
*.dbmdl
|
||||
*.dbproj.schemaview
|
||||
*.jfm
|
||||
*.pfx
|
||||
*.publishsettings
|
||||
orleans.codegen.cs
|
||||
|
||||
# Since there are multiple workflows, uncomment next line to ignore bower_components
|
||||
# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
|
||||
#bower_components/
|
||||
|
||||
# RIA/Silverlight projects
|
||||
Generated_Code/
|
||||
|
||||
# Backup & report files from converting an old project file
|
||||
# to a newer Visual Studio version. Backup files are not needed,
|
||||
# because we have git ;-)
|
||||
_UpgradeReport_Files/
|
||||
Backup*/
|
||||
UpgradeLog*.XML
|
||||
UpgradeLog*.htm
|
||||
|
||||
# SQL Server files
|
||||
*.mdf
|
||||
*.ldf
|
||||
*.ndf
|
||||
|
||||
# Business Intelligence projects
|
||||
*.rdl.data
|
||||
*.bim.layout
|
||||
*.bim_*.settings
|
||||
|
||||
# Microsoft Fakes
|
||||
FakesAssemblies/
|
||||
|
||||
# GhostDoc plugin setting file
|
||||
*.GhostDoc.xml
|
||||
|
||||
# Node.js Tools for Visual Studio
|
||||
.ntvs_analysis.dat
|
||||
node_modules/
|
||||
|
||||
# Typescript v1 declaration files
|
||||
typings/
|
||||
|
||||
# Visual Studio 6 build log
|
||||
*.plg
|
||||
|
||||
# Visual Studio 6 workspace options file
|
||||
*.opt
|
||||
|
||||
# Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
|
||||
*.vbw
|
||||
|
||||
# Visual Studio LightSwitch build output
|
||||
**/*.HTMLClient/GeneratedArtifacts
|
||||
**/*.DesktopClient/GeneratedArtifacts
|
||||
**/*.DesktopClient/ModelManifest.xml
|
||||
**/*.Server/GeneratedArtifacts
|
||||
**/*.Server/ModelManifest.xml
|
||||
_Pvt_Extensions
|
||||
|
||||
# Paket dependency manager
|
||||
.paket/paket.exe
|
||||
paket-files/
|
||||
|
||||
# FAKE - F# Make
|
||||
.fake/
|
||||
|
||||
# JetBrains Rider
|
||||
.idea/
|
||||
*.sln.iml
|
||||
|
||||
# CodeRush
|
||||
.cr/
|
||||
|
||||
# Python Tools for Visual Studio (PTVS)
|
||||
__pycache__/
|
||||
*.pyc
|
||||
|
||||
# Cake - Uncomment if you are using it
|
||||
# tools/**
|
||||
# !tools/packages.config
|
||||
|
||||
# Telerik's JustMock configuration file
|
||||
*.jmconfig
|
||||
|
||||
# BizTalk build output
|
||||
*.btp.cs
|
||||
*.btm.cs
|
||||
*.odx.cs
|
||||
*.xsd.cs
|
||||
|
||||
### MonoDevelop ###
|
||||
#User Specific
|
||||
*.usertasks
|
||||
|
||||
#Mono Project Files
|
||||
*.resources
|
||||
test-results/
|
||||
|
||||
### VisualStudio ###
|
||||
## Ignore Visual Studio temporary files, build results, and
|
||||
## files generated by popular Visual Studio add-ons.
|
||||
##
|
||||
## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
|
||||
|
||||
# User-specific files
|
||||
|
||||
# User-specific files (MonoDevelop/Xamarin Studio)
|
||||
|
||||
# Build results
|
||||
|
||||
# Visual Studio 2015 cache/options directory
|
||||
# Uncomment if you have tasks that create the project's static files in wwwroot
|
||||
#wwwroot/
|
||||
|
||||
# MSTest test Results
|
||||
|
||||
# NUNIT
|
||||
|
||||
# Build Results of an ATL Project
|
||||
|
||||
# .NET Core
|
||||
|
||||
|
||||
# Chutzpah Test files
|
||||
|
||||
# Visual C++ cache files
|
||||
|
||||
# Visual Studio profiler
|
||||
|
||||
# TFS 2012 Local Workspace
|
||||
|
||||
# Guidance Automation Toolkit
|
||||
|
||||
# ReSharper is a .NET coding add-in
|
||||
|
||||
# JustCode is a .NET coding add-in
|
||||
|
||||
# TeamCity is a build add-in
|
||||
|
||||
# DotCover is a Code Coverage Tool
|
||||
|
||||
# Visual Studio code coverage results
|
||||
|
||||
# NCrunch
|
||||
|
||||
# MightyMoose
|
||||
|
||||
# Web workbench (sass)
|
||||
|
||||
# Installshield output folder
|
||||
|
||||
# DocProject is a documentation generator add-in
|
||||
|
||||
# Click-Once directory
|
||||
|
||||
# Publish Web Output
|
||||
# TODO: Comment the next line if you want to checkin your web deploy settings
|
||||
# but database connection strings (with potential passwords) will be unencrypted
|
||||
|
||||
# Microsoft Azure Web App publish settings. Comment the next line if you want to
|
||||
# checkin your Azure Web App publish settings, but sensitive information contained
|
||||
# in these scripts will be unencrypted
|
||||
|
||||
# NuGet Packages
|
||||
# The packages folder can be ignored because of Package Restore
|
||||
# except build/, which is used as an MSBuild target.
|
||||
# Uncomment if necessary however generally it will be regenerated when needed
|
||||
#!**/packages/repositories.config
|
||||
# NuGet v3's project.json files produce more ignorable files
|
||||
|
||||
# Microsoft Azure Build Output
|
||||
|
||||
# Microsoft Azure Emulator
|
||||
|
||||
# Windows Store app package directories and files
|
||||
|
||||
# Visual Studio cache files
|
||||
# files ending in .cache can be ignored
|
||||
# but keep track of directories ending in .cache
|
||||
|
||||
# Others
|
||||
|
||||
# Since there are multiple workflows, uncomment next line to ignore bower_components
|
||||
# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
|
||||
#bower_components/
|
||||
|
||||
# RIA/Silverlight projects
|
||||
|
||||
# Backup & report files from converting an old project file
|
||||
# to a newer Visual Studio version. Backup files are not needed,
|
||||
# because we have git ;-)
|
||||
|
||||
# SQL Server files
|
||||
|
||||
# Business Intelligence projects
|
||||
|
||||
# Microsoft Fakes
|
||||
|
||||
# GhostDoc plugin setting file
|
||||
|
||||
# Node.js Tools for Visual Studio
|
||||
|
||||
# Typescript v1 declaration files
|
||||
|
||||
# Visual Studio 6 build log
|
||||
|
||||
# Visual Studio 6 workspace options file
|
||||
|
||||
# Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
|
||||
|
||||
# Visual Studio LightSwitch build output
|
||||
|
||||
# Paket dependency manager
|
||||
|
||||
# FAKE - F# Make
|
||||
|
||||
# JetBrains Rider
|
||||
|
||||
# CodeRush
|
||||
|
||||
# Python Tools for Visual Studio (PTVS)
|
||||
|
||||
# Cake - Uncomment if you are using it
|
||||
# tools/**
|
||||
# !tools/packages.config
|
||||
|
||||
# Telerik's JustMock configuration file
|
||||
|
||||
# BizTalk build output
|
||||
|
||||
# End of https://www.gitignore.io/api/visualstudio,monodevelop,csharp
|
17
MarkovGrams.sln
Normal file
17
MarkovGrams.sln
Normal file
|
@ -0,0 +1,17 @@
|
|||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio 2012
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MarkovGrams", "MarkovGrams\MarkovGrams.csproj", "{14743F58-9418-4147-9C2C-0626AD7185D3}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|x86 = Debug|x86
|
||||
Release|x86 = Release|x86
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{14743F58-9418-4147-9C2C-0626AD7185D3}.Debug|x86.ActiveCfg = Debug|x86
|
||||
{14743F58-9418-4147-9C2C-0626AD7185D3}.Debug|x86.Build.0 = Debug|x86
|
||||
{14743F58-9418-4147-9C2C-0626AD7185D3}.Release|x86.ActiveCfg = Release|x86
|
||||
{14743F58-9418-4147-9C2C-0626AD7185D3}.Release|x86.Build.0 = Release|x86
|
||||
EndGlobalSection
|
||||
EndGlobal
|
41
MarkovGrams/MarkovGrams.csproj
Normal file
41
MarkovGrams/MarkovGrams.csproj
Normal file
|
@ -0,0 +1,41 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<PropertyGroup>
|
||||
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
|
||||
<Platform Condition=" '$(Platform)' == '' ">x86</Platform>
|
||||
<ProjectGuid>{14743F58-9418-4147-9C2C-0626AD7185D3}</ProjectGuid>
|
||||
<OutputType>Exe</OutputType>
|
||||
<RootNamespace>MarkovGrams</RootNamespace>
|
||||
<AssemblyName>MarkovGrams</AssemblyName>
|
||||
<TargetFrameworkVersion>v4.5</TargetFrameworkVersion>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|x86' ">
|
||||
<DebugSymbols>true</DebugSymbols>
|
||||
<DebugType>full</DebugType>
|
||||
<Optimize>false</Optimize>
|
||||
<OutputPath>bin\Debug</OutputPath>
|
||||
<DefineConstants>DEBUG;</DefineConstants>
|
||||
<ErrorReport>prompt</ErrorReport>
|
||||
<WarningLevel>4</WarningLevel>
|
||||
<ExternalConsole>true</ExternalConsole>
|
||||
<PlatformTarget>x86</PlatformTarget>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|x86' ">
|
||||
<Optimize>true</Optimize>
|
||||
<OutputPath>bin\Release</OutputPath>
|
||||
<ErrorReport>prompt</ErrorReport>
|
||||
<WarningLevel>4</WarningLevel>
|
||||
<ExternalConsole>true</ExternalConsole>
|
||||
<PlatformTarget>x86</PlatformTarget>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<Reference Include="System" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Compile Include="Program.cs" />
|
||||
<Compile Include="Properties\AssemblyInfo.cs" />
|
||||
<Compile Include="NGrams.cs" />
|
||||
<Compile Include="UnweightedMarkovChain.cs" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
|
||||
</Project>
|
85
MarkovGrams/NGrams.cs
Normal file
85
MarkovGrams/NGrams.cs
Normal file
|
@ -0,0 +1,85 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
|
||||
namespace MarkovGrams
|
||||
{
|
||||
/// <summary>
/// A collection of methods to generate various different types of n-grams.
/// </summary>
public static class NGrams
{
	/// <summary>
	/// Generates a unique list of n-grams from the given list of words.
	/// </summary>
	/// <param name="words">The words to turn into n-grams.</param>
	/// <param name="order">The order (length in characters) of n-gram to generate.</param>
	/// <returns>A unique list of n-grams found in the given list of words.</returns>
	public static IEnumerable<string> GenerateFlat(IEnumerable<string> words, int order)
	{
		List<string> results = new List<string>();
		foreach(string word in words)
		{
			results.AddRange(GenerateFlat(word, order));
		}
		// The same n-gram can occur in several words, so de-duplicate across the whole list.
		return results.Distinct();
	}

	/// <summary>
	/// Generates a unique list of n-grams from the given string.
	/// </summary>
	/// <param name="str">The string to n-gram-ise.</param>
	/// <param name="order">The order (length in characters) of n-gram to generate.</param>
	/// <returns>
	/// A unique list of n-grams found in the specified string, in order of first occurrence.
	/// Empty if the string is shorter than <paramref name="order"/>.
	/// </returns>
	public static IEnumerable<string> GenerateFlat(string str, int order)
	{
		List<string> results = new List<string>();
		// The last valid start index is (str.Length - order), so the bound is inclusive;
		// an exclusive bound would silently drop the final n-gram of every string.
		for(int i = 0; i <= str.Length - order; i++)
		{
			results.Add(str.Substring(i, order));
		}
		return results.Distinct();
	}

	/// <summary>
	/// Generates a dictionary of weighted n-grams from the given list of words.
	/// The key is the n-gram itself, and the value is the number of times it occurs
	/// across all of the words.
	/// </summary>
	/// <param name="words">The words to n-gram-ise.</param>
	/// <param name="order">The order (length in characters) of n-grams to generate.</param>
	/// <returns>The weighted dictionary of n-grams.</returns>
	public static Dictionary<string, int> GenerateWeighted(IEnumerable<string> words, int order)
	{
		Dictionary<string, int> results = new Dictionary<string, int>();
		foreach(string word in words)
		{
			foreach(KeyValuePair<string, int> ngram in GenerateWeighted(word, order))
			{
				// TryGetValue leaves the count at 0 when the n-gram hasn't been seen yet.
				int currentCount;
				results.TryGetValue(ngram.Key, out currentCount);
				results[ngram.Key] = currentCount + ngram.Value;
			}
		}
		return results;
	}

	/// <summary>
	/// Generates a dictionary of weighted n-grams from the specified string.
	/// The key is the n-gram itself, and the value is the number of times it occurs in the string.
	/// </summary>
	/// <param name="str">The string to n-gram-ise.</param>
	/// <param name="order">The order (length in characters) of n-grams to generate.</param>
	/// <returns>The weighted dictionary of n-grams.</returns>
	public static Dictionary<string, int> GenerateWeighted(string str, int order)
	{
		Dictionary<string, int> results = new Dictionary<string, int>();
		// Inclusive bound so that the n-gram ending on the last character is counted too.
		for(int i = 0; i <= str.Length - order; i++)
		{
			string ngram = str.Substring(i, order);
			// TryGetValue leaves the count at 0 when the n-gram hasn't been seen yet.
			int currentCount;
			results.TryGetValue(ngram, out currentCount);
			results[ngram] = currentCount + 1;
		}
		return results;
	}
}
|
||||
}
|
30
MarkovGrams/Program.cs
Normal file
30
MarkovGrams/Program.cs
Normal file
|
@ -0,0 +1,30 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
|
||||
namespace MarkovGrams
|
||||
{
|
||||
class MainClass
{
	/// <summary>
	/// Command-line entry point: reads a word list, builds flat n-grams of the requested
	/// order from it, and prints one string generated by an unweighted markov chain.
	/// </summary>
	/// <param name="args">Exactly three values: the word list filename, the n-gram order, and the desired output length.</param>
	public static void Main(string[] args)
	{
		if(args.Length == 3)
		{
			// Parse both numbers before touching the file, so bad numeric input fails first.
			string wordlistPath = args[0];
			int ngramOrder = int.Parse(args[1]);
			int targetLength = int.Parse(args[2]);

			IEnumerable<string> wordlist = File.ReadLines(wordlistPath);
			UnweightedMarkovChain markov = new UnweightedMarkovChain(
				NGrams.GenerateFlat(wordlist, ngramOrder)
			);

			string generated = markov.Generate(targetLength);
			Console.WriteLine(generated);
		}
		else
		{
			// Wrong number of arguments: show how to invoke the program and do nothing else.
			Console.WriteLine("Usage:");
			Console.WriteLine(" ./MarkovGrams.exe <wordlist.txt> <order> <length>");
		}
	}
}
|
||||
}
|
26
MarkovGrams/Properties/AssemblyInfo.cs
Normal file
26
MarkovGrams/Properties/AssemblyInfo.cs
Normal file
|
@ -0,0 +1,26 @@
|
|||
using System.Reflection;
|
||||
using System.Runtime.CompilerServices;
|
||||
|
||||
// Information about this assembly is defined by the following attributes.
|
||||
// Change them to the values specific to your project.
|
||||
|
||||
[assembly: AssemblyTitle("MarkovGrams")]
|
||||
[assembly: AssemblyDescription("")]
|
||||
[assembly: AssemblyConfiguration("")]
|
||||
[assembly: AssemblyCompany("")]
|
||||
[assembly: AssemblyProduct("")]
|
||||
[assembly: AssemblyCopyright("sbrl")]
|
||||
[assembly: AssemblyTrademark("")]
|
||||
[assembly: AssemblyCulture("")]
|
||||
|
||||
// The assembly version has the format "{Major}.{Minor}.{Build}.{Revision}".
|
||||
// The form "{Major}.{Minor}.*" will automatically update the build and revision,
|
||||
// and "{Major}.{Minor}.{Build}.*" will update just the revision.
|
||||
|
||||
[assembly: AssemblyVersion("1.0.*")]
|
||||
|
||||
// The following attributes are used to specify the signing key for the assembly,
|
||||
// if desired. See the Mono documentation for more information about signing.
|
||||
|
||||
//[assembly: AssemblyDelaySign(false)]
|
||||
//[assembly: AssemblyKeyFile("")]
|
72
MarkovGrams/UnweightedMarkovChain.cs
Normal file
72
MarkovGrams/UnweightedMarkovChain.cs
Normal file
|
@ -0,0 +1,72 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
|
||||
namespace MarkovGrams
|
||||
{
|
||||
/// <summary>
/// An unweighted character-based markov chain.
/// </summary>
public class UnweightedMarkovChain
{
	/// <summary>
	/// The random number generator.
	/// </summary>
	private readonly Random rand = new Random();

	/// <summary>
	/// The n-grams that this markov chain currently contains.
	/// </summary>
	private readonly List<string> ngrams;

	/// <summary>
	/// Creates a new character-based markov chain.
	/// </summary>
	/// <param name="inNgrams">The n-grams to populate the new markov chain with.</param>
	public UnweightedMarkovChain(IEnumerable<string> inNgrams)
	{
		ngrams = new List<string>(inNgrams);
	}

	/// <summary>
	/// Returns a random n-gram that's currently loaded into this UnweightedMarkovChain.
	/// </summary>
	/// <returns>A random n-gram from this UnweightedMarkovChain's cache of n-grams.</returns>
	/// <exception cref="ArgumentOutOfRangeException">Thrown if this chain contains no n-grams.</exception>
	public string RandomNgram()
	{
		return ngrams[rand.Next(0, ngrams.Count)];
	}

	/// <summary>
	/// Generates a new random string from the currently stored n-grams.
	/// </summary>
	/// <param name="length">
	/// The length of string to generate.
	/// Note that this is a target, not a fixed value - e.g. passing 2 when the n-gram order is 3 will
	/// result in a string of length 3. Also, depending on the current n-grams this markov chain contains,
	/// it may end up being cut short.
	/// </param>
	/// <returns>A new random string.</returns>
	public string Generate(int length)
	{
		string result = RandomNgram();
		string lastNgram = result;
		while(result.Length < length)
		{
			// The substring that the next n-gram in the chain needs to start with
			string nextStartsWith = lastNgram.Substring(1);
			// Get a list of possible n-grams we could choose from next.
			// Ordinal comparison: this is a character-level match, not a linguistic one.
			List<string> nextNgrams = ngrams.FindAll(
				gram => gram.StartsWith(nextStartsWith, StringComparison.Ordinal)
			);
			// If there aren't any choices left, we can't keep adding to the new string any more
			if(nextNgrams.Count == 0)
			{
				break;
			}
			// Pick a random n-gram from the list
			string nextNgram = nextNgrams[rand.Next(0, nextNgrams.Count)];
			// Add the last character from the n-gram to the string we're building
			result += nextNgram[nextNgram.Length - 1];
			// Advance the chain: the next step must follow on from the n-gram just chosen,
			// otherwise every step would be conditioned on the very first n-gram.
			lastNgram = nextNgram;
		}

		return result;
	}
}
|
||||
}
|
Loading…
Reference in a new issue