Initial commit. I think this si worth keeping in it's entirety!
This commit is contained in:
commit
647b4cc0b1
7 changed files with 705 additions and 0 deletions
434
.gitignore
vendored
Normal file
434
.gitignore
vendored
Normal file
|
@ -0,0 +1,434 @@
|
||||||
|
|
||||||
|
# Created by https://www.gitignore.io/api/visualstudio,monodevelop,csharp
|
||||||
|
|
||||||
|
### Csharp ###
|
||||||
|
## Ignore Visual Studio temporary files, build results, and
|
||||||
|
## files generated by popular Visual Studio add-ons.
|
||||||
|
##
|
||||||
|
## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
|
||||||
|
|
||||||
|
# User-specific files
|
||||||
|
*.suo
|
||||||
|
*.user
|
||||||
|
*.userosscache
|
||||||
|
*.sln.docstates
|
||||||
|
|
||||||
|
# User-specific files (MonoDevelop/Xamarin Studio)
|
||||||
|
*.userprefs
|
||||||
|
|
||||||
|
# Build results
|
||||||
|
[Dd]ebug/
|
||||||
|
[Dd]ebugPublic/
|
||||||
|
[Rr]elease/
|
||||||
|
[Rr]eleases/
|
||||||
|
x64/
|
||||||
|
x86/
|
||||||
|
bld/
|
||||||
|
[Bb]in/
|
||||||
|
[Oo]bj/
|
||||||
|
[Ll]og/
|
||||||
|
|
||||||
|
# Visual Studio 2015 cache/options directory
|
||||||
|
.vs/
|
||||||
|
# Uncomment if you have tasks that create the project's static files in wwwroot
|
||||||
|
#wwwroot/
|
||||||
|
|
||||||
|
# MSTest test Results
|
||||||
|
[Tt]est[Rr]esult*/
|
||||||
|
[Bb]uild[Ll]og.*
|
||||||
|
|
||||||
|
# NUNIT
|
||||||
|
*.VisualState.xml
|
||||||
|
TestResult.xml
|
||||||
|
|
||||||
|
# Build Results of an ATL Project
|
||||||
|
[Dd]ebugPS/
|
||||||
|
[Rr]eleasePS/
|
||||||
|
dlldata.c
|
||||||
|
|
||||||
|
# .NET Core
|
||||||
|
project.lock.json
|
||||||
|
project.fragment.lock.json
|
||||||
|
artifacts/
|
||||||
|
**/Properties/launchSettings.json
|
||||||
|
|
||||||
|
*_i.c
|
||||||
|
*_p.c
|
||||||
|
*_i.h
|
||||||
|
*.ilk
|
||||||
|
*.meta
|
||||||
|
*.obj
|
||||||
|
*.pch
|
||||||
|
*.pdb
|
||||||
|
*.pgc
|
||||||
|
*.pgd
|
||||||
|
*.rsp
|
||||||
|
*.sbr
|
||||||
|
*.tlb
|
||||||
|
*.tli
|
||||||
|
*.tlh
|
||||||
|
*.tmp
|
||||||
|
*.tmp_proj
|
||||||
|
*.log
|
||||||
|
*.vspscc
|
||||||
|
*.vssscc
|
||||||
|
.builds
|
||||||
|
*.pidb
|
||||||
|
*.svclog
|
||||||
|
*.scc
|
||||||
|
|
||||||
|
# Chutzpah Test files
|
||||||
|
_Chutzpah*
|
||||||
|
|
||||||
|
# Visual C++ cache files
|
||||||
|
ipch/
|
||||||
|
*.aps
|
||||||
|
*.ncb
|
||||||
|
*.opendb
|
||||||
|
*.opensdf
|
||||||
|
*.sdf
|
||||||
|
*.cachefile
|
||||||
|
*.VC.db
|
||||||
|
*.VC.VC.opendb
|
||||||
|
|
||||||
|
# Visual Studio profiler
|
||||||
|
*.psess
|
||||||
|
*.vsp
|
||||||
|
*.vspx
|
||||||
|
*.sap
|
||||||
|
|
||||||
|
# TFS 2012 Local Workspace
|
||||||
|
$tf/
|
||||||
|
|
||||||
|
# Guidance Automation Toolkit
|
||||||
|
*.gpState
|
||||||
|
|
||||||
|
# ReSharper is a .NET coding add-in
|
||||||
|
_ReSharper*/
|
||||||
|
*.[Rr]e[Ss]harper
|
||||||
|
*.DotSettings.user
|
||||||
|
|
||||||
|
# JustCode is a .NET coding add-in
|
||||||
|
.JustCode
|
||||||
|
|
||||||
|
# TeamCity is a build add-in
|
||||||
|
_TeamCity*
|
||||||
|
|
||||||
|
# DotCover is a Code Coverage Tool
|
||||||
|
*.dotCover
|
||||||
|
|
||||||
|
# Visual Studio code coverage results
|
||||||
|
*.coverage
|
||||||
|
*.coveragexml
|
||||||
|
|
||||||
|
# NCrunch
|
||||||
|
_NCrunch_*
|
||||||
|
.*crunch*.local.xml
|
||||||
|
nCrunchTemp_*
|
||||||
|
|
||||||
|
# MightyMoose
|
||||||
|
*.mm.*
|
||||||
|
AutoTest.Net/
|
||||||
|
|
||||||
|
# Web workbench (sass)
|
||||||
|
.sass-cache/
|
||||||
|
|
||||||
|
# Installshield output folder
|
||||||
|
[Ee]xpress/
|
||||||
|
|
||||||
|
# DocProject is a documentation generator add-in
|
||||||
|
DocProject/buildhelp/
|
||||||
|
DocProject/Help/*.HxT
|
||||||
|
DocProject/Help/*.HxC
|
||||||
|
DocProject/Help/*.hhc
|
||||||
|
DocProject/Help/*.hhk
|
||||||
|
DocProject/Help/*.hhp
|
||||||
|
DocProject/Help/Html2
|
||||||
|
DocProject/Help/html
|
||||||
|
|
||||||
|
# Click-Once directory
|
||||||
|
publish/
|
||||||
|
|
||||||
|
# Publish Web Output
|
||||||
|
*.[Pp]ublish.xml
|
||||||
|
*.azurePubxml
|
||||||
|
# TODO: Comment the next line if you want to checkin your web deploy settings
|
||||||
|
# but database connection strings (with potential passwords) will be unencrypted
|
||||||
|
*.pubxml
|
||||||
|
*.publishproj
|
||||||
|
|
||||||
|
# Microsoft Azure Web App publish settings. Comment the next line if you want to
|
||||||
|
# checkin your Azure Web App publish settings, but sensitive information contained
|
||||||
|
# in these scripts will be unencrypted
|
||||||
|
PublishScripts/
|
||||||
|
|
||||||
|
# NuGet Packages
|
||||||
|
*.nupkg
|
||||||
|
# The packages folder can be ignored because of Package Restore
|
||||||
|
**/packages/*
|
||||||
|
# except build/, which is used as an MSBuild target.
|
||||||
|
!**/packages/build/
|
||||||
|
# Uncomment if necessary however generally it will be regenerated when needed
|
||||||
|
#!**/packages/repositories.config
|
||||||
|
# NuGet v3's project.json files produces more ignorable files
|
||||||
|
*.nuget.props
|
||||||
|
*.nuget.targets
|
||||||
|
|
||||||
|
# Microsoft Azure Build Output
|
||||||
|
csx/
|
||||||
|
*.build.csdef
|
||||||
|
|
||||||
|
# Microsoft Azure Emulator
|
||||||
|
ecf/
|
||||||
|
rcf/
|
||||||
|
|
||||||
|
# Windows Store app package directories and files
|
||||||
|
AppPackages/
|
||||||
|
BundleArtifacts/
|
||||||
|
Package.StoreAssociation.xml
|
||||||
|
_pkginfo.txt
|
||||||
|
|
||||||
|
# Visual Studio cache files
|
||||||
|
# files ending in .cache can be ignored
|
||||||
|
*.[Cc]ache
|
||||||
|
# but keep track of directories ending in .cache
|
||||||
|
!*.[Cc]ache/
|
||||||
|
|
||||||
|
# Others
|
||||||
|
ClientBin/
|
||||||
|
~$*
|
||||||
|
*~
|
||||||
|
*.dbmdl
|
||||||
|
*.dbproj.schemaview
|
||||||
|
*.jfm
|
||||||
|
*.pfx
|
||||||
|
*.publishsettings
|
||||||
|
orleans.codegen.cs
|
||||||
|
|
||||||
|
# Since there are multiple workflows, uncomment next line to ignore bower_components
|
||||||
|
# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
|
||||||
|
#bower_components/
|
||||||
|
|
||||||
|
# RIA/Silverlight projects
|
||||||
|
Generated_Code/
|
||||||
|
|
||||||
|
# Backup & report files from converting an old project file
|
||||||
|
# to a newer Visual Studio version. Backup files are not needed,
|
||||||
|
# because we have git ;-)
|
||||||
|
_UpgradeReport_Files/
|
||||||
|
Backup*/
|
||||||
|
UpgradeLog*.XML
|
||||||
|
UpgradeLog*.htm
|
||||||
|
|
||||||
|
# SQL Server files
|
||||||
|
*.mdf
|
||||||
|
*.ldf
|
||||||
|
*.ndf
|
||||||
|
|
||||||
|
# Business Intelligence projects
|
||||||
|
*.rdl.data
|
||||||
|
*.bim.layout
|
||||||
|
*.bim_*.settings
|
||||||
|
|
||||||
|
# Microsoft Fakes
|
||||||
|
FakesAssemblies/
|
||||||
|
|
||||||
|
# GhostDoc plugin setting file
|
||||||
|
*.GhostDoc.xml
|
||||||
|
|
||||||
|
# Node.js Tools for Visual Studio
|
||||||
|
.ntvs_analysis.dat
|
||||||
|
node_modules/
|
||||||
|
|
||||||
|
# Typescript v1 declaration files
|
||||||
|
typings/
|
||||||
|
|
||||||
|
# Visual Studio 6 build log
|
||||||
|
*.plg
|
||||||
|
|
||||||
|
# Visual Studio 6 workspace options file
|
||||||
|
*.opt
|
||||||
|
|
||||||
|
# Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
|
||||||
|
*.vbw
|
||||||
|
|
||||||
|
# Visual Studio LightSwitch build output
|
||||||
|
**/*.HTMLClient/GeneratedArtifacts
|
||||||
|
**/*.DesktopClient/GeneratedArtifacts
|
||||||
|
**/*.DesktopClient/ModelManifest.xml
|
||||||
|
**/*.Server/GeneratedArtifacts
|
||||||
|
**/*.Server/ModelManifest.xml
|
||||||
|
_Pvt_Extensions
|
||||||
|
|
||||||
|
# Paket dependency manager
|
||||||
|
.paket/paket.exe
|
||||||
|
paket-files/
|
||||||
|
|
||||||
|
# FAKE - F# Make
|
||||||
|
.fake/
|
||||||
|
|
||||||
|
# JetBrains Rider
|
||||||
|
.idea/
|
||||||
|
*.sln.iml
|
||||||
|
|
||||||
|
# CodeRush
|
||||||
|
.cr/
|
||||||
|
|
||||||
|
# Python Tools for Visual Studio (PTVS)
|
||||||
|
__pycache__/
|
||||||
|
*.pyc
|
||||||
|
|
||||||
|
# Cake - Uncomment if you are using it
|
||||||
|
# tools/**
|
||||||
|
# !tools/packages.config
|
||||||
|
|
||||||
|
# Telerik's JustMock configuration file
|
||||||
|
*.jmconfig
|
||||||
|
|
||||||
|
# BizTalk build output
|
||||||
|
*.btp.cs
|
||||||
|
*.btm.cs
|
||||||
|
*.odx.cs
|
||||||
|
*.xsd.cs
|
||||||
|
|
||||||
|
### MonoDevelop ###
|
||||||
|
#User Specific
|
||||||
|
*.usertasks
|
||||||
|
|
||||||
|
#Mono Project Files
|
||||||
|
*.resources
|
||||||
|
test-results/
|
||||||
|
|
||||||
|
### VisualStudio ###
|
||||||
|
## Ignore Visual Studio temporary files, build results, and
|
||||||
|
## files generated by popular Visual Studio add-ons.
|
||||||
|
##
|
||||||
|
## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
|
||||||
|
|
||||||
|
# User-specific files
|
||||||
|
|
||||||
|
# User-specific files (MonoDevelop/Xamarin Studio)
|
||||||
|
|
||||||
|
# Build results
|
||||||
|
|
||||||
|
# Visual Studio 2015 cache/options directory
|
||||||
|
# Uncomment if you have tasks that create the project's static files in wwwroot
|
||||||
|
#wwwroot/
|
||||||
|
|
||||||
|
# MSTest test Results
|
||||||
|
|
||||||
|
# NUNIT
|
||||||
|
|
||||||
|
# Build Results of an ATL Project
|
||||||
|
|
||||||
|
# .NET Core
|
||||||
|
|
||||||
|
|
||||||
|
# Chutzpah Test files
|
||||||
|
|
||||||
|
# Visual C++ cache files
|
||||||
|
|
||||||
|
# Visual Studio profiler
|
||||||
|
|
||||||
|
# TFS 2012 Local Workspace
|
||||||
|
|
||||||
|
# Guidance Automation Toolkit
|
||||||
|
|
||||||
|
# ReSharper is a .NET coding add-in
|
||||||
|
|
||||||
|
# JustCode is a .NET coding add-in
|
||||||
|
|
||||||
|
# TeamCity is a build add-in
|
||||||
|
|
||||||
|
# DotCover is a Code Coverage Tool
|
||||||
|
|
||||||
|
# Visual Studio code coverage results
|
||||||
|
|
||||||
|
# NCrunch
|
||||||
|
|
||||||
|
# MightyMoose
|
||||||
|
|
||||||
|
# Web workbench (sass)
|
||||||
|
|
||||||
|
# Installshield output folder
|
||||||
|
|
||||||
|
# DocProject is a documentation generator add-in
|
||||||
|
|
||||||
|
# Click-Once directory
|
||||||
|
|
||||||
|
# Publish Web Output
|
||||||
|
# TODO: Comment the next line if you want to checkin your web deploy settings
|
||||||
|
# but database connection strings (with potential passwords) will be unencrypted
|
||||||
|
|
||||||
|
# Microsoft Azure Web App publish settings. Comment the next line if you want to
|
||||||
|
# checkin your Azure Web App publish settings, but sensitive information contained
|
||||||
|
# in these scripts will be unencrypted
|
||||||
|
|
||||||
|
# NuGet Packages
|
||||||
|
# The packages folder can be ignored because of Package Restore
|
||||||
|
# except build/, which is used as an MSBuild target.
|
||||||
|
# Uncomment if necessary however generally it will be regenerated when needed
|
||||||
|
#!**/packages/repositories.config
|
||||||
|
# NuGet v3's project.json files produces more ignorable files
|
||||||
|
|
||||||
|
# Microsoft Azure Build Output
|
||||||
|
|
||||||
|
# Microsoft Azure Emulator
|
||||||
|
|
||||||
|
# Windows Store app package directories and files
|
||||||
|
|
||||||
|
# Visual Studio cache files
|
||||||
|
# files ending in .cache can be ignored
|
||||||
|
# but keep track of directories ending in .cache
|
||||||
|
|
||||||
|
# Others
|
||||||
|
|
||||||
|
# Since there are multiple workflows, uncomment next line to ignore bower_components
|
||||||
|
# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
|
||||||
|
#bower_components/
|
||||||
|
|
||||||
|
# RIA/Silverlight projects
|
||||||
|
|
||||||
|
# Backup & report files from converting an old project file
|
||||||
|
# to a newer Visual Studio version. Backup files are not needed,
|
||||||
|
# because we have git ;-)
|
||||||
|
|
||||||
|
# SQL Server files
|
||||||
|
|
||||||
|
# Business Intelligence projects
|
||||||
|
|
||||||
|
# Microsoft Fakes
|
||||||
|
|
||||||
|
# GhostDoc plugin setting file
|
||||||
|
|
||||||
|
# Node.js Tools for Visual Studio
|
||||||
|
|
||||||
|
# Typescript v1 declaration files
|
||||||
|
|
||||||
|
# Visual Studio 6 build log
|
||||||
|
|
||||||
|
# Visual Studio 6 workspace options file
|
||||||
|
|
||||||
|
# Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
|
||||||
|
|
||||||
|
# Visual Studio LightSwitch build output
|
||||||
|
|
||||||
|
# Paket dependency manager
|
||||||
|
|
||||||
|
# FAKE - F# Make
|
||||||
|
|
||||||
|
# JetBrains Rider
|
||||||
|
|
||||||
|
# CodeRush
|
||||||
|
|
||||||
|
# Python Tools for Visual Studio (PTVS)
|
||||||
|
|
||||||
|
# Cake - Uncomment if you are using it
|
||||||
|
# tools/**
|
||||||
|
# !tools/packages.config
|
||||||
|
|
||||||
|
# Telerik's JustMock configuration file
|
||||||
|
|
||||||
|
# BizTalk build output
|
||||||
|
|
||||||
|
# End of https://www.gitignore.io/api/visualstudio,monodevelop,csharp
|
17
MarkovGrams.sln
Normal file
17
MarkovGrams.sln
Normal file
|
@ -0,0 +1,17 @@
|
||||||
|
|
||||||
|
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||||
|
# Visual Studio 2012
|
||||||
|
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MarkovGrams", "MarkovGrams\MarkovGrams.csproj", "{14743F58-9418-4147-9C2C-0626AD7185D3}"
|
||||||
|
EndProject
|
||||||
|
Global
|
||||||
|
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||||
|
Debug|x86 = Debug|x86
|
||||||
|
Release|x86 = Release|x86
|
||||||
|
EndGlobalSection
|
||||||
|
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||||
|
{14743F58-9418-4147-9C2C-0626AD7185D3}.Debug|x86.ActiveCfg = Debug|x86
|
||||||
|
{14743F58-9418-4147-9C2C-0626AD7185D3}.Debug|x86.Build.0 = Debug|x86
|
||||||
|
{14743F58-9418-4147-9C2C-0626AD7185D3}.Release|x86.ActiveCfg = Release|x86
|
||||||
|
{14743F58-9418-4147-9C2C-0626AD7185D3}.Release|x86.Build.0 = Release|x86
|
||||||
|
EndGlobalSection
|
||||||
|
EndGlobal
|
41
MarkovGrams/MarkovGrams.csproj
Normal file
41
MarkovGrams/MarkovGrams.csproj
Normal file
|
@ -0,0 +1,41 @@
|
||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||||
|
<PropertyGroup>
|
||||||
|
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
|
||||||
|
<Platform Condition=" '$(Platform)' == '' ">x86</Platform>
|
||||||
|
<ProjectGuid>{14743F58-9418-4147-9C2C-0626AD7185D3}</ProjectGuid>
|
||||||
|
<OutputType>Exe</OutputType>
|
||||||
|
<RootNamespace>MarkovGrams</RootNamespace>
|
||||||
|
<AssemblyName>MarkovGrams</AssemblyName>
|
||||||
|
<TargetFrameworkVersion>v4.5</TargetFrameworkVersion>
|
||||||
|
</PropertyGroup>
|
||||||
|
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|x86' ">
|
||||||
|
<DebugSymbols>true</DebugSymbols>
|
||||||
|
<DebugType>full</DebugType>
|
||||||
|
<Optimize>false</Optimize>
|
||||||
|
<OutputPath>bin\Debug</OutputPath>
|
||||||
|
<DefineConstants>DEBUG;</DefineConstants>
|
||||||
|
<ErrorReport>prompt</ErrorReport>
|
||||||
|
<WarningLevel>4</WarningLevel>
|
||||||
|
<ExternalConsole>true</ExternalConsole>
|
||||||
|
<PlatformTarget>x86</PlatformTarget>
|
||||||
|
</PropertyGroup>
|
||||||
|
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|x86' ">
|
||||||
|
<Optimize>true</Optimize>
|
||||||
|
<OutputPath>bin\Release</OutputPath>
|
||||||
|
<ErrorReport>prompt</ErrorReport>
|
||||||
|
<WarningLevel>4</WarningLevel>
|
||||||
|
<ExternalConsole>true</ExternalConsole>
|
||||||
|
<PlatformTarget>x86</PlatformTarget>
|
||||||
|
</PropertyGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<Reference Include="System" />
|
||||||
|
</ItemGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<Compile Include="Program.cs" />
|
||||||
|
<Compile Include="Properties\AssemblyInfo.cs" />
|
||||||
|
<Compile Include="NGrams.cs" />
|
||||||
|
<Compile Include="UnweightedMarkovChain.cs" />
|
||||||
|
</ItemGroup>
|
||||||
|
<Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
|
||||||
|
</Project>
|
85
MarkovGrams/NGrams.cs
Normal file
85
MarkovGrams/NGrams.cs
Normal file
|
@ -0,0 +1,85 @@
|
||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
|
||||||
|
namespace MarkovGrams
|
||||||
|
{
|
||||||
|
/// <summary>
|
||||||
|
/// A collection of methods to generate various different types of n-grams.
|
||||||
|
/// </summary>
|
||||||
|
public static class NGrams
|
||||||
|
{
|
||||||
|
/// <summary>
|
||||||
|
/// Generates a unique list of n-grams that the given list of words.
|
||||||
|
/// </summary>
|
||||||
|
/// <param name="words">The words to turn into n-grams.</param>
|
||||||
|
/// <param name="order">The order of n-gram to generate..</param>
|
||||||
|
/// <returns>A unique list of n-grams found in the given list of words.</returns>
|
||||||
|
public static IEnumerable<string> GenerateFlat(IEnumerable<string> words, int order)
|
||||||
|
{
|
||||||
|
List<string> results = new List<string>();
|
||||||
|
foreach(string word in words)
|
||||||
|
{
|
||||||
|
results.AddRange(GenerateFlat(word, order));
|
||||||
|
}
|
||||||
|
return results.Distinct();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Generates a unique list of n-grams from the given string.
|
||||||
|
/// </summary>
|
||||||
|
/// <param name="str">The string to n-gram-ise.</param>
|
||||||
|
/// <param name="order">The order of n-gram to generate.</param>
|
||||||
|
/// <returns>A unique list of n-grams found in the specified string.</returns>
|
||||||
|
public static IEnumerable<string> GenerateFlat(string str, int order)
|
||||||
|
{
|
||||||
|
List<string> results = new List<string>();
|
||||||
|
for(int i = 0; i < str.Length - order; i++)
|
||||||
|
{
|
||||||
|
results.Add(str.Substring(i, order));
|
||||||
|
}
|
||||||
|
return results.Distinct();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Generates a dictionary of weighted n-grams from the given list of words.
|
||||||
|
/// The key is the ngram itself, and the value is the linear weight of the ngram.
|
||||||
|
/// </summary>
|
||||||
|
/// <param name="words">The words to n-gram-ise.</param>
|
||||||
|
/// <param name="order">The order of ngrams to generate.</param>
|
||||||
|
/// <returns>The weighted dictionary of ngrams.</returns>
|
||||||
|
public static Dictionary<string, int> GenerateWeighted(IEnumerable<string> words, int order)
|
||||||
|
{
|
||||||
|
Dictionary<string, int> results = new Dictionary<string, int>();
|
||||||
|
foreach(string word in words)
|
||||||
|
{
|
||||||
|
Dictionary<string, int> wordNgrams = GenerateWeighted(word, order);
|
||||||
|
foreach(KeyValuePair<string, int> ngram in wordNgrams)
|
||||||
|
{
|
||||||
|
if(!results.ContainsKey(ngram.Key))
|
||||||
|
results[ngram.Key] = 0;
|
||||||
|
results[ngram.Key] += ngram.Value;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
/// <summary>
|
||||||
|
/// Generates a dictionary of weighted n-grams from the specified string.
|
||||||
|
/// </summary>
|
||||||
|
/// <param name="str">The string to n-gram-ise.</param>
|
||||||
|
/// <param name="order">The order of n-grams to generate.</param>
|
||||||
|
/// <returns>The weighted dictionary of ngrams.</returns>
|
||||||
|
public static Dictionary<string, int> GenerateWeighted(string str, int order)
|
||||||
|
{
|
||||||
|
Dictionary<string, int> results = new Dictionary<string, int>();
|
||||||
|
for(int i = 0; i < str.Length - order; i++)
|
||||||
|
{
|
||||||
|
string ngram = str.Substring(i, order);
|
||||||
|
if(!results.ContainsKey(ngram))
|
||||||
|
results[ngram] = 0;
|
||||||
|
results[ngram]++;
|
||||||
|
}
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
30
MarkovGrams/Program.cs
Normal file
30
MarkovGrams/Program.cs
Normal file
|
@ -0,0 +1,30 @@
|
||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.IO;
|
||||||
|
|
||||||
|
namespace MarkovGrams
|
||||||
|
{
|
||||||
|
class MainClass
|
||||||
|
{
|
||||||
|
public static void Main(string[] args)
|
||||||
|
{
|
||||||
|
if(args.Length != 3)
|
||||||
|
{
|
||||||
|
Console.WriteLine("Usage:");
|
||||||
|
Console.WriteLine(" ./MarkovGrams.exe <wordlist.txt> <order> <length>");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
string wordlistFilename = args[0];
|
||||||
|
int order = int.Parse(args[1]);
|
||||||
|
int desiredStringLength = int.Parse(args[2]);
|
||||||
|
|
||||||
|
IEnumerable<string> words = File.ReadLines(wordlistFilename);
|
||||||
|
IEnumerable<string> ngrams = NGrams.GenerateFlat(words, order);
|
||||||
|
|
||||||
|
UnweightedMarkovChain chain = new UnweightedMarkovChain(ngrams);
|
||||||
|
|
||||||
|
Console.WriteLine(chain.Generate(desiredStringLength));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
26
MarkovGrams/Properties/AssemblyInfo.cs
Normal file
26
MarkovGrams/Properties/AssemblyInfo.cs
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
using System.Reflection;
|
||||||
|
using System.Runtime.CompilerServices;
|
||||||
|
|
||||||
|
// Information about this assembly is defined by the following attributes.
|
||||||
|
// Change them to the values specific to your project.
|
||||||
|
|
||||||
|
[assembly: AssemblyTitle("MarkovGrams")]
|
||||||
|
[assembly: AssemblyDescription("")]
|
||||||
|
[assembly: AssemblyConfiguration("")]
|
||||||
|
[assembly: AssemblyCompany("")]
|
||||||
|
[assembly: AssemblyProduct("")]
|
||||||
|
[assembly: AssemblyCopyright("sbrl")]
|
||||||
|
[assembly: AssemblyTrademark("")]
|
||||||
|
[assembly: AssemblyCulture("")]
|
||||||
|
|
||||||
|
// The assembly version has the format "{Major}.{Minor}.{Build}.{Revision}".
|
||||||
|
// The form "{Major}.{Minor}.*" will automatically update the build and revision,
|
||||||
|
// and "{Major}.{Minor}.{Build}.*" will update just the revision.
|
||||||
|
|
||||||
|
[assembly: AssemblyVersion("1.0.*")]
|
||||||
|
|
||||||
|
// The following attributes are used to specify the signing key for the assembly,
|
||||||
|
// if desired. See the Mono documentation for more information about signing.
|
||||||
|
|
||||||
|
//[assembly: AssemblyDelaySign(false)]
|
||||||
|
//[assembly: AssemblyKeyFile("")]
|
72
MarkovGrams/UnweightedMarkovChain.cs
Normal file
72
MarkovGrams/UnweightedMarkovChain.cs
Normal file
|
@ -0,0 +1,72 @@
|
||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
|
||||||
|
namespace MarkovGrams
|
||||||
|
{
|
||||||
|
/// <summary>
|
||||||
|
/// An unweighted character-based markov chain.
|
||||||
|
/// </summary>
|
||||||
|
public class UnweightedMarkovChain
|
||||||
|
{
|
||||||
|
/// <summary>
|
||||||
|
/// The random number generator
|
||||||
|
/// </summary>
|
||||||
|
Random rand = new Random();
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// The ngrams that this markov chain currently contains.
|
||||||
|
/// </summary>
|
||||||
|
List<string> ngrams;
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Creates a new character-based markov chain.
|
||||||
|
/// </summary>
|
||||||
|
/// <param name="inNgrams">The ngrams to populate the new markov chain with.</param>
|
||||||
|
public UnweightedMarkovChain(IEnumerable<string> inNgrams)
|
||||||
|
{
|
||||||
|
ngrams = new List<string>(inNgrams);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Returns a random ngram that's currently loaded into this UnweightedMarkovChain.
|
||||||
|
/// </summary>
|
||||||
|
/// <returns>A random ngram from this UnweightMarkovChain's cache of ngrams.</returns>
|
||||||
|
public string RandomNgram()
|
||||||
|
{
|
||||||
|
return ngrams[rand.Next(0, ngrams.Count)];
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Generates a new random string from the currently stored ngrams.
|
||||||
|
/// </summary>
|
||||||
|
/// <param name="length">
|
||||||
|
/// The length of ngram to generate.
|
||||||
|
/// Note that this is a target, not a fixed value - e.g. passing 2 when the n-gram order is 3 will
|
||||||
|
/// result in a string of length 3. Also, depending on the current ngrams this markov chain contains,
|
||||||
|
/// it may end up being cut short.
|
||||||
|
/// </param>
|
||||||
|
/// <returns>A new random string.</returns>
|
||||||
|
public string Generate(int length)
|
||||||
|
{
|
||||||
|
string result = RandomNgram();
|
||||||
|
string lastNgram = result;
|
||||||
|
while(result.Length < length)
|
||||||
|
{
|
||||||
|
// The substring that the next ngram in the chain needs to start with
|
||||||
|
string nextStartsWith = lastNgram.Substring(1);
|
||||||
|
// Get a list of possible n-grams we could choose from next
|
||||||
|
List<string> nextNgrams = ngrams.FindAll(gram => gram.StartsWith(nextStartsWith));
|
||||||
|
// If there aren't any choices left, we can't exactly keep adding to the new string any more :-(
|
||||||
|
if(nextNgrams.Count == 0)
|
||||||
|
break;
|
||||||
|
// Pick a random n-gram from the list
|
||||||
|
string nextNgram = nextNgrams.ElementAt(rand.Next(0, nextNgrams.Count));
|
||||||
|
// Add the last character from the n-gram to the string we're building
|
||||||
|
result += nextNgram[nextNgram.Length - 1];
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in a new issue