From f27f57f80f10d3c16f97b9dd294f59bd6233af88 Mon Sep 17 00:00:00 2001 From: Starbeamrainbowlabs Date: Mon, 12 Nov 2018 19:21:49 +0000 Subject: [PATCH] Create Microsoft.Recognizers.Text demo. Does this mean there will be more types of recogniser in the future? :D --- .gitignore | 523 ++++++++++++++++++ TextRecogniserDemo.sln | 17 + TextRecogniserDemo/AIRecogniser.cs | 70 +++ TextRecogniserDemo/Program.cs | 91 +++ TextRecogniserDemo/Properties/AssemblyInfo.cs | 26 + TextRecogniserDemo/TextRecogniserDemo.csproj | 77 +++ TextRecogniserDemo/packages.config | 13 + 7 files changed, 817 insertions(+) create mode 100644 .gitignore create mode 100644 TextRecogniserDemo.sln create mode 100644 TextRecogniserDemo/AIRecogniser.cs create mode 100644 TextRecogniserDemo/Program.cs create mode 100644 TextRecogniserDemo/Properties/AssemblyInfo.cs create mode 100644 TextRecogniserDemo/TextRecogniserDemo.csproj create mode 100644 TextRecogniserDemo/packages.config diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ffdfb95 --- /dev/null +++ b/.gitignore @@ -0,0 +1,523 @@ + +# Created by https://www.gitignore.io/api/monodevelop,visualstudio,csharp,git +# Edit at https://www.gitignore.io/?templates=monodevelop,visualstudio,csharp,git + +### Csharp ### +## Ignore Visual Studio temporary files, build results, and +## files generated by popular Visual Studio add-ons. 
+## +## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore + +# User-specific files +*.rsuser +*.suo +*.user +*.userosscache +*.sln.docstates + +# User-specific files (MonoDevelop/Xamarin Studio) +*.userprefs + +# Build results +[Dd]ebug/ +[Dd]ebugPublic/ +[Rr]elease/ +[Rr]eleases/ +x64/ +x86/ +bld/ +[Bb]in/ +[Oo]bj/ +[Ll]og/ + +# Visual Studio 2015/2017 cache/options directory +.vs/ +# Uncomment if you have tasks that create the project's static files in wwwroot +#wwwroot/ + +# Visual Studio 2017 auto generated files +Generated\ Files/ + +# MSTest test Results +[Tt]est[Rr]esult*/ +[Bb]uild[Ll]og.* + +# NUNIT +*.VisualState.xml +TestResult.xml + +# Build Results of an ATL Project +[Dd]ebugPS/ +[Rr]eleasePS/ +dlldata.c + +# Benchmark Results +BenchmarkDotNet.Artifacts/ + +# .NET Core +project.lock.json +project.fragment.lock.json +artifacts/ + +# StyleCop +StyleCopReport.xml + +# Files built by Visual Studio +*_i.c +*_p.c +*_h.h +*.ilk +*.meta +*.obj +*.iobj +*.pch +*.pdb +*.ipdb +*.pgc +*.pgd +*.rsp +*.sbr +*.tlb +*.tli +*.tlh +*.tmp +*.tmp_proj +*_wpftmp.csproj +*.log +*.vspscc +*.vssscc +.builds +*.pidb +*.svclog +*.scc + +# Chutzpah Test files +_Chutzpah* + +# Visual C++ cache files +ipch/ +*.aps +*.ncb +*.opendb +*.opensdf +*.sdf +*.cachefile +*.VC.db +*.VC.VC.opendb + +# Visual Studio profiler +*.psess +*.vsp +*.vspx +*.sap + +# Visual Studio Trace Files +*.e2e + +# TFS 2012 Local Workspace +$tf/ + +# Guidance Automation Toolkit +*.gpState + +# ReSharper is a .NET coding add-in +_ReSharper*/ +*.[Rr]e[Ss]harper +*.DotSettings.user + +# JustCode is a .NET coding add-in +.JustCode + +# TeamCity is a build add-in +_TeamCity* + +# DotCover is a Code Coverage Tool +*.dotCover + +# AxoCover is a Code Coverage Tool +.axoCover/* +!.axoCover/settings.json + +# Visual Studio code coverage results +*.coverage +*.coveragexml + +# NCrunch +_NCrunch_* +.*crunch*.local.xml +nCrunchTemp_* + +# MightyMoose +*.mm.* +AutoTest.Net/ + +# Web 
workbench (sass) +.sass-cache/ + +# Installshield output folder +[Ee]xpress/ + +# DocProject is a documentation generator add-in +DocProject/buildhelp/ +DocProject/Help/*.HxT +DocProject/Help/*.HxC +DocProject/Help/*.hhc +DocProject/Help/*.hhk +DocProject/Help/*.hhp +DocProject/Help/Html2 +DocProject/Help/html + +# Click-Once directory +publish/ + +# Publish Web Output +*.[Pp]ublish.xml +*.azurePubxml +# Note: Comment the next line if you want to checkin your web deploy settings, +# but database connection strings (with potential passwords) will be unencrypted +*.pubxml +*.publishproj + +# Microsoft Azure Web App publish settings. Comment the next line if you want to +# checkin your Azure Web App publish settings, but sensitive information contained +# in these scripts will be unencrypted +PublishScripts/ + +# NuGet Packages +*.nupkg +# The packages folder can be ignored because of Package Restore +**/[Pp]ackages/* +# except build/, which is used as an MSBuild target. +!**/[Pp]ackages/build/ +# Uncomment if necessary however generally it will be regenerated when needed +#!**/[Pp]ackages/repositories.config +# NuGet v3's project.json files produces more ignorable files +*.nuget.props +*.nuget.targets + +# Microsoft Azure Build Output +csx/ +*.build.csdef + +# Microsoft Azure Emulator +ecf/ +rcf/ + +# Windows Store app package directories and files +AppPackages/ +BundleArtifacts/ +Package.StoreAssociation.xml +_pkginfo.txt +*.appx + +# Visual Studio cache files +# files ending in .cache can be ignored +*.[Cc]ache +# but keep track of directories ending in .cache +!*.[Cc]ache/ + +# Others +ClientBin/ +~$* +*~ +*.dbmdl +*.dbproj.schemaview +*.jfm +*.pfx +*.publishsettings +orleans.codegen.cs + +# Including strong name files can present a security risk +# (https://github.com/github/gitignore/pull/2483#issue-259490424) +#*.snk + +# Since there are multiple workflows, uncomment next line to ignore bower_components +# 
(https://github.com/github/gitignore/pull/1529#issuecomment-104372622) +#bower_components/ + +# RIA/Silverlight projects +Generated_Code/ + +# Backup & report files from converting an old project file +# to a newer Visual Studio version. Backup files are not needed, +# because we have git ;-) +_UpgradeReport_Files/ +Backup*/ +UpgradeLog*.XML +UpgradeLog*.htm +ServiceFabricBackup/ +*.rptproj.bak + +# SQL Server files +*.mdf +*.ldf +*.ndf + +# Business Intelligence projects +*.rdl.data +*.bim.layout +*.bim_*.settings +*.rptproj.rsuser + +# Microsoft Fakes +FakesAssemblies/ + +# GhostDoc plugin setting file +*.GhostDoc.xml + +# Node.js Tools for Visual Studio +.ntvs_analysis.dat +node_modules/ + +# Visual Studio 6 build log +*.plg + +# Visual Studio 6 workspace options file +*.opt + +# Visual Studio 6 auto-generated workspace file (contains which files were open etc.) +*.vbw + +# Visual Studio LightSwitch build output +**/*.HTMLClient/GeneratedArtifacts +**/*.DesktopClient/GeneratedArtifacts +**/*.DesktopClient/ModelManifest.xml +**/*.Server/GeneratedArtifacts +**/*.Server/ModelManifest.xml +_Pvt_Extensions + +# Paket dependency manager +.paket/paket.exe +paket-files/ + +# FAKE - F# Make +.fake/ + +# JetBrains Rider +.idea/ +*.sln.iml + +# CodeRush personal settings +.cr/personal + +# Python Tools for Visual Studio (PTVS) +__pycache__/ +*.pyc + +# Cake - Uncomment if you are using it +# tools/** +# !tools/packages.config + +# Tabs Studio +*.tss + +# Telerik's JustMock configuration file +*.jmconfig + +# BizTalk build output +*.btp.cs +*.btm.cs +*.odx.cs +*.xsd.cs + +# OpenCover UI analysis results +OpenCover/ + +# Azure Stream Analytics local run output +ASALocalRun/ + +# MSBuild Binary and Structured Log +*.binlog + +# NVidia Nsight GPU debugger configuration file +*.nvuser + +# MFractors (Xamarin productivity tool) working folder +.mfractor/ + +# Local History for Visual Studio +.localhistory/ + +### Git ### +# Created by git for backups. 
To disable backups in Git: +# $ git config --global mergetool.keepBackup false +*.orig + +# Created by git when using merge tools for conflicts +*.BACKUP.* +*.BASE.* +*.LOCAL.* +*.REMOTE.* +*_BACKUP_*.txt +*_BASE_*.txt +*_LOCAL_*.txt +*_REMOTE_*.txt + +### MonoDevelop ### +#User Specific +*.usertasks + +#Mono Project Files +*.resources +test-results/ + +### VisualStudio ### +## Ignore Visual Studio temporary files, build results, and +## files generated by popular Visual Studio add-ons. +## +## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore + +# User-specific files + +# User-specific files (MonoDevelop/Xamarin Studio) + +# Build results + +# Visual Studio 2015/2017 cache/options directory +# Uncomment if you have tasks that create the project's static files in wwwroot +#wwwroot/ + +# Visual Studio 2017 auto generated files + +# MSTest test Results + +# NUNIT + +# Build Results of an ATL Project + +# Benchmark Results + +# .NET Core + +# StyleCop + +# Files built by Visual Studio + +# Chutzpah Test files + +# Visual C++ cache files + +# Visual Studio profiler + +# Visual Studio Trace Files + +# TFS 2012 Local Workspace + +# Guidance Automation Toolkit + +# ReSharper is a .NET coding add-in + +# JustCode is a .NET coding add-in + +# TeamCity is a build add-in + +# DotCover is a Code Coverage Tool + +# AxoCover is a Code Coverage Tool + +# Visual Studio code coverage results + +# NCrunch + +# MightyMoose + +# Web workbench (sass) + +# Installshield output folder + +# DocProject is a documentation generator add-in + +# Click-Once directory + +# Publish Web Output +# Note: Comment the next line if you want to checkin your web deploy settings, +# but database connection strings (with potential passwords) will be unencrypted + +# Microsoft Azure Web App publish settings. 
Comment the next line if you want to +# checkin your Azure Web App publish settings, but sensitive information contained +# in these scripts will be unencrypted + +# NuGet Packages +# The packages folder can be ignored because of Package Restore +# except build/, which is used as an MSBuild target. +# Uncomment if necessary however generally it will be regenerated when needed +#!**/[Pp]ackages/repositories.config +# NuGet v3's project.json files produces more ignorable files + +# Microsoft Azure Build Output + +# Microsoft Azure Emulator + +# Windows Store app package directories and files + +# Visual Studio cache files +# files ending in .cache can be ignored +# but keep track of directories ending in .cache + +# Others + +# Including strong name files can present a security risk +# (https://github.com/github/gitignore/pull/2483#issue-259490424) +#*.snk + +# Since there are multiple workflows, uncomment next line to ignore bower_components +# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) +#bower_components/ + +# RIA/Silverlight projects + +# Backup & report files from converting an old project file +# to a newer Visual Studio version. Backup files are not needed, +# because we have git ;-) + +# SQL Server files + +# Business Intelligence projects + +# Microsoft Fakes + +# GhostDoc plugin setting file + +# Node.js Tools for Visual Studio + +# Visual Studio 6 build log + +# Visual Studio 6 workspace options file + +# Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 
+
+# Visual Studio LightSwitch build output
+
+# Paket dependency manager
+
+# FAKE - F# Make
+
+# JetBrains Rider
+
+# CodeRush personal settings
+
+# Python Tools for Visual Studio (PTVS)
+
+# Cake - Uncomment if you are using it
+# tools/**
+# !tools/packages.config
+
+# Tabs Studio
+
+# Telerik's JustMock configuration file
+
+# BizTalk build output
+
+# OpenCover UI analysis results
+
+# Azure Stream Analytics local run output
+
+# MSBuild Binary and Structured Log
+
+# NVidia Nsight GPU debugger configuration file
+
+# MFractors (Xamarin productivity tool) working folder
+
+# Local History for Visual Studio
+
+# End of https://www.gitignore.io/api/monodevelop,visualstudio,csharp,git
diff --git a/TextRecogniserDemo.sln b/TextRecogniserDemo.sln
new file mode 100644
index 0000000..bd00c56
--- /dev/null
+++ b/TextRecogniserDemo.sln
@@ -0,0 +1,17 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio 15
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TextRecogniserDemo", "TextRecogniserDemo\TextRecogniserDemo.csproj", "{CEC384D1-4B3D-4EB8-A801-40E9845EA443}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|x86 = Debug|x86
+		Release|x86 = Release|x86
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{CEC384D1-4B3D-4EB8-A801-40E9845EA443}.Debug|x86.ActiveCfg = Debug|x86
+		{CEC384D1-4B3D-4EB8-A801-40E9845EA443}.Debug|x86.Build.0 = Debug|x86
+		{CEC384D1-4B3D-4EB8-A801-40E9845EA443}.Release|x86.ActiveCfg = Release|x86
+		{CEC384D1-4B3D-4EB8-A801-40E9845EA443}.Release|x86.Build.0 = Release|x86
+	EndGlobalSection
+EndGlobal
diff --git a/TextRecogniserDemo/AIRecogniser.cs b/TextRecogniserDemo/AIRecogniser.cs
new file mode 100644
index 0000000..f453565
--- /dev/null
+++ b/TextRecogniserDemo/AIRecogniser.cs
@@ -0,0 +1,108 @@
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.Linq;
+using System.Text.RegularExpressions;
+using Microsoft.Recognizers.Text;
+using Microsoft.Recognizers.Text.DateTime;
+
+namespace TextRecogniserDemo
+{
+    /// <summary>
+    /// Helpers that turn the first date/time expression recognised in a piece
+    /// of English text into .NET date and time values.
+    /// </summary>
+    public static class AIRecogniser
+    {
+        /// <summary>Resolution types that RecogniseDateTime accepts.</summary>
+        private static readonly string[] acceptedTypes = {
+            "datetime", "date", "time", "datetimerange", "daterange", "timerange"
+        };
+
+        /// <summary>
+        /// Recognises the first date/time range in a string.
+        /// </summary>
+        /// <param name="source">The English text to search.</param>
+        /// <param name="rawString">The part of the source text that was recognised.</param>
+        /// <returns>The start of the range, and the length of the range.</returns>
+        /// <exception cref="FormatException">
+        /// Nothing was recognised, or the first match isn't a 'datetimerange'
+        /// with both a start and an end.
+        /// </exception>
+        public static (DateTime, TimeSpan) RecogniseDateTimeRange(string source, out string rawString)
+        {
+            List<ModelResult> aiResults = DateTimeRecognizer.RecognizeDateTime(source, Culture.English);
+            if (aiResults.Count == 0)
+                throw new FormatException("Error: Couldn't recognise any time ranges in that source string.");
+
+            /* Example contents of the below dictionary:
+                [0]: {[timex, 2018-11-11T06:15]}
+                [1]: {[type, datetime]}
+                [2]: {[value, 2018-11-11 06:15:00]}
+            */
+            rawString = aiResults[0].Text;
+            Dictionary<string, string> aiResult = unwindResult(aiResults[0]);
+
+            string type = aiResult["type"];
+            if (type != "datetimerange")
+                throw new FormatException($"Error: An invalid type of {type} was encountered ('datetimerange' expected).");
+
+            DateTime start = parseField(aiResult, "start");
+            return (start, parseField(aiResult, "end") - start);
+        }
+
+        /// <summary>
+        /// Recognises the first date and/or time in a string. If the match is a
+        /// range, the start of the range is returned.
+        /// </summary>
+        /// <param name="source">The English text to search.</param>
+        /// <param name="rawString">The part of the source text that was recognised.</param>
+        /// <returns>The recognised date and time.</returns>
+        /// <exception cref="FormatException">
+        /// Nothing was recognised, or the first match isn't a date/time type.
+        /// </exception>
+        public static DateTime RecogniseDateTime(string source, out string rawString)
+        {
+            List<ModelResult> aiResults = DateTimeRecognizer.RecognizeDateTime(source, Culture.English);
+            if (aiResults.Count == 0)
+                throw new FormatException("Error: Couldn't recognise any dates or times in that source string.");
+
+            rawString = aiResults[0].Text;
+            Dictionary<string, string> aiResult = unwindResult(aiResults[0]);
+
+            string type = aiResult["type"];
+            if (!acceptedTypes.Contains(type))
+                throw new FormatException($"Error: An invalid type of {type} was encountered ('datetime' expected).");
+
+            // Ranges have start/end components rather than a single value.
+            string key = type.EndsWith("range", StringComparison.Ordinal) ? "start" : "value";
+            return parseField(aiResult, key);
+        }
+
+        /// <summary>
+        /// Parses one component of an unwound result as a date/time.
+        /// </summary>
+        private static DateTime parseField(Dictionary<string, string> aiResult, string key)
+        {
+            if (!aiResult.TryGetValue(key, out string text))
+                throw new FormatException($"Error: The recognised text has no '{key}' component.");
+
+            // Parse culture-invariantly so the result doesn't vary with the host locale.
+            return DateTime.Parse(text, CultureInfo.InvariantCulture);
+        }
+
+        /// <summary>
+        /// Extracts the first resolved value from a recogniser result.
+        /// </summary>
+        private static Dictionary<string, string> unwindResult(ModelResult modelResult)
+        {
+            if (modelResult.Resolution != null
+                && modelResult.Resolution.TryGetValue("values", out object rawValues)
+                && rawValues is List<Dictionary<string, string>> values
+                && values.Count > 0)
+                return values[0];
+
+            throw new FormatException("Error: The recogniser didn't resolve that text to any values.");
+        }
+    }
+}
\ No newline at end of file
diff --git a/TextRecogniserDemo/Program.cs b/TextRecogniserDemo/Program.cs
new file mode 100644
index 0000000..c5b11ff
--- /dev/null
+++ b/TextRecogniserDemo/Program.cs
@@ -0,0 +1,113 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text.RegularExpressions;
+using Microsoft.Recognizers.Text;
+using Microsoft.Recognizers.Text.DateTime;
+using Microsoft.Recognizers.Text.Number;
+using Microsoft.Recognizers.Text.NumberWithUnit;
+using Microsoft.Recognizers.Text.Sequence;
+using Newtonsoft.Json;
+
+namespace TextRecogniserDemo
+{
+    /// <summary>The kinds of entity the demo can be asked to recognise.</summary>
+    enum AIMode {
+        DateTime, Number, NumberRange,
+        Age, Currency, Dimension,
+        Temperature, PhoneNumber, IPAddress,
+        URL, Mention, Hashtag
+    }
+
+    class MainClass
+    {
+        /// <summary>
+        /// Interactive loop: reads a mode name and then a line of text from stdin,
+        /// runs the matching recogniser over the text, and prints the results to
+        /// stdout as indented JSON. Prompts and errors are written to stderr.
+        /// </summary>
+        public static void Main(string[] args)
+        {
+            Console.Error.WriteLine("Microsoft.Recognizers.Text Demo");
+            Console.Error.WriteLine(" By Starbeamrainbowlabs");
+            Console.Error.Write("Modes available: ");
+            Console.Error.WriteLine(string.Join(", ", Enum.GetNames(typeof(AIMode))));
+
+            while (true) {
+                Console.Error.Write("\u001b[1m\u001b[33mMode:\u001b[0m ");
+                string modeText = Console.ReadLine();
+                if (modeText == null) break;
+
+                // Only letters are significant, so "phone number" selects PhoneNumber.
+                string modeName = Regex.Replace(modeText, @"[^a-zA-Z]", "");
+                if (!Enum.TryParse(modeName, true, out AIMode nextMode)) {
+                    // A typo shouldn't crash the session - ask again instead
+                    Console.Error.WriteLine($"Error: Unrecognised mode {modeText}.");
+                    continue;
+                }
+
+                Console.Error.Write("\u001b[1m\u001b[32m>\u001b[0m ");
+                string nextLine = Console.ReadLine();
+                if (nextLine == null) break;
+
+                List<ModelResult> result = null;
+                switch (nextMode) {
+                    case AIMode.DateTime:
+                        result = DateTimeRecognizer.RecognizeDateTime(nextLine, Culture.English);
+                        break;
+                    case AIMode.Number:
+                        result = NumberRecognizer.RecognizeNumber(nextLine, Culture.English);
+                        break;
+                    case AIMode.NumberRange:
+                        result = NumberRecognizer.RecognizeNumberRange(nextLine, Culture.English);
+                        break;
+                    case AIMode.Age:
+                        result = NumberWithUnitRecognizer.RecognizeAge(nextLine, Culture.English);
+                        break;
+                    case AIMode.Currency:
+                        result = NumberWithUnitRecognizer.RecognizeCurrency(nextLine, Culture.English);
+                        break;
+                    case AIMode.Dimension:
+                        result = NumberWithUnitRecognizer.RecognizeDimension(nextLine, Culture.English);
+                        break;
+                    case AIMode.Temperature:
+                        result = NumberWithUnitRecognizer.RecognizeTemperature(nextLine, Culture.English);
+                        break;
+                    case AIMode.PhoneNumber:
+                        result = SequenceRecognizer.RecognizePhoneNumber(nextLine, Culture.English);
+                        break;
+                    case AIMode.IPAddress:
+                        result = SequenceRecognizer.RecognizeIpAddress(nextLine, Culture.English);
+                        break;
+                    case AIMode.URL:
+                        result = SequenceRecognizer.RecognizeURL(nextLine, Culture.English);
+                        break;
+                    case AIMode.Mention:
+                        result = SequenceRecognizer.RecognizeMention(nextLine, Culture.English);
+                        break;
+                    case AIMode.Hashtag:
+                        result = SequenceRecognizer.RecognizeHashtag(nextLine, Culture.English);
+                        break;
+                    default:
+                        Console.Error.WriteLine($"Error: Unrecognised mode {nextMode}.");
+                        continue;
+                }
+                Console.WriteLine(JsonConvert.SerializeObject(result, Formatting.Indented));
+
+                Console.Error.WriteLine("\u001b[34m" + new string('-', getRuleWidth()) + "\u001b[0m");
+            }
+        }
+
+        /// <summary>
+        /// Works out how wide to draw the rule between results.
+        /// </summary>
+        private static int getRuleWidth()
+        {
+            // NOTE(review): querying the window size can fail or report 0 when
+            // stdout isn't a terminal - fall back to a conventional width.
+            if (Console.IsOutputRedirected)
+                return 79;
+            return Math.Max(1, Console.WindowWidth - 1);
+        }
+    }
+}
diff --git a/TextRecogniserDemo/Properties/AssemblyInfo.cs b/TextRecogniserDemo/Properties/AssemblyInfo.cs
new file mode 100644
index 0000000..156b204
--- /dev/null
+++ b/TextRecogniserDemo/Properties/AssemblyInfo.cs
@@ -0,0 +1,26 @@
+using System.Reflection;
+using System.Runtime.CompilerServices;
+
+// Information about this assembly is defined by the following attributes.
+// Change them to the values specific to your project.
+ +[assembly: AssemblyTitle("TextRecogniserDemo")] +[assembly: AssemblyDescription("")] +[assembly: AssemblyConfiguration("")] +[assembly: AssemblyCompany("")] +[assembly: AssemblyProduct("")] +[assembly: AssemblyCopyright("sbrl")] +[assembly: AssemblyTrademark("")] +[assembly: AssemblyCulture("")] + +// The assembly version has the format "{Major}.{Minor}.{Build}.{Revision}". +// The form "{Major}.{Minor}.*" will automatically update the build and revision, +// and "{Major}.{Minor}.{Build}.*" will update just the revision. + +[assembly: AssemblyVersion("1.0.*")] + +// The following attributes are used to specify the signing key for the assembly, +// if desired. See the Mono documentation for more information about signing. + +//[assembly: AssemblyDelaySign(false)] +//[assembly: AssemblyKeyFile("")] diff --git a/TextRecogniserDemo/TextRecogniserDemo.csproj b/TextRecogniserDemo/TextRecogniserDemo.csproj new file mode 100644 index 0000000..8af3529 --- /dev/null +++ b/TextRecogniserDemo/TextRecogniserDemo.csproj @@ -0,0 +1,77 @@ + + + + Debug + x86 + {CEC384D1-4B3D-4EB8-A801-40E9845EA443} + Exe + TextRecogniserDemo + TextRecogniserDemo + v4.7 + + + true + full + false + bin\Debug + DEBUG; + prompt + 4 + true + x86 + + + true + bin\Release + prompt + 4 + true + x86 + + + + + ..\packages\System.Collections.Immutable.1.4.0\lib\netstandard2.0\System.Collections.Immutable.dll + + + ..\packages\System.ValueTuple.4.4.0\lib\net47\System.ValueTuple.dll + + + + ..\packages\Microsoft.Recognizers.Text.1.1.3\lib\net462\Microsoft.Recognizers.Definitions.dll + + + ..\packages\Microsoft.Recognizers.Text.1.1.3\lib\net462\Microsoft.Recognizers.Text.dll + + + ..\packages\Microsoft.Recognizers.Text.Number.1.1.3\lib\net462\Microsoft.Recognizers.Text.Number.dll + + + ..\packages\Microsoft.Recognizers.Text.NumberWithUnit.1.1.3\lib\net462\Microsoft.Recognizers.Text.NumberWithUnit.dll + + + 
..\packages\Microsoft.Recognizers.Text.DateTime.1.1.3\lib\net462\Microsoft.Recognizers.Text.DateTime.dll + + + ..\packages\Microsoft.Recognizers.Text.Sequence.1.1.3\lib\net462\Microsoft.Recognizers.Text.Sequence.dll + + + ..\packages\Microsoft.Recognizers.Text.Choice.1.1.3\lib\net462\Microsoft.Recognizers.Text.Choice.dll + + + ..\packages\Microsoft.Recognizers.Text.DataTypes.TimexExpression.1.1.3\lib\net462\Microsoft.Recognizers.Text.DataTypes.TimexExpression.dll + + + ..\packages\Newtonsoft.Json.11.0.2\lib\net45\Newtonsoft.Json.dll + + + + + + + + + + + + \ No newline at end of file diff --git a/TextRecogniserDemo/packages.config b/TextRecogniserDemo/packages.config new file mode 100644 index 0000000..0fb38c6 --- /dev/null +++ b/TextRecogniserDemo/packages.config @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file