Experiments into markov chains, n-grams, and text generation.

WeightedMarkovChain.cs 2.7KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Linq;
  4. using MarkovGrams.Utilities;
  5. using SBRL.Algorithms;
  6. namespace MarkovGrams
  7. {
  8. /// <summary>
  9. /// An unweighted character-based markov chain.
  10. /// </summary>
  11. public class WeightedMarkovChain
  12. {
  13. private WeightedRandom<string> wrandom = new WeightedRandom<string>();
  14. /// <summary>
  15. /// The ngrams that this markov chain currently contains.
  16. /// </summary>
  17. Dictionary<string, double> ngrams;
  18. /// <summary>
  19. /// Creates a new character-based markov chain.
  20. /// </summary>
  21. /// <param name="inNgrams">The ngrams to populate the new markov chain with.</param>
  22. public WeightedMarkovChain(IEnumerable<string> inNgrams)
  23. {
  24. ngrams = new Dictionary<string, double>();
  25. foreach (string ngram in inNgrams)
  26. {
  27. if (ngrams.ContainsKey(ngram))
  28. ngrams[ngram]++;
  29. else
  30. ngrams.Add(ngram, 1);
  31. }
  32. }
  33. /// <summary>
  34. /// Returns a random ngram that's currently loaded into this WeightedMarkovChain.
  35. /// </summary>
  36. /// <returns>A random ngram from this UnweightMarkovChain's cache of ngrams.</returns>
  37. public string RandomNgram()
  38. {
  39. if (wrandom.Count == 0)
  40. wrandom.SetContents(ngrams);
  41. return wrandom.Next();
  42. }
  43. /// <summary>
  44. /// Generates a new random string from the currently stored ngrams.
  45. /// </summary>
  46. /// <param name="length">
  47. /// The length of ngram to generate.
  48. /// Note that this is a target, not a fixed value - e.g. passing 2 when the n-gram order is 3 will
  49. /// result in a string of length 3. Also, depending on the current ngrams this markov chain contains,
  50. /// it may end up being cut short.
  51. /// </param>
  52. /// <returns>A new random string.</returns>
  53. public string Generate(int length)
  54. {
  55. string result = RandomNgram();
  56. string lastNgram = result;
  57. while(result.Length < length)
  58. {
  59. wrandom.ClearContents();
  60. // The substring that the next ngram in the chain needs to start with
  61. string nextStartsWith = lastNgram.Substring(1);
  62. // Get a list of possible n-grams we could choose from next
  63. Dictionary<string, double> convNextNgrams = new Dictionary<string, double>();
  64. ngrams.Where(gram_data => gram_data.Key.StartsWith(nextStartsWith))
  65. .ForEach((KeyValuePair<string, double> ngramData) => convNextNgrams.Add(ngramData.Key, ngramData.Value));
  66. // If there aren't any choices left, we can't exactly keep adding to the new string any more :-(
  67. if(convNextNgrams.Count() == 0)
  68. break;
  69. wrandom.SetContents(convNextNgrams);
  70. // Pick a random n-gram from the list
  71. string nextNgram = wrandom.Next();
  72. // Add the last character from the n-gram to the string we're building
  73. result += nextNgram[nextNgram.Length - 1];
  74. lastNgram = nextNgram;
  75. }
  76. wrandom.ClearContents();
  77. return result;
  78. }
  79. }
  80. }