using System;
using System.Collections.Generic;
using System.Linq;
using MarkovGrams.Utilities;
using SBRL.Algorithms;

namespace MarkovGrams
{
    /// <summary>
    /// A weighted character-based markov chain.
    /// Every distinct n-gram is stored together with the number of times it
    /// occurred in the input; those counts are handed to the
    /// <see cref="WeightedRandom{T}"/> picker whenever an n-gram is chosen.
    /// </summary>
    public class WeightedMarkovChain
    {
        // NOTE(review): the generic type arguments were missing from the text
        // under review. <string> for the picker follows from Next() being
        // assigned to a string; <string, double> for the table is an
        // assumption - confirm it against the parameter type of
        // WeightedRandom.SetContents().
        private readonly WeightedRandom<string> wrandom = new WeightedRandom<string>();

        /// <summary>
        /// The ngrams that this markov chain currently contains, mapped to
        /// their occurrence counts.
        /// </summary>
        private readonly Dictionary<string, double> ngrams;

        /// <summary>
        /// Creates a new character-based markov chain.
        /// </summary>
        /// <param name="inNgrams">
        /// The ngrams to populate the new markov chain with. Duplicates are
        /// allowed: each repeat increases the stored count of that ngram by one.
        /// </param>
        /// <exception cref="ArgumentNullException">
        /// Thrown if <paramref name="inNgrams"/> is null.
        /// </exception>
        public WeightedMarkovChain(IEnumerable<string> inNgrams)
        {
            if (inNgrams == null)
            {
                throw new ArgumentNullException("inNgrams");
            }

            ngrams = new Dictionary<string, double>();
            foreach (string ngram in inNgrams)
            {
                // TryGetValue leaves count at 0 for an unseen ngram, so one
                // code path covers both the "first sighting" and "repeat" cases.
                double count;
                ngrams.TryGetValue(ngram, out count);
                ngrams[ngram] = count + 1;
            }
        }

        /// <summary>
        /// Returns a random ngram that's currently loaded into this WeightedMarkovChain.
        /// </summary>
        /// <returns>A random ngram from this WeightedMarkovChain's cache of ngrams.</returns>
        public string RandomNgram()
        {
            // Only (re)load the complete ngram table when the picker reports
            // that it holds nothing; Generate() empties it before returning.
            if (wrandom.Count == 0)
            {
                wrandom.SetContents(ngrams);
            }

            return wrandom.Next();
        }

        /// <summary>
        /// Generates a new random string from the currently stored ngrams.
        /// </summary>
        /// <param name="length">
        /// The target length of the string to generate.
        /// Note that this is a target, not a fixed value - e.g. passing 2 when the n-gram order is 3 will
        /// result in a string of length 3. Also, depending on the current ngrams this markov chain contains,
        /// it may end up being cut short.
        /// </param>
        /// <returns>A new random string.</returns>
        public string Generate(int length)
        {
            // The result starts out as one whole ngram, which is why it can
            // never be shorter than the n-gram order.
            string result = RandomNgram();
            string lastNgram = result;

            while (result.Length < length)
            {
                wrandom.ClearContents();

                // The substring that the next ngram in the chain needs to start with
                string nextStartsWith = lastNgram.Substring(1);

                // Collect the ngrams we could choose from next, keeping their counts.
                // Ordinal comparison: ngrams are raw character sequences, so the
                // prefix test must not depend on culture-specific collation rules.
                var candidates = ngrams
                    .Where(gram => gram.Key.StartsWith(nextStartsWith, StringComparison.Ordinal))
                    .ToDictionary(gram => gram.Key, gram => gram.Value);

                // If there aren't any choices left, we can't keep adding to the new string
                if (candidates.Count == 0)
                {
                    break;
                }

                // Pick a random n-gram from the candidates
                wrandom.SetContents(candidates);
                string nextNgram = wrandom.Next();

                // Add the last character from the n-gram to the string we're building
                result += nextNgram[nextNgram.Length - 1];
                lastNgram = nextNgram;
            }

            // Leave the picker empty so the next RandomNgram() call reloads
            // the complete table rather than the last candidate subset.
            wrandom.ClearContents();
            return result;
        }
    }
}