using System;
using System.Collections.Generic;
using System.Linq;

namespace MarkovGrams
{
	/// <summary>
	/// An unweighted character-based markov chain.
	/// </summary>
	public class UnweightedMarkovChain
	{
		/// <summary>
		/// The random number generator.
		/// </summary>
		private readonly Random rand = new Random();

		/// <summary>
		/// The ngrams that this markov chain currently contains.
		/// Never contains null or empty entries (they are dropped by the constructor).
		/// </summary>
		private readonly List<string> ngrams;

		/// <summary>
		/// Whether to always start generating a new word from an n-gram that starts with
		/// an uppercase letter.
		/// </summary>
		public bool StartOnUppercase = false;

		/// <summary>
		/// The generation mode to use when running the Markov Chain.
		/// </summary>
		/// <remarks>
		/// The input n-grams must have been generated using the same mode specified here.
		/// </remarks>
		public GenerationMode Mode { get; private set; } = GenerationMode.CharacterLevel;

		/// <summary>
		/// Creates a new character-based markov chain.
		/// </summary>
		/// <param name="inNgrams">The ngrams to populate the new markov chain with.</param>
		/// <param name="inMode">The generation mode the supplied ngrams were produced with.</param>
		/// <exception cref="ArgumentNullException">Thrown if <paramref name="inNgrams"/> is null.</exception>
		public UnweightedMarkovChain(IEnumerable<string> inNgrams, GenerationMode inMode)
		{
			if (inNgrams == null)
			{
				throw new ArgumentNullException(nameof(inNgrams));
			}

			// Null / empty n-grams can never contribute to a generated string, and would
			// make the indexing in RandomNgram() and Generate() throw, so drop them up front.
			ngrams = inNgrams.Where(ngram => !string.IsNullOrEmpty(ngram)).ToList();
			Mode = inMode;
		}

		/// <summary>
		/// Returns a random ngram that's currently loaded into this UnweightedMarkovChain.
		/// </summary>
		/// <returns>A random ngram from this UnweightedMarkovChain's cache of ngrams.</returns>
		/// <exception cref="InvalidOperationException">
		/// Thrown if there are no ngrams to choose from (taking <see cref="StartOnUppercase"/> into account).
		/// </exception>
		public string RandomNgram()
		{
			// Materialise the candidates once, rather than re-running a deferred
			// query for every Count() / ElementAt() call.
			List<string> validNgrams = StartOnUppercase
				? ngrams.FindAll(ngram => char.IsUpper(ngram[0]))
				: ngrams;

			if (validNgrams.Count == 0)
			{
				throw new InvalidOperationException($"Error: No valid starting ngrams were found (StartOnUppercase: {StartOnUppercase}).");
			}

			return validNgrams[rand.Next(0, validNgrams.Count)];
		}

		/// <summary>
		/// Generates a new random string from the currently stored ngrams.
		/// </summary>
		/// <param name="length">
		/// The target length, in characters, of the string to generate.
		/// Note that this is a target, not a fixed value - e.g. passing 2 when the n-gram order is 3 will
		/// result in a string of length 3. Also, depending on the current ngrams this markov chain contains,
		/// it may end up being cut short.
		/// </param>
		/// <returns>A new random string.</returns>
		/// <exception cref="InvalidOperationException">Thrown if there is no valid n-gram to start from.</exception>
		public string Generate(int length)
		{
			string result = RandomNgram();
			string lastNgram = result;

			while (result.Length < length)
			{
				// The prefix that the next ngram in the chain needs to start with
				string nextStartsWith = NextPrefix(lastNgram);

				// Get a list of possible n-grams we could choose from next.
				// Ordinal comparison: n-grams are raw data, not culture-sensitive text.
				List<string> nextNgrams = ngrams.FindAll(
					gram => gram.StartsWith(nextStartsWith, StringComparison.Ordinal)
				);

				// If there aren't any choices left, we can't keep adding to the new string any more
				if (nextNgrams.Count == 0)
				{
					break;
				}

				// Pick a random n-gram from the list
				string nextNgram = nextNgrams[rand.Next(0, nextNgrams.Count)];

				// Add only the part of the n-gram that doesn't overlap with what we already have
				result += Continuation(nextNgram);
				lastNgram = nextNgram;
			}

			return result;
		}

		/// <summary>
		/// Computes the prefix the next n-gram must start with: everything in
		/// <paramref name="lastNgram"/> except its first unit (character or word).
		/// </summary>
		private string NextPrefix(string lastNgram)
		{
			if (Mode == GenerationMode.CharacterLevel)
			{
				return lastNgram.Substring(1);
			}

			string overlap = string.Join(" ", lastNgram.Split(' ').Skip(1));
			// The trailing space pins the match to a word boundary, so that "quick"
			// doesn't match an n-gram starting with "quickly". An empty overlap
			// (single-word n-grams) places no constraint on the next n-gram.
			return overlap.Length == 0 ? string.Empty : overlap + " ";
		}

		/// <summary>
		/// Extracts the text that <paramref name="nextNgram"/> contributes to the output:
		/// its last character, or a space followed by its last word.
		/// </summary>
		private string Continuation(string nextNgram)
		{
			if (Mode == GenerationMode.CharacterLevel)
			{
				return nextNgram[nextNgram.Length - 1].ToString();
			}

			string[] words = nextNgram.Split(' ');
			// Always at least one character (the space), so Generate() is guaranteed to make progress.
			return " " + words[words.Length - 1];
		}
	}
}