Finish the initial HTML implementation.

2024-11-22 06:23:02 +00:00 · 2019-07-28 17:24:21 +01:00 · 2019-07-28 17:24:21 +01:00 · 14fca32a5e
commit 14fca32a5e
parent 59a0289b3a
12 changed files with 746 additions and 14 deletions
--- a/PolyFeed/FeedBuilder.cs
+++ b/PolyFeed/FeedBuilder.cs
@ -1,9 +1,13 @@
 using System;
+using System.Collections.Generic;
 using System.IO;
 using System.Net;
 using System.Text;
 using System.Threading.Tasks;
 using System.Xml;
+using Fizzler.Systems.HtmlAgilityPack;
+using HtmlAgilityPack;
+using Microsoft.SyndicationFeed;
 using Microsoft.SyndicationFeed.Atom;

 namespace PolyFeed
@ -22,9 +26,68 @@ namespace PolyFeed
 		public async Task AddSource(FeedSource source) {
 			WebResponse response = await WebRequest.Create(source.Url).GetResponseAsync();
 
-			using StreamReader reader = new StreamReader(response.GetResponseStream());
 
 
+			// Writes the feed-level header for the fetched source, then hands the
+			// response to the handler for the configured source type.
+			// The try/finally guarantees the response is released on every path,
+			// including the "source type not implemented" one.
+			try {
+				// Write the header
+				await feed.WriteGenerator("Polyfeed", "https://gitlab.com/sbrl/PolyFeed.git", Program.getProgramVersion());
+				await feed.WriteId(source.Url);
+
+				// Prefer the server-reported modification time. HTTP dates use English
+				// day and month names, so they are parsed with the invariant culture.
+				// A missing or unparseable header falls back to the current time.
+				string lastModified = response.Headers.Get("last-modified");
+				DateTimeOffset updated;
+				if (!DateTimeOffset.TryParse(lastModified, System.Globalization.CultureInfo.InvariantCulture, System.Globalization.DateTimeStyles.None, out updated))
+					updated = DateTimeOffset.Now;
+				await feed.WriteUpdated(updated);
+
+				switch (source.SourceType) {
+					case SourceType.HTML:
+						await AddSourceHtml(source, response);
+						break;
+					default:
+						throw new NotImplementedException($"Error: The source type {source.SourceType} hasn't been implemented yet.");
+				}
+			}
+			finally {
+				response.Dispose();
+			}
+		}
+
+		/// <summary>
+		/// Parses the response body as HTML, writes the feed title and subtitle, and
+		/// builds a <see cref="SyndicationItem" /> for every element matched by
+		/// <see cref="FeedSource.EntrySelector" />.
+		/// </summary>
+		/// <param name="source">The source definition that holds the selectors and templates to apply.</param>
+		/// <param name="response">The response whose body should be read as HTML.</param>
+		private async Task AddSourceHtml(FeedSource source, WebResponse response) {
+			HtmlDocument html = new HtmlDocument();
+			using (StreamReader reader = new StreamReader(response.GetResponseStream()))
+				html.LoadHtml(await reader.ReadToEndAsync());
+
+			HtmlNode document = html.DocumentNode;
+
+			await feed.WriteTitle(ReferenceSubstitutor.Replace(source.Title, document));
+			await feed.WriteSubtitle(ReferenceSubstitutor.Replace(source.Subtitle, document));
+
+			foreach (HtmlNode nextNode in document.QuerySelectorAll(source.EntrySelector)) {
+				HtmlNode urlNode = nextNode.QuerySelector(source.EntryUrlSelector);
+				string url = ReadNodeValue(urlNode, source.EntryUrlAttribute);
+
+				SyndicationItem nextItem = new SyndicationItem() {
+					Id = url,
+					Title = ReferenceSubstitutor.Replace(source.EntryTitle, nextNode),
+					Description = ReferenceSubstitutor.Replace(source.EntryContent, nextNode)
+				};
+
+				// Both dates are optional. The selector properties have no default
+				// value, so an unset selector is null rather than an empty string.
+				if (!string.IsNullOrEmpty(source.EntryPublishedSelector)) {
+					HtmlNode publishedNode = nextNode.QuerySelector(source.EntryPublishedSelector);
+					nextItem.Published = DateTime.Parse(
+						ReadNodeValue(publishedNode, source.EntryPublishedAttribute)
+					);
+				}
+				if (!string.IsNullOrEmpty(source.EntryLastUpdatedSelector)) {
+					HtmlNode lastUpdatedNode = nextNode.QuerySelector(source.EntryLastUpdatedSelector);
+					nextItem.LastUpdated = DateTime.Parse(
+						ReadNodeValue(lastUpdatedNode, source.EntryLastUpdatedAttribute)
+					);
+				}
+
+				// NOTE(review): nextItem is fully populated here but is not handed to
+				// the feed writer anywhere in this method - confirm whether writing the
+				// entry is still to be added.
+			}
 		}
+
+		/// <summary>
+		/// Reads a value from a node: the named attribute when one is given,
+		/// otherwise the text content of the node itself.
+		/// </summary>
+		/// <param name="node">The node to read from.</param>
+		/// <param name="attributeName">The attribute to read, or null / empty to use the node's text.</param>
+		/// <returns>The attribute value or the node's inner text.</returns>
+		private static string ReadNodeValue(HtmlNode node, string attributeName) {
+			return string.IsNullOrEmpty(attributeName)
+				? node.InnerText
+				: node.Attributes[attributeName].DeEntitizeValue;
+		}
 	}
 }
--- a/PolyFeed/FeedSource.cs
+++ b/PolyFeed/FeedSource.cs
@ -1,4 +1,5 @@
 using System;
+
 namespace PolyFeed
 {
 	public enum SourceType { HTML, XML, JSON };
@ -10,31 +11,43 @@ namespace PolyFeed
 		/// </summary>
 		/// <value>The URL.</value>
 		public string Url { get; set; }
-		/// <summary>
-		/// The title of the feed.
-		/// </summary>
-		public string Title { get; set; }
+
 		/// <summary>
 		/// The type of source document to expect.
 		/// </summary>
 		public SourceType SourceType { get; set; }

+		/// <summary>
+		/// The title of the feed.
+		/// Supports the same {} syntax as <see cref="EntryTitle" />.
+		/// </summary>
+		public string Title { get; set; }
+		/// <summary>
+		/// The subtitle of the feed.
+		/// Supports the same {} syntax as <see cref="EntryTitle" />.
+		/// </summary>
+		/// <value>The subtitle.</value>
+		public string Subtitle { get; set; }
+
+
+		#region Entries
+
 		/// <summary>
 		/// A selector that matches against an element that contains the URL that an
 		/// entry should link to.
 		/// Relative to the element selected by <see cref="EntrySelector" />.
 		/// </summary>
-		public string UrlSelector { get; set; }
+		public string EntryUrlSelector { get; set; }
 		/// <summary>
-		/// The name of the attribute on the element selected by <see cref="UrlSelector" />.
+		/// The name of the attribute on the element selected by <see cref="EntryUrlSelector" />.
 		/// Set to an empty string to select the content of the element instead of the 
 		/// content of an attribute.
 		/// </summary>
-		public string UrlElementAttribute { get; set; } = "";
+		public string EntryUrlAttribute { get; set; } = "";

 		/// <summary>
-		/// The selector that specifies the location in the object model of nodes that should 
-		/// be added to the feed.
+		/// The selector that specifies the location of nodes in the object model that 
+		/// should be added to the feed.
 		/// The format varies depending on the <see cref="SourceType" />.
 		///  - HTML: CSS selector (e.g. main > article)
 		///  - XML: XPath (e.g. //element_name)
@ -52,5 +65,29 @@ namespace PolyFeed
 		/// Same as <see cref="EntryTitle" />, but for the body of an entry. HTML is allowed.
 		/// </summary>
 		public string EntryContent { get; set; }
+
+		/// <summary>
+		/// The selector for the node that contains the date published for an entry.
+		/// </summary>
+		public string EntryPublishedSelector { get; set; }
+
+		/// <summary>
+		/// The name of the attribute that contains the date published for an entry.
+		/// Set to <see cref="string.Empty" /> to use the content of the node itself.
+		/// </summary>
+		public string EntryPublishedAttribute { get; set; }
+
+		/// <summary>
+		/// Same as <see cref="EntryPublishedSelector" />, but for the date an entry was last updated.
+		/// If not specified, the last updated will be omitted.
+		/// </summary>
+		public string EntryLastUpdatedSelector { get; set; }
+		/// <summary>
+		/// Same as <see cref="EntryPublishedAttribute" />, but for the date an entry was last updated.
+		/// </summary>
+		public string EntryLastUpdatedAttribute { get; set; }
+
+		#endregion
+
 	}
 }
--- a/PolyFeed/PolyFeed.csproj
+++ b/PolyFeed/PolyFeed.csproj
@ -34,7 +34,7 @@
      <HintPath>..\packages\Fizzler.1.2.0\lib\netstandard2.0\Fizzler.dll</HintPath>
    </Reference>
    <Reference Include="HtmlAgilityPack">
-      <HintPath>..\packages\HtmlAgilityPack.1.11.9\lib\Net45\HtmlAgilityPack.dll</HintPath>
+      <HintPath>..\packages\HtmlAgilityPack.1.11.12\lib\Net45\HtmlAgilityPack.dll</HintPath>
    </Reference>
    <Reference Include="Microsoft.Win32.Primitives">
      <HintPath>..\packages\Microsoft.Win32.Primitives.4.3.0\lib\net46\Microsoft.Win32.Primitives.dll</HintPath>
@ -136,10 +136,20 @@
    <Compile Include="Properties\AssemblyInfo.cs" />
    <Compile Include="FeedBuilder.cs" />
    <Compile Include="FeedSource.cs" />
+    <Compile Include="Salamander.Core\Lexer.cs" />
+    <Compile Include="Salamander.Core\LexerRule.cs" />
+    <Compile Include="Salamander.Core\LexerToken.cs" />
+    <Compile Include="Salamander.Core\Ansi.cs" />
+    <Compile Include="SubstitutionLexer.cs" />
+    <Compile Include="Salamander.Core\LexerPool.cs" />
+    <Compile Include="ReferenceSubstitutor.cs" />
  </ItemGroup>
  <ItemGroup>
    <None Include="packages.config" />
  </ItemGroup>
+  <ItemGroup>
+    <Folder Include="Salamander.Core\" />
+  </ItemGroup>
  <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
  <Import Project="..\packages\NETStandard.Library.2.0.3\build\netstandard2.0\NETStandard.Library.targets" Condition="Exists('..\packages\NETStandard.Library.2.0.3\build\netstandard2.0\NETStandard.Library.targets')" />
 </Project>
--- a/PolyFeed/Program.cs
+++ b/PolyFeed/Program.cs
@ -3,7 +3,7 @@ using System.Collections.Generic;
 using System.IO;
 using System.Reflection;

-namespace ProjectNamespace
+namespace PolyFeed
 {
 	internal class Settings
 	{
@ -67,7 +67,7 @@ namespace ProjectNamespace

 		#region Helper Methods

-		private static string getProgramVersion()
+		public static string getProgramVersion()
 		{
 			Version version = Assembly.GetExecutingAssembly().GetName().Version;
 			return $"{version.Major}.{version.Minor}";
--- a/PolyFeed/ReferenceSubstitutor.cs
+++ b/PolyFeed/ReferenceSubstitutor.cs
@ -0,0 +1,44 @@
+using System;
+using System.Text;
+using Fizzler.Systems.HtmlAgilityPack;
+using HtmlAgilityPack;
+using Salamander.Core.Lexer;
+
+namespace PolyFeed
+{
+	/// <summary>
+	/// Expands <c>{selector}</c> references inside template strings using content
+	/// taken from an HTML node.
+	/// </summary>
+	internal static class ReferenceSubstitutor {
+		private static readonly LexerPool<SubstitutionLexer, SubstitutionToken> lexerPool = new LexerPool<SubstitutionLexer, SubstitutionToken>();
+
+		/// <summary>
+		/// Builds a string from a template: text outside braces is copied as-is, and
+		/// each <c>{selector}</c> is replaced by the text of the first element under
+		/// <paramref name="rootElement" /> that matches the selector.
+		/// </summary>
+		/// <param name="inputString">The template to expand. Null or empty yields an empty string.</param>
+		/// <param name="rootElement">The node that selectors are evaluated against.</param>
+		/// <returns>The expanded string. A selector that matches nothing contributes no text.</returns>
+		public static string Replace(string inputString, HtmlNode rootElement)
+		{
+			// Optional templates may simply be unset; there is nothing to lex then.
+			if (string.IsNullOrEmpty(inputString))
+				return string.Empty;
+
+			StringBuilder result = new StringBuilder();
+			SubstitutionLexer lexer = lexerPool.AcquireLexer();
+			try
+			{
+				lexer.Initialise(inputString);
+
+				foreach (LexerToken<SubstitutionToken> nextToken in lexer.TokenStream())
+				{
+					switch (nextToken.Type) {
+						case SubstitutionToken.BraceOpen:
+							// Inside braces the content is a selector, not literal text
+							lexer.SaveRuleStates();
+							lexer.EnableRule(SubstitutionToken.Identifier);
+							lexer.DisableRule(SubstitutionToken.Text);
+							break;
+						case SubstitutionToken.BraceClose:
+							lexer.RestoreRuleStates();
+							break;
+
+						case SubstitutionToken.Text:
+							result.Append(nextToken.Value);
+							break;
+
+						case SubstitutionToken.Identifier: {
+							// Append the node's text. Appending the node object itself
+							// would insert its ToString() output instead of its content.
+							HtmlNode selected = rootElement.QuerySelector(nextToken.Value);
+							if (selected != null)
+								result.Append(selected.InnerText);
+							break;
+						}
+					}
+				}
+			}
+			finally
+			{
+				// Hand the lexer back even if lexing or a selector lookup throws
+				lexerPool.ReleaseLexer(lexer);
+			}
+
+			return result.ToString();
+		}
+	}
+}
--- a/PolyFeed/Salamander.Core/Ansi.cs
+++ b/PolyFeed/Salamander.Core/Ansi.cs
@ -0,0 +1,49 @@
+using System;
+
+namespace Salamander.Core.Helpers
+{
+	/// <summary>
+	/// Named ANSI escape sequences for styling console text and moving the cursor.
+	/// </summary>
+	public static class Ansi
+	{
+		/// <summary>
+		/// Master switch for the whole class. While this is false, every member
+		/// yields an empty string instead of an escape sequence.
+		/// Useful when the output is going to a log file, for example.
+		/// </summary>
+		public static bool Enabled { get; set; } = true;
+
+		/// <summary>
+		/// Passes the given sequence through when output is enabled, and swaps it
+		/// for an empty string when it is not.
+		/// </summary>
+		private static string Emit(string sequence) => Enabled ? sequence : "";
+
+		// Solution on how to output ANSI escape codes in C# from here:
+		// https://www.jerriepelser.com/blog/using-ansi-color-codes-in-net-console-apps
+
+		// --- Text attributes ---
+		public static string Reset => Emit("\u001b[0m");
+		public static string HiCol => Emit("\u001b[1m");
+		public static string Underline => Emit("\u001b[4m");
+		public static string Inverse => Emit("\u001b[7m");
+
+		// --- Foreground colours ---
+		public static string FBlack => Emit("\u001b[30m");
+		public static string FRed => Emit("\u001b[31m");
+		public static string FGreen => Emit("\u001b[32m");
+		public static string FYellow => Emit("\u001b[33m");
+		public static string FBlue => Emit("\u001b[34m");
+		public static string FMagenta => Emit("\u001b[35m");
+		public static string FCyan => Emit("\u001b[36m");
+		public static string FWhite => Emit("\u001b[37m");
+
+		// --- Background colours ---
+		public static string BBlack => Emit("\u001b[40m");
+		public static string BRed => Emit("\u001b[41m");
+		public static string BGreen => Emit("\u001b[42m");
+		public static string BYellow => Emit("\u001b[43m");
+		public static string BBlue => Emit("\u001b[44m");
+		public static string BMagenta => Emit("\u001b[45m");
+		public static string BCyan => Emit("\u001b[46m");
+		public static string BWhite => Emit("\u001b[47m");
+
+		// --- Cursor movement ---
+		// Thanks to http://ascii-table.com/ansi-escape-sequences.php for the following ANSI escape sequences
+		public static string Up(int lines = 1) => Emit($"\u001b[{lines}A");
+		public static string Down(int lines = 1) => Emit($"\u001b[{lines}B");
+		public static string Right(int lines = 1) => Emit($"\u001b[{lines}C");
+		public static string Left(int lines = 1) => Emit($"\u001b[{lines}D");
+
+		//public static string JumpTo(Vector2 pos) => $"\u001b[{pos.Y};{pos.X}H" : "";
+
+		// --- Saving and restoring the cursor position ---
+		public static string CursorPosSave => Emit("\u001b[s");
+		public static string CursorPosRestore => Emit("\u001b[u");
+	}
+}
--- a/PolyFeed/Salamander.Core/Lexer.cs
+++ b/PolyFeed/Salamander.Core/Lexer.cs
@ -0,0 +1,328 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Text;
+using System.Text.RegularExpressions;
+using Salamander.Core.Helpers;
+
+namespace Salamander.Core.Lexer
+{
+	public class Lexer<TokenType>
+	{
+		/// <summary>
+		/// The rules that should be used during the lexing process.
+		/// </summary>
+		public List<LexerRule<TokenType>> Rules { get; private set; } = new List<LexerRule<TokenType>>();
+		/// <summary>
+		/// Tokens in this list will be matched against, but not emitted by the lexer
+		/// into the main token stream.
+		/// Useful for catching and disposing of sequences of characters you don't want escaping
+		/// or breaking your parser.
+		/// </summary>
+		public List<TokenType> IgnoreTokens { get; private set; } = new List<TokenType>();
+
+		/// <summary>
+		/// Whether the lexer should be verbose and log a bunch of debugging information 
+		/// to the console.
+		/// </summary>
+		public bool Verbose { get; set; } = false;
+
+		/// <summary>
+		/// The number of the line that is currently being scanned.
+		/// </summary>
+		public int CurrentLineNumber { get; private set; } = 0;
+		/// <summary>
+		/// The number of characters on the current line that have been scanned.
+		/// </summary>
+		/// <value>The current line position.</value>
+		public int CurrentLinePos { get; private set; } = 0;
+		/// <summary>
+		/// The total number of characters currently scanned by this lexer instance.
+		/// Only updated every newline!
+		/// </summary>
+		public int TotalCharsScanned { get; private set; } = 0;
+
+		/// <summary>
+		/// The internal stream that we should read from when lexing.
+		/// </summary>
+		private StreamReader textStream;
+
+		/// <summary>
+		/// A stack of rule states.
+		/// Whether rules are enabled or disabled can be recursively saved and restored - 
+		/// this <see cref="Stack{T}" /> is how the lexer saves this information.
+		/// </summary>
+		private Stack<Dictionary<LexerRule<TokenType>, bool>> EnabledStateStack = new Stack<Dictionary<LexerRule<TokenType>, bool>>();
+
+		/// <summary>
+		/// Creates a new <see cref="Lexer{TokenType}" />, optionally containing the given 
+		/// <see cref="LexerRule{TokenType}" /> instances.
+		/// </summary>
+		/// <param name="initialRules">The rules to add to the new <see cref="Lexer{TokenType}" />.</param>
+		public Lexer(params LexerRule<TokenType>[] initialRules)
+		{
+			AddRules(initialRules);
+		}
+
+		/// <summary>
+		/// Adds a single lexing rule to the <see cref="Lexer{TokenType}" />.
+		/// </summary>
+		/// <param name="newRule">The rule to add.</param>
+		public void AddRule(LexerRule<TokenType> newRule)
+			=> Rules.Add(newRule);
+		/// <summary>
+		/// Adds a bunch of lexing rules to the <see cref="Lexer{TokenType}" />.
+		/// </summary>
+		/// <param name="newRules">The rules to add.</param>
+		public void AddRules(IEnumerable<LexerRule<TokenType>> newRules)
+			=> Rules.AddRange(newRules);
+
+		/// <summary>
+		/// Points the lexer at a new input stream and resets its per-run state.
+		/// </summary>
+		/// <remarks>
+		/// Lexers MAY be re-used across multiple input streams, so child classes that
+		/// keep state of their own should override this method to reset it.
+		/// Overrides must remember to call this base method.
+		/// </remarks>
+		/// <param name="reader">The <see cref="StreamReader"/> to use as the new input stream.</param>
+		public virtual void Initialise(StreamReader reader)
+		{
+			// All position counters start again from zero
+			CurrentLineNumber = CurrentLinePos = TotalCharsScanned = 0;
+
+			// Drop any snapshots left over from a previous run, then switch every rule back on
+			EnabledStateStack.Clear();
+			EnableAllRules();
+
+			textStream = reader;
+		}
+		/// <summary>
+		/// Reinitialises the lexer to read from a string instead of a stream.
+		/// The string is encoded as UTF-8 into an in-memory stream, which is then 
+		/// handed to <see cref="Initialise(StreamReader)" />.
+		/// </summary>
+		/// <param name="input">The text to lex.</param>
+		public void Initialise(string input)
+		{
+			MemoryStream stream = new MemoryStream(Encoding.UTF8.GetBytes(input));
+			Initialise(new StreamReader(stream));
+		}
+
+		/// <summary>
+		/// Performs the lexing process itself in an incremental manner.
+		/// Note that a single Lexer may only do a single lex at a time - even if it's the 
+		/// same document multiple times over.
+		/// </summary>
+		/// <remarks>
+		/// The input is read one line at a time. At each position every enabled rule is
+		/// tried, and the match that starts earliest wins. Ties are broken by the
+		/// highest priority and then by the longest match. Text that sits before the
+		/// winning match, or that no rule matches at all, is emitted as a token with
+		/// no rule attached.
+		/// </remarks>
+		/// <returns>A stream of lexical tokens.</returns>
+		public IEnumerable<LexerToken<TokenType>> TokenStream()
+		{
+			string nextLine;
+			List<LexerToken<TokenType>> matches = new List<LexerToken<TokenType>>();
+			while ((nextLine = textStream.ReadLine()) != null)
+			{
+				CurrentLinePos = 0;
+
+				while (CurrentLinePos < nextLine.Length)
+				{
+					matches.Clear();
+					foreach (LexerRule<TokenType> rule in Rules)
+					{
+						if (!rule.Enabled) continue;
+
+						Match nextMatch = rule.RegEx.Match(nextLine, CurrentLinePos);
+						// A zero-length match can never advance the scan position, so
+						// accepting one would make this loop spin forever.
+						if (!nextMatch.Success || nextMatch.Length == 0) continue;
+
+						matches.Add(
+							new LexerToken<TokenType>(rule, nextMatch)
+							{
+								LineNumber = CurrentLineNumber,
+								ColumnNumber = nextMatch.Index
+							}
+						);
+					}
+
+					if (matches.Count == 0)
+					{
+						// Nothing matches any more: the rest of the line is one unknown token
+						string unknownTokenContent = nextLine.Substring(CurrentLinePos);
+						if (Verbose) Console.WriteLine($"{Ansi.FRed}[Unknown Token: No matches found for this line]{Ansi.Reset} {unknownTokenContent}");
+						yield return new LexerToken<TokenType>(unknownTokenContent)
+						{
+							LineNumber = CurrentLineNumber,
+							ColumnNumber = CurrentLinePos
+						};
+						break;
+					}
+
+					matches.Sort((LexerToken<TokenType> a, LexerToken<TokenType> b) => {
+						// Earliest starting position first
+						int result = a.ColumnNumber - b.ColumnNumber;
+						// If they both start at the same position, then go with highest priority one
+						if (result == 0)
+							result = b.Rule.Priority - a.Rule.Priority;
+						// Failing that, try the longest one
+						if (result == 0)
+							result = b.RegexMatch.Length - a.RegexMatch.Length;
+
+						return result;
+					});
+					LexerToken<TokenType> selectedToken = matches[0];
+					// The match records where it starts. Searching the line for the
+					// matched text instead can land on an earlier, unrelated occurrence.
+					int selectedTokenOffset = selectedToken.RegexMatch.Index - CurrentLinePos;
+
+					if (selectedTokenOffset > 0)
+					{
+						string extraTokenContent = nextLine.Substring(CurrentLinePos, selectedTokenOffset);
+						int unmatchedLinePos = CurrentLinePos;
+						CurrentLinePos += selectedTokenOffset;
+						if (Verbose) Console.WriteLine($"{Ansi.FRed}[Unmatched content]{Ansi.Reset} '{extraTokenContent}'");
+						// Return an unknown token, but only if we're not meant to be ignoring them.
+						// The unknown type is whichever enum member has the value 0.
+						if (!IgnoreTokens.Contains((TokenType)Enum.ToObject(typeof(TokenType), 0)))
+						{
+							yield return new LexerToken<TokenType>(extraTokenContent)
+							{
+								LineNumber = CurrentLineNumber,
+								ColumnNumber = unmatchedLinePos
+							};
+						}
+					}
+
+					CurrentLinePos += selectedToken.RegexMatch.Length;
+					if (Verbose) Console.WriteLine($"{(IgnoreTokens.Contains(selectedToken.Type) ? Ansi.FBlack : Ansi.FGreen)}{selectedToken}{Ansi.Reset}");
+
+					// Yield the token, but only if we aren't supposed to be ignoring it
+					if (IgnoreTokens.Contains(selectedToken.Type))
+						continue;
+					yield return selectedToken;
+				}
+
+				if (Verbose) Console.WriteLine($"{Ansi.FBlue}[Lexer]{Ansi.Reset} Next line");
+				CurrentLineNumber++;
+				TotalCharsScanned += CurrentLinePos;
+			}
+		}
+
+
+		#region Rule Management
+
+		/// <summary>
+		/// Enables all <see cref="LexerRule{TokenType}" />s currently registered against
+		/// this Lexer.
+		/// </summary>
+		public void EnableAllRules() => EnableRulesByPrefix("");
+		/// <summary>
+		/// Disables all <see cref="LexerRule{TokenType}" />s currently registered against
+		/// this Lexer.
+		/// </summary>
+		public void DisableAllRules() => DisableRulesByPrefix("");
+
+		/// <summary>
+		/// Enables the rule that matches against the given <see cref="TokenType" />.
+		/// </summary>
+		/// <param name="type">The token type to use to find the rule to enable.</param>
+		public void EnableRule(TokenType type) => SetRule(type, true);
+		/// <summary>
+		/// Disables the rule that matches against the given <see cref="TokenType" />.
+		/// </summary>
+		/// <param name="type">The token type to use to find the rule to disable.</param>
+		public void DisableRule(TokenType type) => SetRule(type, false);
+
+		/// <summary>
+		/// Switches the rule that produces the given <see cref="TokenType" /> on or off.
+		/// Only the first rule registered for that token type is affected. If no 
+		/// rule produces it, nothing happens.
+		/// </summary>
+		/// <param name="type">The <see cref="TokenType" /> whose rule should be changed.</param>
+		/// <param name="state">Whether to enable or disable the rule. <see langword="true"/> = enable it, <see langword="false"/> = disable it.</param>
+		public void SetRule(TokenType type, bool state)
+		{
+			// Token types are compared by enum member name. A string comparison is needed
+			// here because of the generic type being used in multiple nested classes.
+			LexerRule<TokenType> target = Rules.Find(
+				candidate => Enum.GetName(candidate.Type.GetType(), candidate.Type) == Enum.GetName(type.GetType(), type)
+			);
+
+			if (target != null)
+				target.Enabled = state;
+		}
+
+		/// <summary>
+		/// Flips the enabled status of every rule whose token type name starts with
+		/// the given prefix: enabled rules become disabled and vice versa.
+		/// </summary>
+		/// <param name="tokenTypePrefix">The prefix to use when finding rules to toggle.</param>
+		public void ToggleRulesByPrefix(string tokenTypePrefix)
+		{
+			Rules.ForEach(rule => {
+				// Matching is done on the enum member name, as the generic token
+				// type is shared between multiple nested classes
+				string typeName = Enum.GetName(rule.Type.GetType(), rule.Type);
+				if (typeName.StartsWith(tokenTypePrefix, StringComparison.CurrentCulture))
+					rule.Toggle();
+			});
+		}
+		/// <summary>
+		/// Enables multiple rules by finding rules that generate 
+		/// tokens whose name begins with a specific substring.
+		/// </summary>
+		/// <param name="tokenTypePrefix">The prefix to use when finding rules to enable.</param>
+		public void EnableRulesByPrefix(string tokenTypePrefix)
+			=> SetRulesByPrefix(tokenTypePrefix, true);
+		/// <summary>
+		/// Disables multiple rules by finding rules that generate 
+		/// tokens whose name begins with a specific substring.
+		/// </summary>
+		/// <param name="tokenTypePrefix">The prefix to use when finding rules to disable.</param>
+		public void DisableRulesByPrefix(string tokenTypePrefix)
+			=> SetRulesByPrefix(tokenTypePrefix, false);
+
+		/// <summary>
+		/// Forces every rule whose token type name starts with the given prefix
+		/// into the given enabled state.
+		/// </summary>
+		/// <param name="tokenTypePrefix">The prefix to use when finding rules to set the 
+		/// status of.</param>
+		/// <param name="state">The enabled state to apply to each matching rule.</param>
+		public void SetRulesByPrefix(string tokenTypePrefix, bool state)
+		{
+			for (int index = 0; index < Rules.Count; index++)
+			{
+				LexerRule<TokenType> candidate = Rules[index];
+
+				// Matching is done on the enum member name, as the generic token
+				// type is shared between multiple nested classes
+				string typeName = Enum.GetName(candidate.Type.GetType(), candidate.Type);
+				if (!typeName.StartsWith(tokenTypePrefix, StringComparison.CurrentCulture))
+					continue;
+
+				//if(Verbose) Console.WriteLine($"{Ansi.FBlue}[Lexer/Rules] {Ansi.FCyan}Setting {rule.Type} to {state}");
+				candidate.Enabled = state;
+			}
+		}
+
+		/// <summary>
+		/// Takes a snapshot of which rules are currently enabled and pushes it onto 
+		/// the internal stack, ready for <see cref="RestoreRuleStates" />.
+		/// </summary>
+		public void SaveRuleStates()
+		{
+			Dictionary<LexerRule<TokenType>, bool> snapshot = new Dictionary<LexerRule<TokenType>, bool>(Rules.Count);
+			Rules.ForEach(rule => snapshot[rule] = rule.Enabled);
+
+			EnabledStateStack.Push(snapshot);
+		}
+		/// <summary>
+		/// Pops the most recent snapshot off the internal stack and applies it, 
+		/// putting each recorded rule back into the enabled state it had when saved.
+		/// </summary>
+		/// <exception cref="InvalidOperationException">Thrown if there aren't any states left on the stack to restore.</exception>
+		public void RestoreRuleStates()
+		{
+			if (EnabledStateStack.Count == 0)
+				throw new InvalidOperationException("Error: Can't restore the lexer rule states when no states have been saved!");
+
+			foreach (KeyValuePair<LexerRule<TokenType>, bool> saved in EnabledStateStack.Pop())
+			{
+				LexerRule<TokenType> rule = saved.Key;
+				rule.Enabled = saved.Value;
+			}
+		}
+
+
+		#endregion
+
+	}
+}
--- a/PolyFeed/Salamander.Core/LexerPool.cs
+++ b/PolyFeed/Salamander.Core/LexerPool.cs
@ -0,0 +1,34 @@
+using System;
+using System.Collections.Generic;
+
+namespace Salamander.Core.Lexer
+{
+	/// <summary>
+	/// Represents a pool of reusable <see cref="Lexer{TokenType}"/>s.
+	/// Useful to avoid memory churn when lexing lots of different input streams.
+	/// </summary>
+	/// <typeparam name="T">The concrete lexer type to pool. It needs a public parameterless constructor so that the pool can create instances on demand.</typeparam>
+	/// <typeparam name="E">The token type produced by the pooled lexers.</typeparam>
+	public class LexerPool<T, E> where T : Lexer<E>, new()
+	{
+		// A stack keeps both acquiring and releasing constant-time. Taking items 
+		// from the front of a list shifts every remaining element on each acquire.
+		private readonly Stack<T> freeLexers = new Stack<T>();
+
+		public LexerPool()
+		{
+		}
+
+		/// <summary>
+		/// Hands out a lexer: a previously released one when available, otherwise a 
+		/// newly constructed one.
+		/// </summary>
+		/// <returns>A lexer for the caller to use until it is released again.</returns>
+		public T AcquireLexer()
+		{
+			return freeLexers.Count > 0 ? freeLexers.Pop() : new T();
+		}
+
+		/// <summary>
+		/// Returns a lexer to the pool so that a later <see cref="AcquireLexer" /> 
+		/// call can reuse it.
+		/// </summary>
+		/// <param name="lexer">The lexer that is no longer in use.</param>
+		/// <exception cref="ArgumentNullException">Thrown if <paramref name="lexer" /> is null, as a pooled null would later be handed out as if it were a lexer.</exception>
+		public void ReleaseLexer(T lexer)
+		{
+			if (lexer == null)
+				throw new ArgumentNullException(nameof(lexer));
+
+			freeLexers.Push(lexer);
+		}
+	}
+}
--- a/PolyFeed/Salamander.Core/LexerRule.cs
+++ b/PolyFeed/Salamander.Core/LexerRule.cs
@ -0,0 +1,52 @@
+using System;
+using System.Text.RegularExpressions;
+
+namespace Salamander.Core.Lexer
+{
+	/// <summary>
+	/// Pairs a regular expression with the token type that its matches produce.
+	/// </summary>
+	public class LexerRule<TokenType>
+	{
+		/// <summary>
+		/// The token type generated when this rule matches.
+		/// </summary>
+		public readonly TokenType Type;
+		/// <summary>
+		/// The regular expression used to find matches.
+		/// </summary>
+		public readonly Regex RegEx;
+		/// <summary>
+		/// The priority of this rule.
+		/// </summary>
+		/// <remarks>
+		/// When several rules match, the one with the highest priority wins.
+		/// Between rules of equal priority, the longest match wins.
+		/// Position still comes first though: a match from a lower priority rule is used 
+		/// if it occurs earlier in the source, as this results in fewer unmatched characters.
+		/// </remarks>
+		public int Priority { get; set; } = 0;
+		/// <summary>
+		/// Whether this rule is currently enabled or not. This can be changed on-the-fly whilst lexing.
+		/// Sometimes useful when handling more complicated logic.
+		/// Be careful though: if you start needing this, consider whether utilising the 
+		/// fuller capabilities of the parser would be more appropriate instead.
+		/// </summary>
+		public bool Enabled { get; set; } = true;
+
+		/// <summary>
+		/// Creates a new rule.
+		/// </summary>
+		/// <param name="inName">The token type that matches of this rule produce.</param>
+		/// <param name="inRegEx">The regular expression pattern to match with.</param>
+		/// <param name="inRegexOptions">Extra regex options. <see cref="RegexOptions.Compiled" /> is always added.</param>
+		/// <param name="inPriority">The priority of the rule.</param>
+		/// <exception cref="ArgumentException">Thrown if <typeparamref name="TokenType" /> is not an enum.</exception>
+		public LexerRule(TokenType inName, string inRegEx, RegexOptions inRegexOptions = RegexOptions.None, int inPriority = 0)
+		{
+			bool isEnum = typeof(TokenType).IsEnum;
+			if (isEnum == false)
+				throw new ArgumentException($"Error: inName must be an enum - {typeof(TokenType)} passed");
+
+			Priority = inPriority;
+			RegEx = new Regex(inRegEx, RegexOptions.Compiled | inRegexOptions);
+			Type = inName;
+		}
+
+		/// <summary>
+		/// Flips <see cref="Enabled" />.
+		/// </summary>
+		/// <returns>The new value of <see cref="Enabled" />.</returns>
+		public bool Toggle() => Enabled = !Enabled;
+	}
+}
--- a/PolyFeed/Salamander.Core/LexerToken.cs
+++ b/PolyFeed/Salamander.Core/LexerToken.cs
@ -0,0 +1,76 @@
+using System;
+using System.Text.RegularExpressions;
+
+namespace Salamander.Core.Lexer
+{
+	/// <summary>
+	/// A single token produced by the lexer: either a match against a rule, or a
+	/// piece of text that no rule matched.
+	/// </summary>
+	public class LexerToken<TokenType>
+	{
+		// -1 marks "not set yet"; each position can be assigned exactly once.
+		private int _lineNumber = -1, _columnNumber = -1;
+		/// <summary>
+		/// The line this token was found on. May only be set once.
+		/// </summary>
+		/// <exception cref="InvalidOperationException">Thrown if a line number has already been set.</exception>
+		/// <exception cref="ArgumentException">Thrown if the value is negative.</exception>
+		public int LineNumber {
+			get => _lineNumber;
+			set {
+				if (_lineNumber != -1)
+					throw new InvalidOperationException("Can't overwrite existing line number data");
+				if (value < 0)
+					throw new ArgumentException("Error: Negative line numbers don't make sense.");
+
+				_lineNumber = value;
+			}
+		}
+		/// <summary>
+		/// The column this token starts at. May only be set once.
+		/// </summary>
+		/// <exception cref="InvalidOperationException">Thrown if a column number has already been set.</exception>
+		/// <exception cref="ArgumentException">Thrown if the value is negative.</exception>
+		public int ColumnNumber {
+			get => _columnNumber;
+			set {
+				if(_columnNumber != -1)
+					throw new InvalidOperationException("Can't overwrite existing column number data");
+				if(value < 0)
+					throw new ArgumentException("Error: Negative column numbers don't make sense.");
+
+				_columnNumber = value;
+			}
+		}
+
+		/// <summary>
+		/// True when this token was built from raw text rather than from a rule match.
+		/// </summary>
+		public readonly bool IsNullMatch = false;
+		/// <summary>
+		/// The rule that produced this token, or null for a null match.
+		/// </summary>
+		public readonly LexerRule<TokenType> Rule = null;
+		/// <summary>
+		/// The regex match behind this token. Not set for a null match.
+		/// </summary>
+		public readonly Match RegexMatch;
+
+		/// <summary>
+		/// The type of the rule that produced this token, or the default value of 
+		/// <typeparamref name="TokenType" /> when no rule is attached.
+		/// </summary>
+		public TokenType Type {
+			get {
+				// Check for the missing rule directly instead of catching the
+				// NullReferenceException that dereferencing it would raise.
+				return Rule == null ? default(TokenType) : Rule.Type;
+			}
+		}
+		private string nullValueData;
+		/// <summary>
+		/// The text of this token: the raw text for a null match, otherwise the 
+		/// text matched by the regex.
+		/// </summary>
+		public string Value {
+			get {
+				return IsNullMatch ? nullValueData : RegexMatch.Value;
+			}
+		}
+
+		/// <summary>
+		/// Creates a token for a successful rule match.
+		/// </summary>
+		/// <param name="inRule">The rule that matched.</param>
+		/// <param name="inMatch">The match that the rule produced.</param>
+		public LexerToken(LexerRule<TokenType> inRule, Match inMatch)
+		{
+			Rule = inRule;
+			RegexMatch = inMatch;
+		}
+		/// <summary>
+		/// Creates a null-match token that carries text no rule matched.
+		/// </summary>
+		/// <param name="unknownData">The unmatched text.</param>
+		public LexerToken(string unknownData)
+		{
+			IsNullMatch = true;
+			nullValueData = unknownData;
+		}
+
+
+		#region Overrides
+
+		public override string ToString()
+		{
+			return $"[LexerToken @ {LineNumber}:{ColumnNumber} Type={Type}, Value={Value}]";
+		}
+
+		#endregion
+	}
+}
--- a/PolyFeed/SubstitutionLexer.cs
+++ b/PolyFeed/SubstitutionLexer.cs
@ -0,0 +1,39 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using Salamander.Core.Lexer;
+
+namespace PolyFeed
+{
+	/// <summary>
+	/// The kinds of token that <see cref="SubstitutionLexer" /> produces.
+	/// </summary>
+	internal enum SubstitutionToken
+	{
+		/// <summary>Content that no rule matched. Kept at 0 because the lexer uses the zero value for tokens without a rule.</summary>
+		Unknown = 0,
+
+		/// <summary>Literal text outside of braces.</summary>
+		Text,
+
+		/// <summary>An opening brace: {</summary>
+		BraceOpen,
+		/// <summary>A closing brace: }</summary>
+		BraceClose,
+		/// <summary>The content between a pair of braces.</summary>
+		Identifier
+
+	}
+
+	/// <summary>
+	/// Lexer for template strings made of literal text and brace-delimited references.
+	/// </summary>
+	internal class SubstitutionLexer : Lexer<SubstitutionToken>
+	{
+		/// <summary>
+		/// Registers the four rules. Text and Identifier share the same pattern; 
+		/// which of the two is produced depends on which rule is enabled at the time.
+		/// </summary>
+		public SubstitutionLexer()
+		{
+			AddRule(new LexerRule<SubstitutionToken>(SubstitutionToken.Text, @"[^{}]+"));
+			AddRule(new LexerRule<SubstitutionToken>(SubstitutionToken.Identifier, @"[^{}]+"));
+			AddRule(new LexerRule<SubstitutionToken>(SubstitutionToken.BraceOpen, @"\{"));
+			AddRule(new LexerRule<SubstitutionToken>(SubstitutionToken.BraceClose, @"\}"));
+		}
+
+		/// <summary>
+		/// Resets the lexer for a new input, then switches the Identifier rule off 
+		/// so that lexing starts out producing Text tokens.
+		/// </summary>
+		/// <param name="reader">The new input stream.</param>
+		public override void Initialise(StreamReader reader)
+		{
+			base.Initialise(reader);
+			SetRule(SubstitutionToken.Identifier, false);
+		}
+	}
+}
--- a/PolyFeed/packages.config
+++ b/PolyFeed/packages.config
@ -2,7 +2,7 @@
 <packages>
  <package id="Fizzler" version="1.2.0" targetFramework="net47" />
  <package id="Fizzler.Systems.HtmlAgilityPack" version="1.2.0" targetFramework="net47" />
-  <package id="HtmlAgilityPack" version="1.11.9" targetFramework="net47" />
+  <package id="HtmlAgilityPack" version="1.11.12" targetFramework="net47" />
  <package id="Microsoft.NETCore.Platforms" version="2.2.2" targetFramework="net47" />
  <package id="Microsoft.SyndicationFeed.ReaderWriter" version="1.0.2" targetFramework="net47" />
  <package id="Microsoft.Win32.Primitives" version="4.3.0" targetFramework="net47" />