Finish the initial HTML implementation.

2024-11-26 07:03:00 +00:00 · 2019-07-28 17:24:21 +01:00 · 2019-07-28 17:24:21 +01:00 · 14fca32a5e
commit 14fca32a5e
parent 59a0289b3a
12 changed files with 746 additions and 14 deletions
--- a/PolyFeed/FeedBuilder.cs
+++ b/PolyFeed/FeedBuilder.cs
@ -1,9 +1,13 @@
 using System;
 using System.Collections.Generic;
 using System.IO;
 using System.Net;
 using System.Text;
 using System.Threading.Tasks;
 using System.Xml;
 using Fizzler.Systems.HtmlAgilityPack;
 using HtmlAgilityPack;
 using Microsoft.SyndicationFeed;
 using Microsoft.SyndicationFeed.Atom;
 namespace PolyFeed
@ -22,9 +26,68 @@ namespace PolyFeed
 		/// <summary>
 		/// Fetches the document at the URL of the given source, writes the feed header
 		/// (generator, id and updated timestamp), and then passes the response on to the
 		/// handler for the source's <see cref="SourceType" />.
 		/// </summary>
 		/// <param name="source">The source to download and add to the feed.</param>
 		/// <exception cref="NotImplementedException">Thrown if the type of the source isn't supported yet.</exception>
 		public async Task AddSource(FeedSource source) {
 			// Dispose the response once we're finished with it, rather than leaving it to the finaliser
 			using WebResponse response = await WebRequest.Create(source.Url).GetResponseAsync();
 			// Write the header
 			await feed.WriteGenerator("Polyfeed", "https://gitlab.com/sbrl/PolyFeed.git", Program.getProgramVersion());
 			await feed.WriteId(source.Url);
 			// HTTP dates use English day & month names, so parse them with the invariant culture
 			// instead of whatever culture the current machine happens to use.
 			// A missing, blank, or malformed header falls back to the current time.
 			string lastModified = response.Headers.Get("last-modified");
 			if (!DateTimeOffset.TryParse(lastModified, System.Globalization.CultureInfo.InvariantCulture, System.Globalization.DateTimeStyles.None, out DateTimeOffset updated))
 				updated = DateTimeOffset.Now;
 			await feed.WriteUpdated(updated);
 			switch (source.SourceType) {
 				case SourceType.HTML:
 					await AddSourceHtml(source, response);
 					break;
 				default:
 					throw new NotImplementedException($"Error: The source type {source.SourceType} hasn't been implemented yet.");
 			}
 		}
 		/// <summary>
 		/// Parses the body of the given response as HTML, writes the feed title and
 		/// subtitle, and then writes one entry to the feed for every element matched by
 		/// the entry selector of the source.
 		/// </summary>
 		/// <param name="source">The source definition that holds the selectors to apply.</param>
 		/// <param name="response">The response whose body is the HTML document to parse.</param>
 		/// <exception cref="InvalidOperationException">
 		/// Thrown if a configured selector doesn't match an element, or a configured
 		/// attribute is missing from the matched element.
 		/// </exception>
 		private async Task AddSourceHtml(FeedSource source, WebResponse response) {
 			HtmlDocument html = new HtmlDocument();
 			using (StreamReader reader = new StreamReader(response.GetResponseStream()))
 				html.LoadHtml(await reader.ReadToEndAsync());
 			HtmlNode document = html.DocumentNode;
 			await feed.WriteTitle(ReferenceSubstitutor.Replace(source.Title, document));
 			await feed.WriteSubtitle(ReferenceSubstitutor.Replace(source.Subtitle, document));
 			foreach (HtmlNode nextNode in document.QuerySelectorAll(source.EntrySelector)) {
 				string url = SelectNodeValue(nextNode, source.EntryUrlSelector, source.EntryUrlAttribute);
 				SyndicationItem nextItem = new SyndicationItem() {
 					Id = url,
 					Title = ReferenceSubstitutor.Replace(source.EntryTitle, nextNode),
 					Description = ReferenceSubstitutor.Replace(source.EntryContent, nextNode)
 				};
 				// A null selector is treated the same as an empty one: the date is simply not set
 				if (!string.IsNullOrEmpty(source.EntryPublishedSelector)) {
 					nextItem.Published = DateTimeOffset.Parse(
 						SelectNodeValue(nextNode, source.EntryPublishedSelector, source.EntryPublishedAttribute)
 					);
 				}
 				if (!string.IsNullOrEmpty(source.EntryLastUpdatedSelector)) {
 					nextItem.LastUpdated = DateTimeOffset.Parse(
 						SelectNodeValue(nextNode, source.EntryLastUpdatedSelector, source.EntryLastUpdatedAttribute)
 					);
 				}
 				// NOTE(review): the Atom formatter is understood to reject entries that have no
 				// updated timestamp - confirm. Until then, fall back to the published date, and
 				// failing that to the current time, so that writing the entry doesn't fail.
 				else if (nextItem.Published != default(DateTimeOffset))
 					nextItem.LastUpdated = nextItem.Published;
 				else
 					nextItem.LastUpdated = DateTimeOffset.Now;
 				// Without this the entry is built and then silently thrown away
 				await feed.Write(nextItem);
 			}
 		}
 		/// <summary>
 		/// Finds the first descendant of a node that matches a selector, and extracts
 		/// either its text content or the value of one of its attributes.
 		/// </summary>
 		/// <param name="parent">The node to search inside.</param>
 		/// <param name="selector">The CSS selector that identifies the target element.</param>
 		/// <param name="attribute">The attribute to read. Null or empty reads the text content of the element instead.</param>
 		/// <returns>The extracted text content or attribute value.</returns>
 		/// <exception cref="InvalidOperationException">Thrown if the selector matches nothing, or the attribute doesn't exist.</exception>
 		private static string SelectNodeValue(HtmlNode parent, string selector, string attribute) {
 			HtmlNode target = parent.QuerySelector(selector);
 			if (target == null)
 				throw new InvalidOperationException($"Error: The selector '{selector}' didn't match any elements.");
 			if (string.IsNullOrEmpty(attribute))
 				return target.InnerText;
 			HtmlAttribute targetAttribute = target.Attributes[attribute];
 			if (targetAttribute == null)
 				throw new InvalidOperationException($"Error: The element matched by the selector '{selector}' doesn't have an attribute called '{attribute}'.");
 			return targetAttribute.DeEntitizeValue;
 		}
 	}
 }
--- a/PolyFeed/FeedSource.cs
+++ b/PolyFeed/FeedSource.cs
@ -1,4 +1,5 @@
 using System;
 namespace PolyFeed
 {
 	/// <summary>
 	/// The kinds of source document that a feed can be built from.
 	/// </summary>
 	public enum SourceType
 	{
 		/// <summary>A HTML document.</summary>
 		HTML,
 		/// <summary>An XML document.</summary>
 		XML,
 		/// <summary>A JSON document.</summary>
 		JSON
 	}
@ -10,31 +11,43 @@ namespace PolyFeed
 		/// </summary>
 		/// <value>The URL.</value>
 		public string Url { get; set; }
-		/// <summary>
+
 		/// The title of the feed.
 		/// </summary>
 		public string Title { get; set; }
 		/// <summary>
 		/// The type of source document to expect.
 		/// </summary>
 		public SourceType SourceType { get; set; }
 		/// <summary>
 		/// The title of the feed.
 		/// Supports the same {} syntax as <see cref="EntryTitle" />.
 		/// </summary>
 		public string Title { get; set; }
 		/// <summary>
 		/// The subtitle of the feed.
 		/// Supports the same {} syntax as <see cref="EntryTitle" />.
 		/// </summary>
 		/// <value>The subtitle.</value>
 		public string Subtitle { get; set; }
 		#region Entries
 		/// <summary>
 		/// A selector that matches against an element that contains the URL that an
 		/// entry should link to.
 		/// Relative to the element selected by <see cref="EntrySelector" />.
 		/// </summary>
-		public string UrlSelector { get; set; }
+		public string EntryUrlSelector { get; set; }
 		/// <summary>
-		/// The name of the attribute on the element selected by <see cref="UrlSelector" />.
+		/// The name of the attribute on the element selected by <see cref="EntryUrlSelector" />.
 		/// Set to an empty string to select the content of the element instead of the 
 		/// content of an attribute.
 		/// </summary>
-		public string UrlElementAttribute { get; set; } = "";
+		public string EntryUrlAttribute { get; set; } = "";
 		/// <summary>
-		/// The selector that specifies the location in the object model of nodes that should 
+		/// The selector that specifies the location of nodes in the object model that 
-		/// be added to the feed.
+		/// should be added to the feed.
 		/// The format varies depending on the <see cref="SourceType" />.
 		///  - HTML: CSS selector (e.g. main > article)
 		///  - XML: XPath (e.g. //element_name)
@ -52,5 +65,29 @@ namespace PolyFeed
 		/// Same as <see cref="EntryTitle" />, but for the body of an entry. HTML is allowed.
 		/// </summary>
 		public string EntryContent { get; set; }
 		/// <summary>
 		/// The selector for the node that contains the date published for an entry.
 		/// Relative to the element selected by <see cref="EntrySelector" />.
 		/// Leave as <see cref="string.Empty" /> to omit the date published.
 		/// </summary>
 		public string EntryPublishedSelector { get; set; } = "";
 		/// <summary>
 		/// The name of the attribute that contains the date published for an entry.
 		/// Set to <see cref="string.Empty" /> to use the content of the node itself.
 		/// </summary>
 		public string EntryPublishedAttribute { get; set; } = "";
 		/// <summary>
 		/// Same as <see cref="EntryPublishedSelector" />, but for the last updated.
 		/// If not specified, the last updated will be omitted.
 		/// </summary>
 		public string EntryLastUpdatedSelector { get; set; } = "";
 		/// <summary>
 		/// Same as <see cref="EntryPublishedAttribute" />, but for the last updated.
 		/// Set to <see cref="string.Empty" /> to use the content of the node itself.
 		/// </summary>
 		public string EntryLastUpdatedAttribute { get; set; } = "";
 		#endregion
 	}
 }
--- a/PolyFeed/PolyFeed.csproj
+++ b/PolyFeed/PolyFeed.csproj
@ -34,7 +34,7 @@
      <HintPath>..\packages\Fizzler.1.2.0\lib\netstandard2.0\Fizzler.dll</HintPath>
    </Reference>
    <Reference Include="HtmlAgilityPack">
-      <HintPath>..\packages\HtmlAgilityPack.1.11.9\lib\Net45\HtmlAgilityPack.dll</HintPath>
+      <HintPath>..\packages\HtmlAgilityPack.1.11.12\lib\Net45\HtmlAgilityPack.dll</HintPath>
    </Reference>
    <Reference Include="Microsoft.Win32.Primitives">
      <HintPath>..\packages\Microsoft.Win32.Primitives.4.3.0\lib\net46\Microsoft.Win32.Primitives.dll</HintPath>
@ -136,10 +136,20 @@
    <Compile Include="Properties\AssemblyInfo.cs" />
    <Compile Include="FeedBuilder.cs" />
    <Compile Include="FeedSource.cs" />
    <Compile Include="Salamander.Core\Lexer.cs" />
    <Compile Include="Salamander.Core\LexerRule.cs" />
    <Compile Include="Salamander.Core\LexerToken.cs" />
    <Compile Include="Salamander.Core\Ansi.cs" />
    <Compile Include="SubstitutionLexer.cs" />
    <Compile Include="Salamander.Core\LexerPool.cs" />
    <Compile Include="ReferenceSubstitutor.cs" />
  </ItemGroup>
  <ItemGroup>
    <None Include="packages.config" />
  </ItemGroup>
  <ItemGroup>
    <Folder Include="Salamander.Core\" />
  </ItemGroup>
  <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
  <Import Project="..\packages\NETStandard.Library.2.0.3\build\netstandard2.0\NETStandard.Library.targets" Condition="Exists('..\packages\NETStandard.Library.2.0.3\build\netstandard2.0\NETStandard.Library.targets')" />
 </Project>
--- a/PolyFeed/Program.cs
+++ b/PolyFeed/Program.cs
@ -3,7 +3,7 @@ using System.Collections.Generic;
 using System.IO;
 using System.Reflection;
-namespace ProjectNamespace
+namespace PolyFeed
 {
 	internal class Settings
 	{
@ -67,7 +67,7 @@ namespace ProjectNamespace
 		#region Helper Methods
-		private static string getProgramVersion()
+		public static string getProgramVersion()
 		{
 			Version version = Assembly.GetExecutingAssembly().GetName().Version;
 			return $"{version.Major}.{version.Minor}";
--- a/PolyFeed/ReferenceSubstitutor.cs
+++ b/PolyFeed/ReferenceSubstitutor.cs
@ -0,0 +1,44 @@
 using System;
 using System.Text;
 using Fizzler.Systems.HtmlAgilityPack;
 using HtmlAgilityPack;
 using Salamander.Core.Lexer;
 namespace PolyFeed
 {
 	/// <summary>
 	/// Expands <c>{selector}</c> references inside template strings, using content
 	/// pulled from a HTML node.
 	/// </summary>
 	internal static class ReferenceSubstitutor {
 		/// <summary>
 		/// Lexers are pooled so that they can be reused across calls.
 		/// </summary>
 		private static readonly LexerPool<SubstitutionLexer, SubstitutionToken> lexerPool = new LexerPool<SubstitutionLexer, SubstitutionToken>();
 		/// <summary>
 		/// Replaces every <c>{selector}</c> reference in the input string with the text
 		/// content of the first element under the root element that matches the selector.
 		/// Text outside braces is copied through as-is. A selector that doesn't match
 		/// anything contributes nothing to the output.
 		/// </summary>
 		/// <param name="inputString">The template string to expand.</param>
 		/// <param name="rootElement">The node that selectors are evaluated against.</param>
 		/// <returns>The expanded string.</returns>
 		/// <exception cref="ArgumentNullException">Thrown if the input string is null.</exception>
 		public static string Replace(string inputString, HtmlNode rootElement)
 		{
 			// Fail before acquiring a lexer, so that a bad argument can't cost us a pooled instance
 			if (inputString == null)
 				throw new ArgumentNullException(nameof(inputString));
 			StringBuilder result = new StringBuilder();
 			SubstitutionLexer lexer = lexerPool.AcquireLexer();
 			try
 			{
 				lexer.Initialise(inputString);
 				foreach (LexerToken<SubstitutionToken> nextToken in lexer.TokenStream())
 				{
 					switch (nextToken.Type) {
 						case SubstitutionToken.BraceOpen:
 							// Inside braces we want identifiers (i.e. selectors), not plain text
 							lexer.SaveRuleStates();
 							lexer.EnableRule(SubstitutionToken.Identifier);
 							lexer.DisableRule(SubstitutionToken.Text);
 							break;
 						case SubstitutionToken.BraceClose:
 							lexer.RestoreRuleStates();
 							break;
 						case SubstitutionToken.Text:
 							result.Append(nextToken.Value);
 							break;
 						case SubstitutionToken.Identifier:
 							// Append the *text* of the node. Appending the node object itself would
 							// insert the name of its type instead of its content.
 							HtmlNode targetNode = rootElement.QuerySelector(nextToken.Value);
 							if (targetNode != null)
 								result.Append(targetNode.InnerText);
 							break;
 					}
 				}
 			}
 			finally
 			{
 				// Always hand the lexer back, even if lexing or a selector lookup threw
 				lexerPool.ReleaseLexer(lexer);
 			}
 			return result.ToString();
 		}
 	}
 }
--- a/PolyFeed/Salamander.Core/Ansi.cs
+++ b/PolyFeed/Salamander.Core/Ansi.cs
@ -0,0 +1,49 @@
 using System;
 namespace Salamander.Core.Helpers
 {
 	/// <summary>
 	/// Accessors for common ANSI escape sequences (text attributes, colours, and cursor
 	/// movement). Every accessor yields an empty string while <see cref="Enabled" /> is false.
 	/// </summary>
 	public static class Ansi
 	{
 		/// <summary>
 		/// Master switch for emitting ANSI escape codes.
 		/// Turn this off when the output is headed somewhere that shouldn't contain
 		/// them - a log file, for example.
 		/// </summary>
 		public static bool Enabled { get; set; } = true;
 		/// <summary>
 		/// Passes the given escape sequence through if escape codes are enabled,
 		/// and swaps it for an empty string if they aren't.
 		/// </summary>
 		private static string Gate(string sequence)
 		{
 			if (Enabled)
 				return sequence;
 			return "";
 		}
 		// Solution on how to output ANSI escape codes in C# from here:
 		// https://www.jerriepelser.com/blog/using-ansi-color-codes-in-net-console-apps
 		// Text attributes
 		public static string Reset => Gate("\u001b[0m");
 		public static string HiCol => Gate("\u001b[1m");
 		public static string Underline => Gate("\u001b[4m");
 		public static string Inverse => Gate("\u001b[7m");
 		// Foreground colours
 		public static string FBlack => Gate("\u001b[30m");
 		public static string FRed => Gate("\u001b[31m");
 		public static string FGreen => Gate("\u001b[32m");
 		public static string FYellow => Gate("\u001b[33m");
 		public static string FBlue => Gate("\u001b[34m");
 		public static string FMagenta => Gate("\u001b[35m");
 		public static string FCyan => Gate("\u001b[36m");
 		public static string FWhite => Gate("\u001b[37m");
 		// Background colours
 		public static string BBlack => Gate("\u001b[40m");
 		public static string BRed => Gate("\u001b[41m");
 		public static string BGreen => Gate("\u001b[42m");
 		public static string BYellow => Gate("\u001b[43m");
 		public static string BBlue => Gate("\u001b[44m");
 		public static string BMagenta => Gate("\u001b[45m");
 		public static string BCyan => Gate("\u001b[46m");
 		public static string BWhite => Gate("\u001b[47m");
 		// Cursor movement
 		// Thanks to http://ascii-table.com/ansi-escape-sequences.php for the following ANSI escape sequences
 		public static string Up(int lines = 1) => Gate($"\u001b[{lines}A");
 		public static string Down(int lines = 1) => Gate($"\u001b[{lines}B");
 		public static string Right(int lines = 1) => Gate($"\u001b[{lines}C");
 		public static string Left(int lines = 1) => Gate($"\u001b[{lines}D");
 		//public static string JumpTo(Vector2 pos) => $"\u001b[{pos.Y};{pos.X}H" : "";
 		public static string CursorPosSave => Gate("\u001b[s");
 		public static string CursorPosRestore => Gate("\u001b[u");
 	}
 }
--- a/PolyFeed/Salamander.Core/Lexer.cs
+++ b/PolyFeed/Salamander.Core/Lexer.cs
@ -0,0 +1,328 @@
 using System;
 using System.Collections.Generic;
 using System.IO;
 using System.Text;
 using System.Text.RegularExpressions;
 using Salamander.Core.Helpers;
 namespace Salamander.Core.Lexer
 {
 	public class Lexer<TokenType>
 	{
 		/// <summary>
 		/// The rules that should be used during the lexing process.
 		/// </summary>
 		public List<LexerRule<TokenType>> Rules { get; private set; } = new List<LexerRule<TokenType>>();
 		/// <summary>
 		/// Tokens in this list will be matched against, but not emitted by the lexer
 		/// into the main token stream.
 		/// Useful for catching and disposing of sequences of characters you don't want escaping
 		/// or breaking your parser.
 		/// </summary>
 		public List<TokenType> IgnoreTokens { get; private set; } = new List<TokenType>();
 		/// <summary>
 		/// Whether the lexer should be verbose and log a bunch of debugging information 
 		/// to the console.
 		/// </summary>
 		public bool Verbose { get; set; } = false;
 		/// <summary>
 		/// The number of the line that is currently being scanned.
 		/// </summary>
 		public int CurrentLineNumber { get; private set; } = 0;
 		/// <summary>
 		/// The number of characters on the current line that have been scanned.
 		/// </summary>
 		/// <value>The current line position.</value>
 		public int CurrentLinePos { get; private set; } = 0;
 		/// <summary>
 		/// The total number of characters currently scanned by this lexer instance.
 		/// Only updated every newline!
 		/// </summary>
 		public int TotalCharsScanned { get; private set; } = 0;
 		/// <summary>
 		/// The internal stream that we should read from when lexing.
 		/// </summary>
 		private StreamReader textStream;
 		/// <summary>
 		/// A stack of rule states.
 		/// Whether rules are enabled or disabled can be recursively saved and restored - 
 		/// this <see cref="Stack{T}" /> is how the lexer saves this information.
 		/// </summary>
 		private Stack<Dictionary<LexerRule<TokenType>, bool>> EnabledStateStack = new Stack<Dictionary<LexerRule<TokenType>, bool>>();
 		/// <summary>
 		/// Creates a new <see cref="Lexer{TokenType}" />, optionally containing the given 
 		/// <see cref="LexerRule{TokenType}" /> instances.
 		/// </summary>
 		/// <param name="initialRules">The rules to add to the new <see cref="Lexer{TokenType}" />.</param>
 		public Lexer(params LexerRule<TokenType>[] initialRules)
 		{
 			AddRules(initialRules);
 		}
 		/// <summary>
 		/// Adds a single lexing rule to the <see cref="Lexer{TokenType}" />.
 		/// </summary>
 		/// <param name="newRule">The rule to add.</param>
 		public void AddRule(LexerRule<TokenType> newRule)
 			=> Rules.Add(newRule);
 		/// <summary>
 		/// Adds a bunch of lexing rules to the <see cref="Lexer{TokenType}" />.
 		/// </summary>
 		/// <param name="newRules">The rules to add.</param>
 		public void AddRules(IEnumerable<LexerRule<TokenType>> newRules)
 			=> Rules.AddRange(newRules);
 		/// <summary>
 		/// Reinitialises the parser with a new input stream.
 		/// </summary>
 		/// <remarks>
 		/// Child classes should override this method to do their own state initialisation,
 		/// as lexers MAY be re-used on multiple input streams.
 		/// Implementors must be careful not to forget to call this base method though.
 		/// </remarks>
 		/// <param name="reader">The <see cref="StreamReader"/> to use as the new input stream.</param>
 		public virtual void Initialise(StreamReader reader)
 		{
 			// Reset the counters
 			CurrentLineNumber = 0;
 			CurrentLinePos = 0;
 			TotalCharsScanned = 0;
 			// Reset the state stack
 			EnabledStateStack.Clear();
 			// Re-enable all rules
 			EnableAllRules();
 			textStream = reader;
 		}
 		public void Initialise(string input)
 		{
 			MemoryStream stream = new MemoryStream(Encoding.UTF8.GetBytes(input));
 			Initialise(new StreamReader(stream));
 		}
 		/// <summary>
 		/// Performs the lexing process itself in an incremental manner.
 		/// Note that a single Lexer may only do a single lex at a time - even if it's the 
 		/// same document multiple times over.
 		/// </summary>
 		/// <remarks>
 		/// The input is scanned a line at a time. At each position, every enabled rule is 
 		/// tried, and the match that starts earliest wins - with ties being broken by rule 
 		/// priority, and then by match length.
 		/// Content that sits before the winning match is emitted as an unknown token (unless 
 		/// the zero-valued token type is in <see cref="IgnoreTokens" />), and if no rules match 
 		/// at all then the rest of the line is emitted as a single unknown token.
 		/// Zero-length matches are skipped, as they can't move the scan position forwards.
 		/// </remarks>
 		/// <returns>A stream of lexical tokens.</returns>
 		/// <exception cref="InvalidOperationException">Thrown if the lexer hasn't been given an input stream yet.</exception>
 		public IEnumerable<LexerToken<TokenType>> TokenStream()
 		{
 			if (textStream == null)
 				throw new InvalidOperationException("Error: The lexer must be initialised with an input stream before lexing can begin.");
 			string nextLine;
 			List<LexerToken<TokenType>> matches = new List<LexerToken<TokenType>>();
 			while ((nextLine = textStream.ReadLine()) != null)
 			{
 				CurrentLinePos = 0;
 				while (CurrentLinePos < nextLine.Length)
 				{
 					matches.Clear();
 					foreach (LexerRule<TokenType> rule in Rules)
 					{
 						if (!rule.Enabled) continue;
 						Match nextMatch = rule.RegEx.Match(nextLine, CurrentLinePos);
 						// An empty match would never advance CurrentLinePos, so accepting one
 						// could leave us spinning on the same position forever
 						if (!nextMatch.Success || nextMatch.Length == 0) continue;
 						matches.Add(
 							new LexerToken<TokenType>(rule, nextMatch)
 							{
 								LineNumber = CurrentLineNumber,
 								ColumnNumber = nextMatch.Index
 							}
 						);
 					}
 					if (matches.Count == 0)
 					{
 						string unknownTokenContent = nextLine.Substring(CurrentLinePos);
 						int unknownTokenPos = CurrentLinePos;
 						// The unknown token swallows the rest of the line, so record it as scanned
 						// to keep TotalCharsScanned accurate
 						CurrentLinePos = nextLine.Length;
 						if (Verbose) Console.WriteLine($"{Ansi.FRed}[Unknown Token: No matches found for this line]{Ansi.Reset} {unknownTokenContent}");
 						yield return new LexerToken<TokenType>(unknownTokenContent)
 						{
 							LineNumber = CurrentLineNumber,
 							ColumnNumber = unknownTokenPos
 						};
 						break;
 					}
 					matches.Sort((LexerToken<TokenType> a, LexerToken<TokenType> b) => {
 						// Prefer the match that starts earliest in the line
 						int result = a.ColumnNumber - b.ColumnNumber;
 						// If they both start at the same position, then go with highest priority one
 						if (result == 0)
 							result = b.Rule.Priority - a.Rule.Priority;
 						// Failing that, try the longest one
 						if (result == 0)
 							result = b.RegexMatch.Length - a.RegexMatch.Length;
 						return result;
 					});
 					LexerToken<TokenType> selectedToken = matches[0];
 					// Use the position the regex actually matched at. Searching the line for the
 					// matched text instead could land on an earlier, unrelated occurrence of it.
 					int selectedTokenOffset = selectedToken.RegexMatch.Index - CurrentLinePos;
 					if (selectedTokenOffset > 0)
 					{
 						string extraTokenContent = nextLine.Substring(CurrentLinePos, selectedTokenOffset);
 						int unmatchedLinePos = CurrentLinePos;
 						CurrentLinePos += selectedTokenOffset;
 						if (Verbose) Console.WriteLine($"{Ansi.FRed}[Unmatched content]{Ansi.Reset} '{extraTokenContent}'");
 						// Return an unknown token, but only if we're not meant to be ignoring them.
 						// Unknown tokens carry the zero value of the token type enum.
 						if (!IgnoreTokens.Contains(default(TokenType)))
 						{
 							yield return new LexerToken<TokenType>(extraTokenContent)
 							{
 								LineNumber = CurrentLineNumber,
 								ColumnNumber = unmatchedLinePos
 							};
 						}
 					}
 					CurrentLinePos += selectedToken.RegexMatch.Length;
 					if (Verbose) Console.WriteLine($"{(IgnoreTokens.Contains(selectedToken.Type) ? Ansi.FBlack : Ansi.FGreen)}{selectedToken}{Ansi.Reset}");
 					// Yield the token, but only if we aren't supposed to be ignoring it
 					if (IgnoreTokens.Contains(selectedToken.Type))
 						continue;
 					yield return selectedToken;
 				}
 				if (Verbose) Console.WriteLine($"{Ansi.FBlue}[Lexer]{Ansi.Reset} Next line");
 				CurrentLineNumber++;
 				TotalCharsScanned += CurrentLinePos;
 			}
 		}
 		#region Rule Management
 		/// <summary>
 		/// Enables all <see cref="LexerRule{TokenType}" />s currently registered against
 		/// this Lexer.
 		/// </summary>
 		public void EnableAllRules() => EnableRulesByPrefix("");
 		/// <summary>
 		/// Disables all <see cref="LexerRule{TokenType}" />s currently registered against
 		/// this Lexer.
 		/// </summary>
 		public void DisableAllRules() => DisableRulesByPrefix("");
 		/// <summary>
 		/// Enables the rule that matches against the given <see cref="TokenType" />.
 		/// </summary>
 		/// <param name="type">The token type to use to find the rule to enable.</param>
 		public void EnableRule(TokenType type) => SetRule(type, true);
 		/// <summary>
 		/// Disables the rule that matches against the given <see cref="TokenType" />.
 		/// </summary>
 		/// <param name="type">The token type to use to find the rule to disable.</param>
 		public void DisableRule(TokenType type) => SetRule(type, false);
 		/// <summary>
 		/// Sets the enabled status of the rule that matches against the given 
 		/// <see cref="TokenType" /> to the given state.
 		/// </summary>
 		/// <param name="type">The <see cref="TokenType" /> to use to find the rule whose 
 		/// enabled state should be set.</param>
 		/// <param name="state">Whether to enable or disable the rule. <see langword="true"/> = enable it, <see langword="false"/> = disable it.</param>
 		public void SetRule(TokenType type, bool state)
 		{
 			foreach (LexerRule<TokenType> rule in Rules)
 			{
 				// We have to do a string comparison here because of the generic type we're using in multiple nested
 				// classes
 				if (Enum.GetName(rule.Type.GetType(), rule.Type) == Enum.GetName(type.GetType(), type))
 				{
 					rule.Enabled = state;
 					return;
 				}
 			}
 		}
 		/// <summary>
 		/// Toggles the enabled status of multiple rules by finding rules that generate 
 		/// tokens whose name begins with a specific substring.
 		/// </summary>
 		/// <param name="tokenTypePrefix">The prefix to use when finding rules to toggle.</param>
 		public void ToggleRulesByPrefix(string tokenTypePrefix)
 		{
 			foreach (LexerRule<TokenType> rule in Rules)
 			{
 				// We have to do a string comparison here because of the generic type we're using in multiple nested
 				// classes
 				if (Enum.GetName(rule.Type.GetType(), rule.Type).StartsWith(tokenTypePrefix, StringComparison.CurrentCulture))
 					rule.Enabled = !rule.Enabled;
 			}
 		}
 		/// <summary>
 		/// Enables multiple rules by finding rules that generate 
 		/// tokens whose name begins with a specific substring.
 		/// </summary>
 		/// <param name="tokenTypePrefix">The prefix to use when finding rules to enable.</param>
 		public void EnableRulesByPrefix(string tokenTypePrefix)
 			=> SetRulesByPrefix(tokenTypePrefix, true);
 		/// <summary>
 		/// Disables multiple rules by finding rules that generate 
 		/// tokens whose name begins with a specific substring.
 		/// </summary>
 		/// <param name="tokenTypePrefix">The prefix to use when finding rules to disable.</param>
 		public void DisableRulesByPrefix(string tokenTypePrefix)
 			=> SetRulesByPrefix(tokenTypePrefix, false);
 		/// <summary>
 		/// Set the enabled status of multiple rules by finding rules that generate 
 		/// tokens whose name begins with a specific substring.
 		/// </summary>
 		/// <param name="tokenTypePrefix">The prefix to use when finding rules to set the 
 		/// status of.</param>
 		public void SetRulesByPrefix(string tokenTypePrefix, bool state)
 		{
 			foreach (LexerRule<TokenType> rule in Rules)
 			{
 				// We have to do a string comparison here because of the generic type we're using in multiple nested
 				// classes
 				if (Enum.GetName(rule.Type.GetType(), rule.Type).StartsWith(tokenTypePrefix, StringComparison.CurrentCulture))
 				{
 					//if(Verbose) Console.WriteLine($"{Ansi.FBlue}[Lexer/Rules] {Ansi.FCyan}Setting {rule.Type} to {state}");
 					rule.Enabled = state;
 				}
 			}
 		}
 		/// <summary>
 		/// Saves the current rule states (i.e. whether they are enabled or not) as a snapshot to an
 		/// internal stack.
 		/// </summary>
 		public void SaveRuleStates()
 		{
 			Dictionary<LexerRule<TokenType>, bool> states = new Dictionary<LexerRule<TokenType>, bool>();
 			foreach (LexerRule<TokenType> nextRule in Rules)
 				states[nextRule] = nextRule.Enabled;
 			EnabledStateStack.Push(states);
 		}
 		/// <summary>
 		/// Restores the top-most rule states snapshot from the internal stack.
 		/// </summary>
 		/// <exception cref="InvalidOperationException">Thrown if there aren't any states left on the stack to restore.</exception>
 		public void RestoreRuleStates()
 		{
 			if (EnabledStateStack.Count < 1)
 				throw new InvalidOperationException("Error: Can't restore the lexer rule states when no states have been saved!");
 			Dictionary<LexerRule<TokenType>, bool> states = EnabledStateStack.Pop();
 			foreach (KeyValuePair<LexerRule<TokenType>, bool> nextRulePair in states)
 				nextRulePair.Key.Enabled = nextRulePair.Value;
 		}
 		#endregion
 	}
 }
--- a/PolyFeed/Salamander.Core/LexerPool.cs
+++ b/PolyFeed/Salamander.Core/LexerPool.cs
@ -0,0 +1,34 @@
 using System;
 using System.Collections.Generic;
 namespace Salamander.Core.Lexer
 {
 	/// <summary>
 	/// Represents a pool of reusable <see cref="Lexer{TokenType}"/>s.
 	/// Useful to avoid memory churn when lexing lots of different input streams.
 	/// </summary>
 	/// <remarks>
 	/// No locking is done here. NOTE(review): callers on multiple threads would need
 	/// to synchronise access themselves - confirm whether that's a requirement.
 	/// </remarks>
 	/// <typeparam name="T">The concrete type of lexer to pool.</typeparam>
 	/// <typeparam name="E">The token type that the pooled lexers emit.</typeparam>
 	public class LexerPool<T, E> where T : Lexer<E>, new()
 	{
 		/// <summary>
 		/// The lexers that are currently idle.
 		/// A stack makes both acquiring and releasing a constant-time operation. Which
 		/// idle lexer gets handed out doesn't matter, as they are interchangeable.
 		/// </summary>
 		private readonly Stack<T> freeLexers = new Stack<T>();
 		public LexerPool()
 		{
 		}
 		/// <summary>
 		/// Takes an idle lexer out of the pool, creating a new one if none are idle.
 		/// </summary>
 		/// <returns>A lexer that the caller has exclusive use of until it's released.</returns>
 		public T AcquireLexer()
 		{
 			if (freeLexers.Count > 0)
 				return freeLexers.Pop();
 			return new T();
 		}
 		/// <summary>
 		/// Returns a lexer to the pool so that it can be handed out again.
 		/// </summary>
 		/// <param name="lexer">The lexer to return to the pool.</param>
 		/// <exception cref="ArgumentNullException">Thrown if the lexer is null.</exception>
 		public void ReleaseLexer(T lexer)
 		{
 			// A null in the pool would only surface later on as a confusing failure in AcquireLexer's caller
 			if (lexer == null)
 				throw new ArgumentNullException(nameof(lexer));
 			freeLexers.Push(lexer);
 		}
 	}
 }
--- a/PolyFeed/Salamander.Core/LexerRule.cs
+++ b/PolyFeed/Salamander.Core/LexerRule.cs
@ -0,0 +1,52 @@
 using System;
 using System.Text.RegularExpressions;
 namespace Salamander.Core.Lexer
 {
 	/// <summary>
 	/// Pairs a regular expression with the type of token that a match against it produces.
 	/// </summary>
 	public class LexerRule<TokenType>
 	{
 		/// <summary>
 		/// The type of token that is generated when this rule matches.
 		/// </summary>
 		public readonly TokenType Type;
 		/// <summary>
 		/// The compiled regular expression that this rule finds matches with.
 		/// </summary>
 		public readonly Regex RegEx;
 		/// <summary>
 		/// How strongly this rule is preferred over other rules.
 		/// </summary>
 		/// <remarks>
 		/// When several rules match, the rule with the highest priority is preferred, and 
 		/// the longest match is used to break ties between rules of equal priority.
 		/// Position still trumps priority though: a match from a lower priority rule that 
 		/// occurs earlier in the source is taken first, as that results in fewer unmatched 
 		/// characters.
 		/// </remarks>
 		public int Priority { get; set; } = 0;
 		/// <summary>
 		/// Whether this rule currently takes part in lexing. May be changed on-the-fly 
 		/// in the middle of a lex, which is sometimes useful for more complicated logic.
 		/// If you find yourself needing this a lot though, consider whether utilising the 
 		/// fuller capabilities of the parser would be more appropriate instead.
 		/// </summary>
 		public bool Enabled { get; set; } = true;
 		/// <summary>
 		/// Creates a new rule.
 		/// </summary>
 		/// <param name="inName">The type of token that matches should generate.</param>
 		/// <param name="inRegEx">The regular expression pattern to match with.</param>
 		/// <param name="inRegexOptions">Extra regex options. <see cref="RegexOptions.Compiled" /> is always added to these.</param>
 		/// <param name="inPriority">The priority of the new rule.</param>
 		/// <exception cref="ArgumentException">Thrown if the token type isn't an enum.</exception>
 		public LexerRule(TokenType inName, string inRegEx, RegexOptions inRegexOptions = RegexOptions.None, int inPriority = 0)
 		{
 			bool tokenTypeIsEnum = typeof(TokenType).IsEnum;
 			if (tokenTypeIsEnum == false)
 			{
 				throw new ArgumentException($"Error: inName must be an enum - {typeof(TokenType)} passed");
 			}
 			RegexOptions combinedOptions = inRegexOptions | RegexOptions.Compiled;
 			Type = inName;
 			RegEx = new Regex(inRegEx, combinedOptions);
 			Priority = inPriority;
 		}
 		/// <summary>
 		/// Flips this rule between enabled and disabled.
 		/// </summary>
 		/// <returns>The new enabled state.</returns>
 		public bool Toggle() => Enabled = !Enabled;
 	}
 }
--- a/PolyFeed/Salamander.Core/LexerToken.cs
+++ b/PolyFeed/Salamander.Core/LexerToken.cs
@ -0,0 +1,76 @@
 using System;
 using System.Text.RegularExpressions;
 namespace Salamander.Core.Lexer
 {
 	/// <summary>
 	/// A single token produced by a lexer: either a match against a 
 	/// <see cref="LexerRule{TokenType}" />, or a "null match" that holds content 
 	/// that no rule matched.
 	/// </summary>
 	public class LexerToken<TokenType>
 	{
 		// -1 means "not set yet" - both values may be assigned exactly once
 		private int _lineNumber = -1, _columnNumber = -1;
 		/// <summary>
 		/// The number of the line that this token was found on. Can only be set once.
 		/// </summary>
 		/// <exception cref="InvalidOperationException">Thrown on an attempt to set the value a second time.</exception>
 		/// <exception cref="ArgumentException">Thrown if the new value is negative.</exception>
 		public int LineNumber {
 			get => _lineNumber;
 			set {
 				if (_lineNumber != -1)
 					throw new InvalidOperationException("Can't overwrite existing line number data");
 				if (value < 0)
 					throw new ArgumentException("Error: Negative line numbers don't make sense.");
 				_lineNumber = value;
 			}
 		}
 		/// <summary>
 		/// The position in the line that this token starts at. Can only be set once.
 		/// </summary>
 		/// <exception cref="InvalidOperationException">Thrown on an attempt to set the value a second time.</exception>
 		/// <exception cref="ArgumentException">Thrown if the new value is negative.</exception>
 		public int ColumnNumber {
 			get => _columnNumber;
 			set {
 				if(_columnNumber != -1)
 					throw new InvalidOperationException("Can't overwrite existing column number data");
 				if(value < 0)
 					throw new ArgumentException("Error: Negative column numbers don't make sense.");
 				_columnNumber = value;
 			}
 		}
 		/// <summary>
 		/// Whether this token holds unmatched content rather than a match against a rule.
 		/// </summary>
 		public readonly bool IsNullMatch = false;
 		/// <summary>
 		/// The rule that generated this token. Null for null matches.
 		/// </summary>
 		public readonly LexerRule<TokenType> Rule = null;
 		/// <summary>
 		/// The regex match that generated this token. Not set for null matches.
 		/// </summary>
 		public readonly Match RegexMatch;
 		/// <summary>
 		/// The type of this token. Tokens that have no rule (i.e. null matches) report 
 		/// the default value of the token type.
 		/// </summary>
 		public TokenType Type {
 			// Check for the missing rule explicitly, rather than dereferencing it and
 			// catching the resulting NullReferenceException
 			get => Rule == null ? default(TokenType) : Rule.Type;
 		}
 		private string nullValueData;
 		/// <summary>
 		/// The text of this token: the unmatched content for null matches, and the 
 		/// matched text otherwise.
 		/// </summary>
 		public string Value {
 			get {
 				return IsNullMatch ? nullValueData : RegexMatch.Value;
 			}
 		}
 		/// <summary>
 		/// Creates a token from a match against a rule.
 		/// </summary>
 		/// <param name="inRule">The rule that matched.</param>
 		/// <param name="inMatch">The match that the rule produced.</param>
 		public LexerToken(LexerRule<TokenType> inRule, Match inMatch)
 		{
 			Rule = inRule;
 			RegexMatch = inMatch;
 		}
 		/// <summary>
 		/// Creates a null match token that holds content no rule matched.
 		/// </summary>
 		/// <param name="unknownData">The unmatched content.</param>
 		public LexerToken(string unknownData)
 		{
 			IsNullMatch = true;
 			nullValueData = unknownData;
 		}
 		#region Overrides
 		public override string ToString()
 		{
 			return $"[LexerToken @ {LineNumber}:{ColumnNumber} Type={Type}, Value={Value}]";
 		}
 		#endregion
 	}
 }
--- a/PolyFeed/SubstitutionLexer.cs
+++ b/PolyFeed/SubstitutionLexer.cs
@ -0,0 +1,39 @@
 using System;
 using System.Collections.Generic;
 using System.IO;
 using Salamander.Core.Lexer;
 namespace PolyFeed
 {
 	/// <summary>
 	/// The types of token that the <see cref="SubstitutionLexer" /> emits.
 	/// </summary>
 	internal enum SubstitutionToken
 	{
 		/// <summary>
 		/// The zero value, which is the type reported by tokens that hold content no rule matched.
 		/// </summary>
 		Unknown = 0,
 		/// <summary>A run of characters that contains no braces.</summary>
 		Text,
 		/// <summary>An opening brace.</summary>
 		BraceOpen,
 		/// <summary>A closing brace.</summary>
 		BraceClose,
 		/// <summary>
 		/// A run of characters that contains no braces, matched by a rule that is 
 		/// disabled whenever the lexer is (re)initialised.
 		/// </summary>
 		Identifier
 	}
 	/// <summary>
 	/// A lexer that splits a string into brace tokens and the runs of characters 
 	/// between them.
 	/// </summary>
 	internal class SubstitutionLexer : Lexer<SubstitutionToken>
 	{
 		/// <summary>
 		/// Creates a new lexer and registers its 4 rules.
 		/// </summary>
 		public SubstitutionLexer()
 		{
 			// Text and Identifier deliberately share a pattern - which one applies at any
 			// given moment is decided by which of the 2 rules is enabled
 			const string nonBraceRun = @"[^{}]+";
 			AddRule(new LexerRule<SubstitutionToken>(SubstitutionToken.Text, nonBraceRun));
 			AddRule(new LexerRule<SubstitutionToken>(SubstitutionToken.Identifier, nonBraceRun));
 			AddRule(new LexerRule<SubstitutionToken>(SubstitutionToken.BraceOpen, @"\{"));
 			AddRule(new LexerRule<SubstitutionToken>(SubstitutionToken.BraceClose, @"\}"));
 		}
 		/// <summary>
 		/// Resets the lexer for a new input stream, and switches the identifier rule 
 		/// off again.
 		/// </summary>
 		/// <param name="reader">The new input stream to lex.</param>
 		public override void Initialise(StreamReader reader)
 		{
 			// The base implementation re-enables every rule, so the identifier rule
 			// has to be disabled afterwards
 			base.Initialise(reader);
 			SetRule(SubstitutionToken.Identifier, false);
 		}
 	}
 }
--- a/PolyFeed/packages.config
+++ b/PolyFeed/packages.config
@ -2,7 +2,7 @@
 <packages>
  <package id="Fizzler" version="1.2.0" targetFramework="net47" />
  <package id="Fizzler.Systems.HtmlAgilityPack" version="1.2.0" targetFramework="net47" />
-  <package id="HtmlAgilityPack" version="1.11.9" targetFramework="net47" />
+  <package id="HtmlAgilityPack" version="1.11.12" targetFramework="net47" />
  <package id="Microsoft.NETCore.Platforms" version="2.2.2" targetFramework="net47" />
  <package id="Microsoft.SyndicationFeed.ReaderWriter" version="1.0.2" targetFramework="net47" />
  <package id="Microsoft.Win32.Primitives" version="4.3.0" targetFramework="net47" />