diff --git a/PolyFeed/FeedBuilder.cs b/PolyFeed/FeedBuilder.cs index c806efa..b783e69 100644 --- a/PolyFeed/FeedBuilder.cs +++ b/PolyFeed/FeedBuilder.cs @@ -1,9 +1,13 @@ using System; +using System.Collections.Generic; using System.IO; using System.Net; using System.Text; using System.Threading.Tasks; using System.Xml; +using Fizzler.Systems.HtmlAgilityPack; +using HtmlAgilityPack; +using Microsoft.SyndicationFeed; using Microsoft.SyndicationFeed.Atom; namespace PolyFeed @@ -22,9 +26,68 @@ namespace PolyFeed public async Task AddSource(FeedSource source) { WebResponse response = await WebRequest.Create(source.Url).GetResponseAsync(); - using StreamReader reader = new StreamReader(response.GetResponseStream()); + // Write the header + await feed.WriteGenerator("Polyfeed", "https://gitlab.com/sbrl/PolyFeed.git", Program.getProgramVersion()); + await feed.WriteId(source.Url); + string lastModified = response.Headers.Get("last-modified"); + if (string.IsNullOrWhiteSpace(lastModified)) + await feed.WriteUpdated(DateTimeOffset.Now); + else + await feed.WriteUpdated(DateTimeOffset.Parse(lastModified)); + + string contentType = response.Headers.Get("content-type"); + + switch (source.SourceType) { + case SourceType.HTML: + await AddSourceHtml(source, response); + break; + default: + throw new NotImplementedException($"Error: The source type {source.SourceType} hasn't been implemented yet."); + } + } + + private async Task AddSourceHtml(FeedSource source, WebResponse response) { + HtmlDocument html = new HtmlDocument(); + using (StreamReader reader = new StreamReader(response.GetResponseStream())) + html.LoadHtml(await reader.ReadToEndAsync()); + + HtmlNode document = html.DocumentNode; + + await feed.WriteTitle(ReferenceSubstitutor.Replace(source.Title, document)); + await feed.WriteSubtitle(ReferenceSubstitutor.Replace(source.Subtitle, document)); + + foreach (HtmlNode nextNode in document.QuerySelectorAll(source.EntrySelector)) { + HtmlNode urlNode = 
nextNode.QuerySelector(source.EntryUrlSelector); + string url = source.EntryUrlAttribute == string.Empty ? + urlNode.InnerText : urlNode.Attributes[source.EntryUrlAttribute].DeEntitizeValue; + + + SyndicationItem nextItem = new SyndicationItem() { + Id = url, + Title = ReferenceSubstitutor.Replace(source.EntryTitle, nextNode), + Description = ReferenceSubstitutor.Replace(source.EntryContent, nextNode) + }; + + if (source.EntryPublishedSelector != string.Empty) { + HtmlNode publishedNode = nextNode.QuerySelector(source.EntryPublishedSelector); + nextItem.Published = DateTime.Parse( + source.EntryPublishedAttribute == string.Empty + ? publishedNode.InnerText + : publishedNode.Attributes[source.EntryPublishedAttribute].DeEntitizeValue + ); + + } + if (source.EntryLastUpdatedSelector != string.Empty) { + HtmlNode lastUpdatedNode = nextNode.QuerySelector(source.EntryLastUpdatedSelector); + nextItem.LastUpdated = DateTime.Parse( + source.EntryLastUpdatedAttribute == string.Empty + ? lastUpdatedNode.InnerText + : lastUpdatedNode.Attributes[source.EntryLastUpdatedAttribute].DeEntitizeValue + ); + } + + await feed.Write(nextItem); + } } } } diff --git a/PolyFeed/FeedSource.cs b/PolyFeed/FeedSource.cs index 1fe2100..ffd629e 100644 --- a/PolyFeed/FeedSource.cs +++ b/PolyFeed/FeedSource.cs @@ -1,4 +1,5 @@ using System; + namespace PolyFeed { public enum SourceType { HTML, XML, JSON }; @@ -10,31 +11,43 @@ namespace PolyFeed /// /// The URL. public string Url { get; set; } - /// - /// The title of the feed. - /// - public string Title { get; set; } + /// /// The type of source document to expect. /// public SourceType SourceType { get; set; } + /// + /// The title of the feed. + /// Supports the same {} syntax as . + /// + public string Title { get; set; } + /// + /// The subtitle of the feed. + /// Supports the same {} syntax as . + /// + /// The subtitle. 
+ public string Subtitle { get; set; } + + + #region Entries + /// /// A selector that matches against an element that contains the URL that an /// entry should link to. /// Relative to the element selected by . /// - public string UrlSelector { get; set; } + public string EntryUrlSelector { get; set; } /// - /// The name of the attribute on the element selected by . + /// The name of the attribute on the element selected by . /// Set to an empty string to select the content of the element instead of the /// content of an attribute. /// - public string UrlElementAttribute { get; set; } = ""; + public string EntryUrlAttribute { get; set; } = ""; /// - /// The selector that specifies the location in the object model of nodes that should - /// be added to the feed. + /// The selector that specifies the location of nodes in the object model that + /// should be added to the feed. /// The format varies depending on the . /// - HTML: CSS selector (e.g. main > article) /// - XML: XPath (e.g. //element_name) @@ -52,5 +65,29 @@ namespace PolyFeed /// Same as , but for the body of an entry. HTML is allowed. /// public string EntryContent { get; set; } + + /// + /// The selector for the node that contains the date published for an entry. + /// + public string EntryPublishedSelector { get; set; } + + /// + /// The name of the attribute that contains the date published for an entry. + /// Set to to use the content of the node itself. + /// + public string EntryPublishedAttribute { get; set; } + + /// + /// Same as , but for the last updated. + /// If not specified, the last updated will be omitted. + /// + public string EntryLastUpdatedSelector { get; set; } + /// + /// Same as . 
+ /// + public string EntryLastUpdatedAttribute { get; set; } + + #endregion + } } diff --git a/PolyFeed/PolyFeed.csproj b/PolyFeed/PolyFeed.csproj index e6489c7..87cbe20 100644 --- a/PolyFeed/PolyFeed.csproj +++ b/PolyFeed/PolyFeed.csproj @@ -34,7 +34,7 @@ ..\packages\Fizzler.1.2.0\lib\netstandard2.0\Fizzler.dll - ..\packages\HtmlAgilityPack.1.11.9\lib\Net45\HtmlAgilityPack.dll + ..\packages\HtmlAgilityPack.1.11.12\lib\Net45\HtmlAgilityPack.dll ..\packages\Microsoft.Win32.Primitives.4.3.0\lib\net46\Microsoft.Win32.Primitives.dll @@ -136,10 +136,20 @@ + + + + + + + + + + \ No newline at end of file diff --git a/PolyFeed/Program.cs b/PolyFeed/Program.cs index f62d6b2..ddda4bc 100644 --- a/PolyFeed/Program.cs +++ b/PolyFeed/Program.cs @@ -3,7 +3,7 @@ using System.Collections.Generic; using System.IO; using System.Reflection; -namespace ProjectNamespace +namespace PolyFeed { internal class Settings { @@ -67,7 +67,7 @@ namespace ProjectNamespace #region Helper Methods - private static string getProgramVersion() + public static string getProgramVersion() { Version version = Assembly.GetExecutingAssembly().GetName().Version; return $"{version.Major}.{version.Minor}"; diff --git a/PolyFeed/ReferenceSubstitutor.cs b/PolyFeed/ReferenceSubstitutor.cs new file mode 100644 index 0000000..e6d22b9 --- /dev/null +++ b/PolyFeed/ReferenceSubstitutor.cs @@ -0,0 +1,44 @@ +using System; +using System.Text; +using Fizzler.Systems.HtmlAgilityPack; +using HtmlAgilityPack; +using Salamander.Core.Lexer; + +namespace PolyFeed +{ + internal static class ReferenceSubstitutor { + private static LexerPool lexerPool = new LexerPool(); + + public static string Replace(string inputString, HtmlNode rootElement) + { + StringBuilder result = new StringBuilder(); + SubstitutionLexer lexer = lexerPool.AcquireLexer(); + lexer.Initialise(inputString); + + foreach (LexerToken nextToken in lexer.TokenStream()) + { + switch (nextToken.Type) { + case SubstitutionToken.BraceOpen: + lexer.SaveRuleStates(); + 
lexer.EnableRule(SubstitutionToken.Identifier); + lexer.DisableRule(SubstitutionToken.Text); + break; + case SubstitutionToken.BraceClose: + lexer.RestoreRuleStates(); + break; + + case SubstitutionToken.Text: + result.Append(nextToken.Value); + break; + + case SubstitutionToken.Identifier: + result.Append(rootElement.QuerySelector(nextToken.Value)); + break; + } + } + lexerPool.ReleaseLexer(lexer); + + return result.ToString(); + } + } +} diff --git a/PolyFeed/Salamander.Core/Ansi.cs b/PolyFeed/Salamander.Core/Ansi.cs new file mode 100644 index 0000000..40f5e87 --- /dev/null +++ b/PolyFeed/Salamander.Core/Ansi.cs @@ -0,0 +1,49 @@ +using System; + +namespace Salamander.Core.Helpers +{ + public static class Ansi + { + /// + /// Whether we should *actually* emit ANSI escape codes or not. + /// Useful when we want to output to a log file, for example. + /// + public static bool Enabled { get; set; } = true; + + // Solution on how to output ANSI escape codes in C# from here: + // https://www.jerriepelser.com/blog/using-ansi-color-codes-in-net-console-apps + public static string Reset => Enabled ? "\u001b[0m" : ""; + public static string HiCol => Enabled ? "\u001b[1m" : ""; + public static string Underline => Enabled ? "\u001b[4m" : ""; + public static string Inverse => Enabled ? "\u001b[7m" : ""; + + public static string FBlack => Enabled ? "\u001b[30m" : ""; + public static string FRed => Enabled ? "\u001b[31m" : ""; + public static string FGreen => Enabled ? "\u001b[32m" : ""; + public static string FYellow => Enabled ? "\u001b[33m" : ""; + public static string FBlue => Enabled ? "\u001b[34m" : ""; + public static string FMagenta => Enabled ? "\u001b[35m" : ""; + public static string FCyan => Enabled ? "\u001b[36m" : ""; + public static string FWhite => Enabled ? "\u001b[37m" : ""; + + public static string BBlack => Enabled ? "\u001b[40m" : ""; + public static string BRed => Enabled ? "\u001b[41m" : ""; + public static string BGreen => Enabled ? 
"\u001b[42m" : ""; + public static string BYellow => Enabled ? "\u001b[43m" : ""; + public static string BBlue => Enabled ? "\u001b[44m" : ""; + public static string BMagenta => Enabled ? "\u001b[45m" : ""; + public static string BCyan => Enabled ? "\u001b[46m" : ""; + public static string BWhite => Enabled ? "\u001b[47m" : ""; + + // Thanks to http://ascii-table.com/ansi-escape-sequences.php for the following ANSI escape sequences + public static string Up(int lines = 1) => Enabled ? $"\u001b[{lines}A" : ""; + public static string Down(int lines = 1) => Enabled ? $"\u001b[{lines}B" : ""; + public static string Right(int lines = 1) => Enabled ? $"\u001b[{lines}C" : ""; + public static string Left(int lines = 1) => Enabled ? $"\u001b[{lines}D" : ""; + + //public static string JumpTo(Vector2 pos) => $"\u001b[{pos.Y};{pos.X}H" : ""; + + public static string CursorPosSave => Enabled ? $"\u001b[s" : ""; + public static string CursorPosRestore => Enabled ? $"\u001b[u" : ""; + } +} diff --git a/PolyFeed/Salamander.Core/Lexer.cs b/PolyFeed/Salamander.Core/Lexer.cs new file mode 100644 index 0000000..ea9618e --- /dev/null +++ b/PolyFeed/Salamander.Core/Lexer.cs @@ -0,0 +1,328 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Text; +using System.Text.RegularExpressions; +using Salamander.Core.Helpers; + +namespace Salamander.Core.Lexer +{ + public class Lexer + { + /// + /// The rules that should be used during the lexing process. + /// + public List> Rules { get; private set; } = new List>(); + /// + /// Tokens in this list will be matched against, but not emitted by the lexer + /// into the main token stream. + /// Useful for catching and disposing of sequences of characters you don't want escaping + /// or breaking your parser. + /// + public List IgnoreTokens { get; private set; } = new List(); + + /// + /// Whether the lexer should be verbose and log a bunch of debugging information + /// to the console. 
+ /// + public bool Verbose { get; set; } = false; + + /// + /// The number of the line that currently being scanned. + /// + public int CurrentLineNumber { get; private set; } = 0; + /// + /// The number of characters on the current line that have been scanned. + /// + /// The current line position. + public int CurrentLinePos { get; private set; } = 0; + /// + /// The total number of characters currently scanned by this lexer instance. + /// Only updated every newline! + /// + public int TotalCharsScanned { get; private set; } = 0; + + /// + /// The internal stream that we should read from when lexing. + /// + private StreamReader textStream; + + /// + /// A stack of rule states. + /// Whether rules are enabled or disabled can be recursively saved and restored - + /// this is how the lexer saves this information. + /// + private Stack, bool>> EnabledStateStack = new Stack, bool>>(); + + /// + /// Creates a new , optionally containing the given + /// instances. + /// + /// The rules to add to the new . + public Lexer(params LexerRule[] initialRules) + { + AddRules(initialRules); + } + + /// + /// Adds a single lexing rule to the . + /// + /// The rule to add. + public void AddRule(LexerRule newRule) + => Rules.Add(newRule); + /// + /// Adds a bunch of lexing rules to the . + /// + /// The rules to add. + public void AddRules(IEnumerable> newRules) + => Rules.AddRange(newRules); + + /// + /// Reinitialises the parser with a new input stream. + /// + /// + /// Child classes should override this method to do their own state initialisation, + /// as lexers MAY be re-used on multiple input streams. + /// Implementors must be careful not to forget to call this base method though. + /// + /// The to use as the new input stream.. 
+ public virtual void Initialise(StreamReader reader) + { + // Reset the counters + CurrentLineNumber = 0; + CurrentLinePos = 0; + TotalCharsScanned = 0; + + // Reset the state stack + EnabledStateStack.Clear(); + + // Re-enable all rules + EnableAllRules(); + + textStream = reader; + } + public void Initialise(string input) + { + MemoryStream stream = new MemoryStream(Encoding.UTF8.GetBytes(input)); + Initialise(new StreamReader(stream)); + } + + /// + /// Performs the lexing process itself in an incremental manner. + /// Note that a single Lexer may only do a single lex at a time - even if it's the + /// same document multiple times over. + /// + /// A stream of lexical tokens. + public IEnumerable> TokenStream() + { + string nextLine; + List> matches = new List>(); + while ((nextLine = textStream.ReadLine()) != null) + { + CurrentLinePos = 0; + + while (CurrentLinePos < nextLine.Length) + { + matches.Clear(); + foreach (LexerRule rule in Rules) + { + if (!rule.Enabled) continue; + + Match nextMatch = rule.RegEx.Match(nextLine, CurrentLinePos); + if (!nextMatch.Success) continue; + + matches.Add( + new LexerToken(rule, nextMatch) + { + LineNumber = CurrentLineNumber, + ColumnNumber = nextMatch.Index + } + ); + } + + if (matches.Count == 0) + { + string unknownTokenContent = nextLine.Substring(CurrentLinePos); + if (Verbose) Console.WriteLine($"{Ansi.FRed}[Unknown Token: No matches found for this line]{Ansi.Reset} {0}", unknownTokenContent); + yield return new LexerToken(unknownTokenContent) + { + LineNumber = CurrentLineNumber, + ColumnNumber = CurrentLinePos + }; + break; + } + + matches.Sort((LexerToken a, LexerToken b) => { + // Match of offset position position + int result = a.ColumnNumber - b.ColumnNumber; + // If they both start at the same position, then go with highest priority one + if (result == 0) + result = b.Rule.Priority - a.Rule.Priority; + // Failing that, try the longest one + if (result == 0) + result = b.RegexMatch.Length - 
a.RegexMatch.Length; + + return result; + }); + LexerToken selectedToken = matches[0]; + int selectedTokenOffset = nextLine.IndexOf(selectedToken.RegexMatch.Value, CurrentLinePos) - CurrentLinePos; + + if (selectedTokenOffset > 0) + { + string extraTokenContent = nextLine.Substring(CurrentLinePos, selectedTokenOffset); + int unmatchedLinePos = CurrentLinePos; + CurrentLinePos += selectedTokenOffset; + if (Verbose) Console.WriteLine($"{Ansi.FRed}[Unmatched content]{Ansi.Reset} '{extraTokenContent}'"); + // Return the an unknown token, but only if we're not meant to be ignoring them + if (!IgnoreTokens.Contains((TokenType)Enum.ToObject(typeof(TokenType), 0))) + { + yield return new LexerToken(extraTokenContent) + { + LineNumber = CurrentLineNumber, + ColumnNumber = unmatchedLinePos + }; + } + } + + CurrentLinePos += selectedToken.RegexMatch.Length; + if (Verbose) Console.WriteLine($"{(IgnoreTokens.Contains(selectedToken.Type) ? Ansi.FBlack : Ansi.FGreen)}{selectedToken}{Ansi.Reset}"); + + // Yield the token, but only if we aren't supposed to be ignoring it + if (IgnoreTokens.Contains(selectedToken.Type)) + continue; + yield return selectedToken; + } + + if (Verbose) Console.WriteLine($"{Ansi.FBlue}[Lexer]{Ansi.Reset} Next line"); + CurrentLineNumber++; + TotalCharsScanned += CurrentLinePos; + } + } + + + #region Rule Management + + /// + /// Enables all s currently registered against + /// this Lexer. + /// + public void EnableAllRules() => EnableRulesByPrefix(""); + /// + /// Disables all s currently registered against + /// this Lexer. + /// + public void DisableAllRules() => DisableRulesByPrefix(""); + + /// + /// Enables the rule that matches against the given . + /// + /// The token type to use to find the rule to enable. + public void EnableRule(TokenType type) => SetRule(type, true); + /// + /// Disables the rule that matches against the given . + /// + /// The token type to use to find the rule to disable. 
+ public void DisableRule(TokenType type) => SetRule(type, false); + + /// + /// Sets the enabled status of the rule that matches against the given + /// to the given state. + /// + /// The to use to find the rule to + /// sets the enabled state of. + /// Whether to enable or disable the rule. = enable it, = disable it. + public void SetRule(TokenType type, bool state) + { + foreach (LexerRule rule in Rules) + { + // We have to do a string comparison here because of the generic type we're using in multiple nested + // classes + if (Enum.GetName(rule.Type.GetType(), rule.Type) == Enum.GetName(type.GetType(), type)) + { + rule.Enabled = state; + return; + } + } + } + + /// + /// Toggles the enabled status of multiple rules by finding rules that generate + /// tokens whose name begins with a specific substring. + /// + /// The prefix to use when finding rules to toggle. + public void ToggleRulesByPrefix(string tokenTypePrefix) + { + foreach (LexerRule rule in Rules) + { + // We have to do a string comparison here because of the generic type we're using in multiple nested + // classes + if (Enum.GetName(rule.Type.GetType(), rule.Type).StartsWith(tokenTypePrefix, StringComparison.CurrentCulture)) + rule.Enabled = !rule.Enabled; + } + } + /// + /// Enables multiple rules by finding rules that generate + /// tokens whose name begins with a specific substring. + /// + /// The prefix to use when finding rules to enable. + public void EnableRulesByPrefix(string tokenTypePrefix) + => SetRulesByPrefix(tokenTypePrefix, true); + /// + /// Disables multiple rules by finding rules that generate + /// tokens whose name begins with a specific substring. + /// + /// The prefix to use when finding rules to disable. + public void DisableRulesByPrefix(string tokenTypePrefix) + => SetRulesByPrefix(tokenTypePrefix, false); + + /// + /// Set the enabled status of multiple rules by finding rules that generate + /// tokens whose name begins with a specific substring. 
+ /// + /// The prefix to use when finding rules to set the + /// status of. + public void SetRulesByPrefix(string tokenTypePrefix, bool state) + { + foreach (LexerRule rule in Rules) + { + // We have to do a string comparison here because of the generic type we're using in multiple nested + // classes + if (Enum.GetName(rule.Type.GetType(), rule.Type).StartsWith(tokenTypePrefix, StringComparison.CurrentCulture)) + { + //if(Verbose) Console.WriteLine($"{Ansi.FBlue}[Lexer/Rules] {Ansi.FCyan}Setting {rule.Type} to {state}"); + rule.Enabled = state; + } + } + } + + /// + /// Saves the current rule states (i.e. whether they are enabled or not) as a snapshot to an + /// internal stack. + /// + public void SaveRuleStates() + { + Dictionary, bool> states = new Dictionary, bool>(); + foreach (LexerRule nextRule in Rules) + states[nextRule] = nextRule.Enabled; + + EnabledStateStack.Push(states); + } + /// + /// Restores the top-most rule states snapshot from the internal stack. + /// + /// Thrown if there aren't any states left on the stack to restore. + public void RestoreRuleStates() + { + if (EnabledStateStack.Count < 1) + throw new InvalidOperationException("Error: Can't restore the lexer rule states when no states have been saved!"); + + Dictionary, bool> states = EnabledStateStack.Pop(); + foreach (KeyValuePair, bool> nextRulePair in states) + nextRulePair.Key.Enabled = nextRulePair.Value; + } + + + #endregion + + } +} diff --git a/PolyFeed/Salamander.Core/LexerPool.cs b/PolyFeed/Salamander.Core/LexerPool.cs new file mode 100644 index 0000000..e8880d7 --- /dev/null +++ b/PolyFeed/Salamander.Core/LexerPool.cs @@ -0,0 +1,34 @@ +using System; +using System.Collections.Generic; + +namespace Salamander.Core.Lexer +{ + /// + /// Represents a pool of reusable s. + /// Useful to avoid memory churn when lexing lots of different input streams. 
+ /// + public class LexerPool where T : Lexer, new() + { + private List freeLexers = new List(); + + public LexerPool() + { + } + + public T AcquireLexer() + { + if (freeLexers.Count > 0) + { + T lexer = freeLexers[0]; + freeLexers.Remove(lexer); + return lexer; + } + return new T(); + } + + public void ReleaseLexer(T lexer) + { + freeLexers.Add(lexer); + } + } +} diff --git a/PolyFeed/Salamander.Core/LexerRule.cs b/PolyFeed/Salamander.Core/LexerRule.cs new file mode 100644 index 0000000..96437e8 --- /dev/null +++ b/PolyFeed/Salamander.Core/LexerRule.cs @@ -0,0 +1,52 @@ +using System; +using System.Text.RegularExpressions; + +namespace Salamander.Core.Lexer +{ + public class LexerRule + { + /// + /// The token type that a match against this rule should generate. + /// + public readonly TokenType Type; + /// + /// The regular expression to use to find matches. + /// + public readonly Regex RegEx; + /// + /// The priority of this rule. + /// + /// + /// If there are multiple matches, then the one with the highest priority will be matched + /// against first. + /// Failing that, the longest match will be taken first. + /// Note that even if a match has a higher priority, a match from a lower priority rule + /// will be used instead if it occurs earlier in the source, as this will result in fewer + /// unmatched characters. + /// + public int Priority { get; set; } = 0; + /// + /// Whether this rule is currently enabled or not. This can be changed on-the-fly whilst lexing. + /// Sometimes useful when handling more complicated logic. + /// Be careful though, as if you start needing this, perhaps you should evaluate whether + /// utilising the fuller capabilities of the parser would be more appropriate instead. 
+ /// + public bool Enabled { get; set; } = true; + + public LexerRule(TokenType inName, string inRegEx, RegexOptions inRegexOptions = RegexOptions.None, int inPriority = 0) + { + if (!typeof(TokenType).IsEnum) + throw new ArgumentException($"Error: inName must be an enum - {typeof(TokenType)} passed"); + + Type = inName; + RegEx = new Regex(inRegEx, inRegexOptions | RegexOptions.Compiled); + Priority = inPriority; + } + + public bool Toggle() + { + Enabled = !Enabled; + return Enabled; + } + } +} diff --git a/PolyFeed/Salamander.Core/LexerToken.cs b/PolyFeed/Salamander.Core/LexerToken.cs new file mode 100644 index 0000000..635cdbf --- /dev/null +++ b/PolyFeed/Salamander.Core/LexerToken.cs @@ -0,0 +1,76 @@ +using System; +using System.Text.RegularExpressions; + +namespace Salamander.Core.Lexer +{ + public class LexerToken + { + private int _lineNumber = -1, _columnNumber = -1; + public int LineNumber { + get => _lineNumber; + set { + if (_lineNumber != -1) + throw new InvalidOperationException("Can't overwrite existing line number data"); + if (value < 0) + throw new ArgumentException("Error: Negative line numbers don't make sense."); + + _lineNumber = value; + } + } + public int ColumnNumber { + get => _columnNumber; + set { + if(_columnNumber != -1) + throw new InvalidOperationException("Can't overwrite existing column number data"); + if(value < 0) + throw new ArgumentException("Error: Negative column numbers don't make sense."); + + _columnNumber = value; + } + } + + public readonly bool IsNullMatch = false; + public readonly LexerRule Rule = null; + public readonly Match RegexMatch; + + public TokenType Type { + get { + try + { + return Rule.Type; + } + catch (NullReferenceException) + { + return default(TokenType); + } + } + } + private string nullValueData; + public string Value { + get { + return IsNullMatch ? 
nullValueData : RegexMatch.Value; + } + } + + public LexerToken(LexerRule inRule, Match inMatch) + { + Rule = inRule; + RegexMatch = inMatch; + } + public LexerToken(string unknownData) + { + IsNullMatch = true; + nullValueData = unknownData; + } + + + #region Overrides + + public override string ToString() + { + return $"[LexerToken @ {LineNumber}:{ColumnNumber} Type={Type}, Value={Value}]"; + } + + #endregion + } +} diff --git a/PolyFeed/SubstitutionLexer.cs b/PolyFeed/SubstitutionLexer.cs new file mode 100644 index 0000000..8240cc7 --- /dev/null +++ b/PolyFeed/SubstitutionLexer.cs @@ -0,0 +1,39 @@ +using System; +using System.Collections.Generic; +using System.IO; +using Salamander.Core.Lexer; + +namespace PolyFeed +{ + internal enum SubstitutionToken + { + Unknown = 0, + + Text, + + BraceOpen, + BraceClose, + Identifier + + } + + internal class SubstitutionLexer : Lexer + { + public SubstitutionLexer() + { + AddRules(new List>() { + new LexerRule(SubstitutionToken.Text, @"[^{}]+"), + new LexerRule(SubstitutionToken.Identifier, @"[^{}]+"), + new LexerRule(SubstitutionToken.BraceOpen, @"\{"), + new LexerRule(SubstitutionToken.BraceClose, @"\}"), + }); + } + + public override void Initialise(StreamReader reader) + { + base.Initialise(reader); + + DisableRule(SubstitutionToken.Identifier); + } + } +} diff --git a/PolyFeed/packages.config b/PolyFeed/packages.config index 7e72b7b..0756eba 100644 --- a/PolyFeed/packages.config +++ b/PolyFeed/packages.config @@ -2,7 +2,7 @@ - +