Complete refactoring to interface-based provider discovery

This commit is contained in:
Starbeamrainbowlabs 2019-08-11 15:03:50 +01:00
parent 0afff60345
commit 6944d87726
Signed by: sbrl
GPG Key ID: 1BE5172E637709C2
6 changed files with 131 additions and 94 deletions

View File

@ -2,6 +2,7 @@
using System.Collections.Generic; using System.Collections.Generic;
using System.IO; using System.IO;
using System.Net; using System.Net;
using System.Reflection;
using System.Text; using System.Text;
using System.Threading.Tasks; using System.Threading.Tasks;
using System.Xml; using System.Xml;
@ -10,6 +11,7 @@ using HtmlAgilityPack;
using Microsoft.SyndicationFeed; using Microsoft.SyndicationFeed;
using Microsoft.SyndicationFeed.Atom; using Microsoft.SyndicationFeed.Atom;
using PolyFeed.Helpers; using PolyFeed.Helpers;
using PolyFeed.ParserProviders;
namespace PolyFeed namespace PolyFeed
{ {
@ -49,98 +51,30 @@ namespace PolyFeed
string contentType = response.Headers.Get("content-type"); string contentType = response.Headers.Get("content-type");
switch (source.Feed.Type) { IParserProvider provider = GetProvider(source.Feed.SourceType);
case SourceType.HTML: if(provider == null)
await AddSourceHtml(source, response); throw new ApplicationException($"Error: A provider for the source type {source.Feed.SourceType} wasn't found.");
break;
default: provider.SetOutputFeed(feed, xml);
throw new NotImplementedException($"Error: The source type {source.Feed.Type} hasn't been implemented yet."); await provider.ParseWebResponse(source, response);
}
await Console.Error.WriteLineAsync("[Builder] Done!"); await Console.Error.WriteLineAsync("[Builder] Done!");
} }
private async Task AddSourceHtml(FeedSource source, WebResponse response) { private IParserProvider GetProvider(string identifier)
await Console.Error.WriteLineAsync("[Builder/Html] Parsing Html"); {
IEnumerable<Type> possibleTypes = ReflectionUtilities.IterateImplementingTypes(
typeof(IParserProvider),
Assembly.GetExecutingAssembly()
);
// Parse the HTML foreach (Type next in possibleTypes) {
HtmlDocument html = new HtmlDocument(); IParserProvider candidate = (IParserProvider)Activator.CreateInstance(next);
using (StreamReader reader = new StreamReader(response.GetResponseStream())) if (candidate.Identifier == identifier)
html.LoadHtml(await reader.ReadToEndAsync()); return candidate;
HtmlNode document = html.DocumentNode;
document.AbsolutifyUris(new Uri(source.Feed.Url));
await Console.Error.WriteLineAsync("[Builder/Html] Generating feed content");
// Add the title
await feed.WriteTitle(ReferenceSubstitutor.Replace(source.Feed.Title, document));
await feed.WriteSubtitle(ReferenceSubstitutor.Replace(source.Feed.Subtitle, document));
// Add the logo
if (source.Feed.Logo != null) {
HtmlNode logoNode = document.QuerySelector(source.Feed.Logo.Selector);
xml.WriteElementString("logo", logoNode.Attributes[source.Feed.Logo.Attribute].Value);
} }
// Add the feed entries return null;
foreach (HtmlNode nextNode in document.QuerySelectorAll(source.Entries.Selector)) {
HtmlNode urlNode = nextNode.QuerySelector(source.Entries.Url.Selector);
if (urlNode == null)
throw new ApplicationException("Error: Failed to match entry url selector against an element.");
string url = source.Entries.Url.Attribute == string.Empty ?
urlNode.InnerText : urlNode.Attributes[source.Entries.Url.Attribute].DeEntitizeValue;
Uri entryUri = new Uri(new Uri(source.Feed.Url), new Uri(url));
AtomEntry nextItem = new AtomEntry() {
Id = entryUri.ToString(),
Title = ReferenceSubstitutor.Replace(source.Entries.Title, nextNode),
Description = ReferenceSubstitutor.Replace(source.Entries.Content, nextNode),
ContentType = "html"
};
nextItem.AddLink(new SyndicationLink(entryUri, AtomLinkTypes.Alternate));
if (source.Entries.Published != null) {
nextItem.Published = DateTime.Parse(
nextNode.QuerySelectorAttributeOrText(
source.Entries.Published
)
);
}
if (source.Entries.LastUpdated != null) {
nextItem.LastUpdated = DateTime.Parse(
nextNode.QuerySelectorAttributeOrText(
source.Entries.LastUpdated
)
);
}
else if (source.Entries.Published != null) // Use the publish date if available
nextItem.LastUpdated = nextItem.Published;
else // It requires one, apparently
nextItem.LastUpdated = DateTimeOffset.Now;
if (source.Entries.AuthorName != null) {
SyndicationPerson author = new SyndicationPerson(
nextNode.QuerySelectorAttributeOrText(source.Entries.AuthorName).Trim(),
""
);
if (source.Entries.AuthorUrl != null)
author.Uri = nextNode.QuerySelectorAttributeOrText(source.Entries.AuthorUrl);
nextItem.AddContributor(author);
}
else
nextItem.AddContributor(new SyndicationPerson("Unknown", ""));
await feed.Write(nextItem);
}
} }
public async Task<string> Render() public async Task<string> Render()

View File

@ -37,7 +37,6 @@ namespace PolyFeed
/// The type of source document to expect. /// The type of source document to expect.
/// </summary> /// </summary>
public string SourceType { get; set; } public string SourceType { get; set; }
public SourceType Type => (SourceType)Enum.Parse(typeof(SourceType), SourceType, true);
/// <summary> /// <summary>

View File

@ -77,6 +77,10 @@ namespace PolyFeed.Helpers
// FUTURE: Add caching here? Reflection is slow // FUTURE: Add caching here? Reflection is slow
foreach (Type nextType in IterateAllLoadedTypes()) foreach (Type nextType in IterateAllLoadedTypes())
{ {
// Interfaces implement themselves, but we don't want to return the interface itself
if (nextType == targetInterface)
continue;
// Make sure it implements the specified interface // Make sure it implements the specified interface
if (!targetInterface.IsAssignableFrom(nextType)) if (!targetInterface.IsAssignableFrom(nextType))
continue; continue;

View File

@ -1,23 +1,119 @@
using System; using System;
using System.IO;
using System.Net; using System.Net;
using System.Threading.Tasks;
using System.Xml;
using Fizzler.Systems.HtmlAgilityPack;
using HtmlAgilityPack;
using Microsoft.SyndicationFeed;
using Microsoft.SyndicationFeed.Atom; using Microsoft.SyndicationFeed.Atom;
using PolyFeed.Helpers;
namespace PolyFeed.ParserProviders namespace PolyFeed.ParserProviders
{ {
public class HtmlParserProvider : IParserProvider public class HtmlParserProvider : IParserProvider
{ {
private XmlWriter xml = null;
private AtomFeedWriter feed = null;
public string Identifier => "html";
public HtmlParserProvider() public HtmlParserProvider()
{ {
} }
public void ParseWebResponse(FeedSource source, WebResponse response) public void SetOutputFeed(AtomFeedWriter inFeed, XmlWriter inXml) {
{ xml = inXml;
throw new NotImplementedException(); feed = inFeed;
} }
public void SetOutputFeed(AtomFeedWriter feed) public async Task ParseWebResponse(FeedSource source, WebResponse response)
{ {
throw new NotImplementedException(); await Console.Error.WriteLineAsync("[Builder/Html] Parsing Html");
// Parse the HTML
HtmlDocument html = new HtmlDocument();
using (StreamReader reader = new StreamReader(response.GetResponseStream()))
html.LoadHtml(await reader.ReadToEndAsync());
HtmlNode document = html.DocumentNode;
document.AbsolutifyUris(new Uri(source.Feed.Url));
await Console.Error.WriteLineAsync("[Builder/Html] Generating feed content");
// Add the title
await feed.WriteTitle(ReferenceSubstitutor.Replace(source.Feed.Title, document));
await feed.WriteSubtitle(ReferenceSubstitutor.Replace(source.Feed.Subtitle, document));
// Add the logo
if (source.Feed.Logo != null) {
HtmlNode logoNode = document.QuerySelector(source.Feed.Logo.Selector);
xml.WriteElementString("logo", logoNode.Attributes[source.Feed.Logo.Attribute].Value);
}
// Add the feed entries
foreach (HtmlNode nextNode in document.QuerySelectorAll(source.Entries.Selector))
{
await addEntry(source, nextNode);
}
}
private async Task addEntry(FeedSource source, HtmlNode nextNode)
{
HtmlNode urlNode = nextNode.QuerySelector(source.Entries.Url.Selector);
if (urlNode == null)
throw new ApplicationException("Error: Failed to match entry url selector against an element.");
string url = source.Entries.Url.Attribute == string.Empty ?
urlNode.InnerText : urlNode.Attributes[source.Entries.Url.Attribute].DeEntitizeValue;
Uri entryUri = new Uri(new Uri(source.Feed.Url), new Uri(url));
AtomEntry nextItem = new AtomEntry() {
Id = entryUri.ToString(),
Title = ReferenceSubstitutor.Replace(source.Entries.Title, nextNode),
Description = ReferenceSubstitutor.Replace(source.Entries.Content, nextNode),
ContentType = "html"
};
nextItem.AddLink(new SyndicationLink(entryUri, AtomLinkTypes.Alternate));
if (source.Entries.Published != null) {
nextItem.Published = DateTime.Parse(
nextNode.QuerySelectorAttributeOrText(
source.Entries.Published
)
);
}
if (source.Entries.LastUpdated != null) {
nextItem.LastUpdated = DateTime.Parse(
nextNode.QuerySelectorAttributeOrText(
source.Entries.LastUpdated
)
);
}
else if (source.Entries.Published != null) // Use the publish date if available
nextItem.LastUpdated = nextItem.Published;
else // It requires one, apparently
nextItem.LastUpdated = DateTimeOffset.Now;
if (source.Entries.AuthorName != null) {
SyndicationPerson author = new SyndicationPerson(
nextNode.QuerySelectorAttributeOrText(source.Entries.AuthorName).Trim(),
""
);
if (source.Entries.AuthorUrl != null)
author.Uri = nextNode.QuerySelectorAttributeOrText(source.Entries.AuthorUrl);
nextItem.AddContributor(author);
}
else
nextItem.AddContributor(new SyndicationPerson("Unknown", ""));
await feed.Write(nextItem);
} }
} }
} }

View File

@ -1,5 +1,7 @@
using System; using System;
using System.Net; using System.Net;
using System.Threading.Tasks;
using System.Xml;
using Microsoft.SyndicationFeed.Atom; using Microsoft.SyndicationFeed.Atom;
namespace PolyFeed.ParserProviders namespace PolyFeed.ParserProviders
@ -11,6 +13,7 @@ namespace PolyFeed.ParserProviders
/// </summary> /// </summary>
public interface IParserProvider public interface IParserProvider
{ {
/// <summary> /// <summary>
/// The identifier of this provider. /// The identifier of this provider.
/// Used in the .toml configuration file to specify which parser to use. /// Used in the .toml configuration file to specify which parser to use.
@ -21,12 +24,13 @@ namespace PolyFeed.ParserProviders
/// Sets the output feed that parsed output should be written to. /// Sets the output feed that parsed output should be written to.
/// </summary> /// </summary>
/// <param name="feed">The output feed writer that output should be written to.</param> /// <param name="feed">The output feed writer that output should be written to.</param>
void SetOutputFeed(AtomFeedWriter feed); /// <param name="xml">The underlying XML writer for the feed. Try not to use this unless you *really* have to.</param>
void SetOutputFeed(AtomFeedWriter feed, XmlWriter xml);
/// <summary> /// <summary>
/// Parses a web response that's paired with a given <see cref="FeedSource" />. /// Parses a web response that's paired with a given <see cref="FeedSource" />.
/// </summary> /// </summary>
/// <param name="source">The <see cref="FeedSource"/> object that the <paramref name="response"/> was generated from.</param> /// <param name="source">The <see cref="FeedSource"/> object that the <paramref name="response"/> was generated from.</param>
/// <param name="response">The <see cref="WebResponse"/> that needs parsing.</param> /// <param name="response">The <see cref="WebResponse"/> that needs parsing.</param>
void ParseWebResponse(FeedSource source, WebResponse response); Task ParseWebResponse(FeedSource source, WebResponse response);
} }
} }

View File

@ -17,7 +17,7 @@ using System.Runtime.CompilerServices;
// The form "{Major}.{Minor}.*" will automatically update the build and revision, // The form "{Major}.{Minor}.*" will automatically update the build and revision,
// and "{Major}.{Minor}.{Build}.*" will update just the revision. // and "{Major}.{Minor}.{Build}.*" will update just the revision.
[assembly: AssemblyVersion("1.0.*")] [assembly: AssemblyVersion("0.1.1.*")]
// The following attributes are used to specify the signing key for the assembly, // The following attributes are used to specify the signing key for the assembly,
// if desired. See the Mono documentation for more information about signing. // if desired. See the Mono documentation for more information about signing.