Complete refactoring to interface-based provider discovery

This commit is contained in:
Starbeamrainbowlabs 2019-08-11 15:03:50 +01:00
parent 0afff60345
commit 6944d87726
Signed by: sbrl
GPG Key ID: 1BE5172E637709C2
6 changed files with 131 additions and 94 deletions

View File

@ -2,6 +2,7 @@
using System.Collections.Generic;
using System.IO;
using System.Net;
using System.Reflection;
using System.Text;
using System.Threading.Tasks;
using System.Xml;
@ -10,6 +11,7 @@ using HtmlAgilityPack;
using Microsoft.SyndicationFeed;
using Microsoft.SyndicationFeed.Atom;
using PolyFeed.Helpers;
using PolyFeed.ParserProviders;
namespace PolyFeed
{
@ -49,98 +51,30 @@ namespace PolyFeed
string contentType = response.Headers.Get("content-type");
switch (source.Feed.Type) {
case SourceType.HTML:
await AddSourceHtml(source, response);
break;
default:
throw new NotImplementedException($"Error: The source type {source.Feed.Type} hasn't been implemented yet.");
}
IParserProvider provider = GetProvider(source.Feed.SourceType);
if(provider == null)
throw new ApplicationException($"Error: A provider for the source type {source.Feed.SourceType} wasn't found.");
provider.SetOutputFeed(feed, xml);
await provider.ParseWebResponse(source, response);
await Console.Error.WriteLineAsync("[Builder] Done!");
}
private async Task AddSourceHtml(FeedSource source, WebResponse response) {
await Console.Error.WriteLineAsync("[Builder/Html] Parsing Html");
private IParserProvider GetProvider(string identifier)
{
IEnumerable<Type> possibleTypes = ReflectionUtilities.IterateImplementingTypes(
typeof(IParserProvider),
Assembly.GetExecutingAssembly()
);
// Parse the HTML
HtmlDocument html = new HtmlDocument();
using (StreamReader reader = new StreamReader(response.GetResponseStream()))
html.LoadHtml(await reader.ReadToEndAsync());
HtmlNode document = html.DocumentNode;
document.AbsolutifyUris(new Uri(source.Feed.Url));
await Console.Error.WriteLineAsync("[Builder/Html] Generating feed content");
// Add the title
await feed.WriteTitle(ReferenceSubstitutor.Replace(source.Feed.Title, document));
await feed.WriteSubtitle(ReferenceSubstitutor.Replace(source.Feed.Subtitle, document));
// Add the logo
if (source.Feed.Logo != null) {
HtmlNode logoNode = document.QuerySelector(source.Feed.Logo.Selector);
xml.WriteElementString("logo", logoNode.Attributes[source.Feed.Logo.Attribute].Value);
foreach (Type next in possibleTypes) {
IParserProvider candidate = (IParserProvider)Activator.CreateInstance(next);
if (candidate.Identifier == identifier)
return candidate;
}
// Add the feed entries
foreach (HtmlNode nextNode in document.QuerySelectorAll(source.Entries.Selector)) {
HtmlNode urlNode = nextNode.QuerySelector(source.Entries.Url.Selector);
if (urlNode == null)
throw new ApplicationException("Error: Failed to match entry url selector against an element.");
string url = source.Entries.Url.Attribute == string.Empty ?
urlNode.InnerText : urlNode.Attributes[source.Entries.Url.Attribute].DeEntitizeValue;
Uri entryUri = new Uri(new Uri(source.Feed.Url), new Uri(url));
AtomEntry nextItem = new AtomEntry() {
Id = entryUri.ToString(),
Title = ReferenceSubstitutor.Replace(source.Entries.Title, nextNode),
Description = ReferenceSubstitutor.Replace(source.Entries.Content, nextNode),
ContentType = "html"
};
nextItem.AddLink(new SyndicationLink(entryUri, AtomLinkTypes.Alternate));
if (source.Entries.Published != null) {
nextItem.Published = DateTime.Parse(
nextNode.QuerySelectorAttributeOrText(
source.Entries.Published
)
);
}
if (source.Entries.LastUpdated != null) {
nextItem.LastUpdated = DateTime.Parse(
nextNode.QuerySelectorAttributeOrText(
source.Entries.LastUpdated
)
);
}
else if (source.Entries.Published != null) // Use the publish date if available
nextItem.LastUpdated = nextItem.Published;
else // It requires one, apparently
nextItem.LastUpdated = DateTimeOffset.Now;
if (source.Entries.AuthorName != null) {
SyndicationPerson author = new SyndicationPerson(
nextNode.QuerySelectorAttributeOrText(source.Entries.AuthorName).Trim(),
""
);
if (source.Entries.AuthorUrl != null)
author.Uri = nextNode.QuerySelectorAttributeOrText(source.Entries.AuthorUrl);
nextItem.AddContributor(author);
}
else
nextItem.AddContributor(new SyndicationPerson("Unknown", ""));
await feed.Write(nextItem);
}
return null;
}
public async Task<string> Render()

View File

@ -37,7 +37,6 @@ namespace PolyFeed
/// The type of source document to expect.
/// </summary>
public string SourceType { get; set; }
public SourceType Type => (SourceType)Enum.Parse(typeof(SourceType), SourceType, true);
/// <summary>

View File

@ -77,6 +77,10 @@ namespace PolyFeed.Helpers
// FUTURE: Add caching here? Reflection is slow
foreach (Type nextType in IterateAllLoadedTypes())
{
// Interfaces implement themselves, but we don't want to return the interface itself
if (nextType == targetInterface)
continue;
// Make sure it implements the specified interface
if (!targetInterface.IsAssignableFrom(nextType))
continue;

View File

@ -1,23 +1,119 @@
using System;
using System.IO;
using System.Net;
using System.Threading.Tasks;
using System.Xml;
using Fizzler.Systems.HtmlAgilityPack;
using HtmlAgilityPack;
using Microsoft.SyndicationFeed;
using Microsoft.SyndicationFeed.Atom;
using PolyFeed.Helpers;
namespace PolyFeed.ParserProviders
{
public class HtmlParserProvider : IParserProvider
{
private XmlWriter xml = null;
private AtomFeedWriter feed = null;
public string Identifier => "html";
public HtmlParserProvider()
{
}
public void ParseWebResponse(FeedSource source, WebResponse response)
{
throw new NotImplementedException();
/// <summary>
/// Remembers the writers that parsed output should be sent to.
/// </summary>
/// <param name="inFeed">The Atom feed writer to store for later writes.</param>
/// <param name="inXml">The XML writer to store alongside the feed writer.</param>
public void SetOutputFeed(AtomFeedWriter inFeed, XmlWriter inXml)
{
    this.feed = inFeed;
    this.xml = inXml;
}
public void SetOutputFeed(AtomFeedWriter feed)
public async Task ParseWebResponse(FeedSource source, WebResponse response)
{
throw new NotImplementedException();
await Console.Error.WriteLineAsync("[Builder/Html] Parsing Html");
// Parse the HTML
HtmlDocument html = new HtmlDocument();
using (StreamReader reader = new StreamReader(response.GetResponseStream()))
html.LoadHtml(await reader.ReadToEndAsync());
HtmlNode document = html.DocumentNode;
document.AbsolutifyUris(new Uri(source.Feed.Url));
await Console.Error.WriteLineAsync("[Builder/Html] Generating feed content");
// Add the title
await feed.WriteTitle(ReferenceSubstitutor.Replace(source.Feed.Title, document));
await feed.WriteSubtitle(ReferenceSubstitutor.Replace(source.Feed.Subtitle, document));
// Add the logo
if (source.Feed.Logo != null) {
HtmlNode logoNode = document.QuerySelector(source.Feed.Logo.Selector);
xml.WriteElementString("logo", logoNode.Attributes[source.Feed.Logo.Attribute].Value);
}
// Add the feed entries
foreach (HtmlNode nextNode in document.QuerySelectorAll(source.Entries.Selector))
{
await addEntry(source, nextNode);
}
}
/// <summary>
/// Builds a single Atom entry from one matched HTML node and writes it to the output feed.
/// </summary>
/// <param name="source">The feed source whose entry settings (url, title, content, dates, author) drive the extraction.</param>
/// <param name="nextNode">The HTML node that represents the entry to add.</param>
/// <exception cref="ApplicationException">
/// Thrown if the entry url selector matches no element, or if the matched
/// element doesn't carry the configured url attribute.
/// </exception>
private async Task addEntry(FeedSource source, HtmlNode nextNode)
{
    HtmlNode urlNode = nextNode.QuerySelector(source.Entries.Url.Selector);
    if (urlNode == null)
        throw new ApplicationException("Error: Failed to match entry url selector against an element.");

    // An empty (or unset) attribute name means "use the element's text".
    string url;
    if (string.IsNullOrEmpty(source.Entries.Url.Attribute)) {
        url = urlNode.InnerText;
    }
    else {
        HtmlAttribute urlAttribute = urlNode.Attributes[source.Entries.Url.Attribute];
        // Fail with a descriptive message rather than a NullReferenceException
        if (urlAttribute == null)
            throw new ApplicationException($"Error: The element matched by the entry url selector has no '{source.Entries.Url.Attribute}' attribute.");
        url = urlAttribute.DeEntitizeValue;
    }

    // Resolve against the feed url via the (Uri, string) overload: wrapping the
    // value in new Uri(string) first would throw on relative urls. Absolute
    // urls pass through unchanged. Surrounding whitespace is trimmed because
    // element text frequently carries it.
    Uri entryUri = new Uri(new Uri(source.Feed.Url), url.Trim());

    AtomEntry nextItem = new AtomEntry() {
        Id = entryUri.ToString(),
        Title = ReferenceSubstitutor.Replace(source.Entries.Title, nextNode),
        Description = ReferenceSubstitutor.Replace(source.Entries.Content, nextNode),
        ContentType = "html"
    };
    nextItem.AddLink(new SyndicationLink(entryUri, AtomLinkTypes.Alternate));

    if (source.Entries.Published != null) {
        nextItem.Published = DateTime.Parse(
            nextNode.QuerySelectorAttributeOrText(
                source.Entries.Published
            )
        );
    }

    if (source.Entries.LastUpdated != null) {
        nextItem.LastUpdated = DateTime.Parse(
            nextNode.QuerySelectorAttributeOrText(
                source.Entries.LastUpdated
            )
        );
    }
    else if (source.Entries.Published != null) // Use the publish date if available
        nextItem.LastUpdated = nextItem.Published;
    else // It requires one, apparently
        nextItem.LastUpdated = DateTimeOffset.Now;

    if (source.Entries.AuthorName != null) {
        SyndicationPerson author = new SyndicationPerson(
            nextNode.QuerySelectorAttributeOrText(source.Entries.AuthorName).Trim(),
            ""
        );
        if (source.Entries.AuthorUrl != null)
            author.Uri = nextNode.QuerySelectorAttributeOrText(source.Entries.AuthorUrl);

        nextItem.AddContributor(author);
    }
    else // No author configured, so fall back to a placeholder contributor
        nextItem.AddContributor(new SyndicationPerson("Unknown", ""));

    await feed.Write(nextItem);
}
}
}

View File

@ -1,5 +1,7 @@
using System;
using System.Net;
using System.Threading.Tasks;
using System.Xml;
using Microsoft.SyndicationFeed.Atom;
namespace PolyFeed.ParserProviders
@ -11,6 +13,7 @@ namespace PolyFeed.ParserProviders
/// </summary>
public interface IParserProvider
{
/// <summary>
/// The identifier of this provider.
/// Used in the .toml configuration file to specify which parser to use.
@ -21,12 +24,13 @@ namespace PolyFeed.ParserProviders
/// Sets the output feed that parsed output should be written to.
/// </summary>
/// <param name="feed">The output feed writer that output should be written to.</param>
void SetOutputFeed(AtomFeedWriter feed);
/// <param name="xml">The underlying XML writer. Try not to use this unless you *really* have to.</param>
void SetOutputFeed(AtomFeedWriter feed, XmlWriter xml);
/// <summary>
/// Parses a web response that's paired with a given <see cref="FeedSource" />.
/// </summary>
/// <param name="source">The <see cref="FeedSource"/> object that the <paramref name="response"/> was generated from.</param>
/// <param name="response">The <see cref="WebResponse"/> that needs parsing.</param>
void ParseWebResponse(FeedSource source, WebResponse response);
Task ParseWebResponse(FeedSource source, WebResponse response);
}
}

View File

@ -17,7 +17,7 @@ using System.Runtime.CompilerServices;
// The form "{Major}.{Minor}.*" will automatically update the build and revision,
// and "{Major}.{Minor}.{Build}.*" will update just the revision.
[assembly: AssemblyVersion("1.0.*")]
[assembly: AssemblyVersion("0.1.1.*")]
// The following attributes are used to specify the signing key for the assembly,
// if desired. See the Mono documentation for more information about signing.