mirror of
https://github.com/sbrl/PolyFeed.git
synced 2024-11-24 06:43:01 +00:00
Complete refactoring to interface-based provider discovery
This commit is contained in:
parent
0afff60345
commit
6944d87726
6 changed files with 131 additions and 94 deletions
|
@ -2,6 +2,7 @@
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
using System.IO;
|
using System.IO;
|
||||||
using System.Net;
|
using System.Net;
|
||||||
|
using System.Reflection;
|
||||||
using System.Text;
|
using System.Text;
|
||||||
using System.Threading.Tasks;
|
using System.Threading.Tasks;
|
||||||
using System.Xml;
|
using System.Xml;
|
||||||
|
@ -10,6 +11,7 @@ using HtmlAgilityPack;
|
||||||
using Microsoft.SyndicationFeed;
|
using Microsoft.SyndicationFeed;
|
||||||
using Microsoft.SyndicationFeed.Atom;
|
using Microsoft.SyndicationFeed.Atom;
|
||||||
using PolyFeed.Helpers;
|
using PolyFeed.Helpers;
|
||||||
|
using PolyFeed.ParserProviders;
|
||||||
|
|
||||||
namespace PolyFeed
|
namespace PolyFeed
|
||||||
{
|
{
|
||||||
|
@ -49,98 +51,30 @@ namespace PolyFeed
|
||||||
|
|
||||||
string contentType = response.Headers.Get("content-type");
|
string contentType = response.Headers.Get("content-type");
|
||||||
|
|
||||||
switch (source.Feed.Type) {
|
IParserProvider provider = GetProvider(source.Feed.SourceType);
|
||||||
case SourceType.HTML:
|
if(provider == null)
|
||||||
await AddSourceHtml(source, response);
|
throw new ApplicationException($"Error: A provider for the source type {source.Feed.SourceType} wasn't found.");
|
||||||
break;
|
|
||||||
default:
|
provider.SetOutputFeed(feed, xml);
|
||||||
throw new NotImplementedException($"Error: The source type {source.Feed.Type} hasn't been implemented yet.");
|
await provider.ParseWebResponse(source, response);
|
||||||
}
|
|
||||||
|
|
||||||
await Console.Error.WriteLineAsync("[Builder] Done!");
|
await Console.Error.WriteLineAsync("[Builder] Done!");
|
||||||
}
|
}
|
||||||
|
|
||||||
private async Task AddSourceHtml(FeedSource source, WebResponse response) {
|
private IParserProvider GetProvider(string identifier)
|
||||||
await Console.Error.WriteLineAsync("[Builder/Html] Parsing Html");
|
{
|
||||||
|
IEnumerable<Type> possibleTypes = ReflectionUtilities.IterateImplementingTypes(
|
||||||
// Parse the HTML
|
typeof(IParserProvider),
|
||||||
HtmlDocument html = new HtmlDocument();
|
Assembly.GetExecutingAssembly()
|
||||||
using (StreamReader reader = new StreamReader(response.GetResponseStream()))
|
|
||||||
html.LoadHtml(await reader.ReadToEndAsync());
|
|
||||||
|
|
||||||
HtmlNode document = html.DocumentNode;
|
|
||||||
|
|
||||||
document.AbsolutifyUris(new Uri(source.Feed.Url));
|
|
||||||
|
|
||||||
|
|
||||||
await Console.Error.WriteLineAsync("[Builder/Html] Generating feed content");
|
|
||||||
|
|
||||||
// Add the title
|
|
||||||
await feed.WriteTitle(ReferenceSubstitutor.Replace(source.Feed.Title, document));
|
|
||||||
await feed.WriteSubtitle(ReferenceSubstitutor.Replace(source.Feed.Subtitle, document));
|
|
||||||
|
|
||||||
// Add the logo
|
|
||||||
if (source.Feed.Logo != null) {
|
|
||||||
HtmlNode logoNode = document.QuerySelector(source.Feed.Logo.Selector);
|
|
||||||
xml.WriteElementString("logo", logoNode.Attributes[source.Feed.Logo.Attribute].Value);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add the feed entries
|
|
||||||
foreach (HtmlNode nextNode in document.QuerySelectorAll(source.Entries.Selector)) {
|
|
||||||
HtmlNode urlNode = nextNode.QuerySelector(source.Entries.Url.Selector);
|
|
||||||
if (urlNode == null)
|
|
||||||
throw new ApplicationException("Error: Failed to match entry url selector against an element.");
|
|
||||||
|
|
||||||
string url = source.Entries.Url.Attribute == string.Empty ?
|
|
||||||
urlNode.InnerText : urlNode.Attributes[source.Entries.Url.Attribute].DeEntitizeValue;
|
|
||||||
|
|
||||||
Uri entryUri = new Uri(new Uri(source.Feed.Url), new Uri(url));
|
|
||||||
AtomEntry nextItem = new AtomEntry() {
|
|
||||||
Id = entryUri.ToString(),
|
|
||||||
Title = ReferenceSubstitutor.Replace(source.Entries.Title, nextNode),
|
|
||||||
Description = ReferenceSubstitutor.Replace(source.Entries.Content, nextNode),
|
|
||||||
ContentType = "html"
|
|
||||||
};
|
|
||||||
nextItem.AddLink(new SyndicationLink(entryUri, AtomLinkTypes.Alternate));
|
|
||||||
|
|
||||||
if (source.Entries.Published != null) {
|
|
||||||
nextItem.Published = DateTime.Parse(
|
|
||||||
nextNode.QuerySelectorAttributeOrText(
|
|
||||||
source.Entries.Published
|
|
||||||
)
|
|
||||||
);
|
);
|
||||||
|
|
||||||
|
foreach (Type next in possibleTypes) {
|
||||||
|
IParserProvider candidate = (IParserProvider)Activator.CreateInstance(next);
|
||||||
|
if (candidate.Identifier == identifier)
|
||||||
|
return candidate;
|
||||||
}
|
}
|
||||||
if (source.Entries.LastUpdated != null) {
|
|
||||||
nextItem.LastUpdated = DateTime.Parse(
|
|
||||||
nextNode.QuerySelectorAttributeOrText(
|
|
||||||
source.Entries.LastUpdated
|
|
||||||
)
|
|
||||||
);
|
|
||||||
}
|
|
||||||
else if (source.Entries.Published != null) // Use the publish date if available
|
|
||||||
nextItem.LastUpdated = nextItem.Published;
|
|
||||||
else // It requires one, apparently
|
|
||||||
nextItem.LastUpdated = DateTimeOffset.Now;
|
|
||||||
|
|
||||||
|
return null;
|
||||||
if (source.Entries.AuthorName != null) {
|
|
||||||
SyndicationPerson author = new SyndicationPerson(
|
|
||||||
nextNode.QuerySelectorAttributeOrText(source.Entries.AuthorName).Trim(),
|
|
||||||
""
|
|
||||||
);
|
|
||||||
if (source.Entries.AuthorUrl != null)
|
|
||||||
author.Uri = nextNode.QuerySelectorAttributeOrText(source.Entries.AuthorUrl);
|
|
||||||
|
|
||||||
nextItem.AddContributor(author);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
nextItem.AddContributor(new SyndicationPerson("Unknown", ""));
|
|
||||||
|
|
||||||
|
|
||||||
await feed.Write(nextItem);
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public async Task<string> Render()
|
public async Task<string> Render()
|
||||||
|
|
|
@ -37,7 +37,6 @@ namespace PolyFeed
|
||||||
/// The type of source document to expect.
|
/// The type of source document to expect.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public string SourceType { get; set; }
|
public string SourceType { get; set; }
|
||||||
public SourceType Type => (SourceType)Enum.Parse(typeof(SourceType), SourceType, true);
|
|
||||||
|
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
|
|
|
@ -77,6 +77,10 @@ namespace PolyFeed.Helpers
|
||||||
// FUTURE: Add caching here? Reflection is slow
|
// FUTURE: Add caching here? Reflection is slow
|
||||||
foreach (Type nextType in IterateAllLoadedTypes())
|
foreach (Type nextType in IterateAllLoadedTypes())
|
||||||
{
|
{
|
||||||
|
// The interface type is assignable from itself, so skip it explicitly - we only want the types that implement it
|
||||||
|
if (nextType == targetInterface)
|
||||||
|
continue;
|
||||||
|
|
||||||
// Make sure it implements the specified interface
|
// Make sure it implements the specified interface
|
||||||
if (!targetInterface.IsAssignableFrom(nextType))
|
if (!targetInterface.IsAssignableFrom(nextType))
|
||||||
continue;
|
continue;
|
||||||
|
|
|
@ -1,23 +1,119 @@
|
||||||
using System;
|
using System;
|
||||||
|
using System.IO;
|
||||||
using System.Net;
|
using System.Net;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
using System.Xml;
|
||||||
|
using Fizzler.Systems.HtmlAgilityPack;
|
||||||
|
using HtmlAgilityPack;
|
||||||
|
using Microsoft.SyndicationFeed;
|
||||||
using Microsoft.SyndicationFeed.Atom;
|
using Microsoft.SyndicationFeed.Atom;
|
||||||
|
using PolyFeed.Helpers;
|
||||||
|
|
||||||
namespace PolyFeed.ParserProviders
|
namespace PolyFeed.ParserProviders
|
||||||
{
|
{
|
||||||
public class HtmlParserProvider : IParserProvider
|
public class HtmlParserProvider : IParserProvider
|
||||||
{
|
{
|
||||||
|
private XmlWriter xml = null;
|
||||||
|
private AtomFeedWriter feed = null;
|
||||||
|
|
||||||
|
public string Identifier => "html";
|
||||||
|
|
||||||
public HtmlParserProvider()
|
public HtmlParserProvider()
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
public void ParseWebResponse(FeedSource source, WebResponse response)
|
public void SetOutputFeed(AtomFeedWriter inFeed, XmlWriter inXml) {
|
||||||
{
|
xml = inXml;
|
||||||
throw new NotImplementedException();
|
feed = inFeed;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void SetOutputFeed(AtomFeedWriter feed)
|
public async Task ParseWebResponse(FeedSource source, WebResponse response)
|
||||||
{
|
{
|
||||||
throw new NotImplementedException();
|
await Console.Error.WriteLineAsync("[Builder/Html] Parsing Html");
|
||||||
|
|
||||||
|
// Parse the HTML
|
||||||
|
HtmlDocument html = new HtmlDocument();
|
||||||
|
using (StreamReader reader = new StreamReader(response.GetResponseStream()))
|
||||||
|
html.LoadHtml(await reader.ReadToEndAsync());
|
||||||
|
|
||||||
|
HtmlNode document = html.DocumentNode;
|
||||||
|
|
||||||
|
document.AbsolutifyUris(new Uri(source.Feed.Url));
|
||||||
|
|
||||||
|
|
||||||
|
await Console.Error.WriteLineAsync("[Builder/Html] Generating feed content");
|
||||||
|
|
||||||
|
// Add the title
|
||||||
|
await feed.WriteTitle(ReferenceSubstitutor.Replace(source.Feed.Title, document));
|
||||||
|
await feed.WriteSubtitle(ReferenceSubstitutor.Replace(source.Feed.Subtitle, document));
|
||||||
|
|
||||||
|
// Add the logo
|
||||||
|
if (source.Feed.Logo != null) {
|
||||||
|
HtmlNode logoNode = document.QuerySelector(source.Feed.Logo.Selector);
|
||||||
|
xml.WriteElementString("logo", logoNode.Attributes[source.Feed.Logo.Attribute].Value);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add the feed entries
|
||||||
|
foreach (HtmlNode nextNode in document.QuerySelectorAll(source.Entries.Selector))
|
||||||
|
{
|
||||||
|
await addEntry(source, nextNode);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private async Task addEntry(FeedSource source, HtmlNode nextNode)
|
||||||
|
{
|
||||||
|
HtmlNode urlNode = nextNode.QuerySelector(source.Entries.Url.Selector);
|
||||||
|
if (urlNode == null)
|
||||||
|
throw new ApplicationException("Error: Failed to match entry url selector against an element.");
|
||||||
|
|
||||||
|
string url = source.Entries.Url.Attribute == string.Empty ?
|
||||||
|
urlNode.InnerText : urlNode.Attributes[source.Entries.Url.Attribute].DeEntitizeValue;
|
||||||
|
|
||||||
|
Uri entryUri = new Uri(new Uri(source.Feed.Url), new Uri(url));
|
||||||
|
AtomEntry nextItem = new AtomEntry() {
|
||||||
|
Id = entryUri.ToString(),
|
||||||
|
Title = ReferenceSubstitutor.Replace(source.Entries.Title, nextNode),
|
||||||
|
Description = ReferenceSubstitutor.Replace(source.Entries.Content, nextNode),
|
||||||
|
ContentType = "html"
|
||||||
|
};
|
||||||
|
nextItem.AddLink(new SyndicationLink(entryUri, AtomLinkTypes.Alternate));
|
||||||
|
|
||||||
|
if (source.Entries.Published != null) {
|
||||||
|
nextItem.Published = DateTime.Parse(
|
||||||
|
nextNode.QuerySelectorAttributeOrText(
|
||||||
|
source.Entries.Published
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (source.Entries.LastUpdated != null) {
|
||||||
|
nextItem.LastUpdated = DateTime.Parse(
|
||||||
|
nextNode.QuerySelectorAttributeOrText(
|
||||||
|
source.Entries.LastUpdated
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
else if (source.Entries.Published != null) // Use the publish date if available
|
||||||
|
nextItem.LastUpdated = nextItem.Published;
|
||||||
|
else // It requires one, apparently
|
||||||
|
nextItem.LastUpdated = DateTimeOffset.Now;
|
||||||
|
|
||||||
|
|
||||||
|
if (source.Entries.AuthorName != null) {
|
||||||
|
SyndicationPerson author = new SyndicationPerson(
|
||||||
|
nextNode.QuerySelectorAttributeOrText(source.Entries.AuthorName).Trim(),
|
||||||
|
""
|
||||||
|
);
|
||||||
|
if (source.Entries.AuthorUrl != null)
|
||||||
|
author.Uri = nextNode.QuerySelectorAttributeOrText(source.Entries.AuthorUrl);
|
||||||
|
|
||||||
|
nextItem.AddContributor(author);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
nextItem.AddContributor(new SyndicationPerson("Unknown", ""));
|
||||||
|
|
||||||
|
|
||||||
|
await feed.Write(nextItem);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
using System;
|
using System;
|
||||||
using System.Net;
|
using System.Net;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
using System.Xml;
|
||||||
using Microsoft.SyndicationFeed.Atom;
|
using Microsoft.SyndicationFeed.Atom;
|
||||||
|
|
||||||
namespace PolyFeed.ParserProviders
|
namespace PolyFeed.ParserProviders
|
||||||
|
@ -11,6 +13,7 @@ namespace PolyFeed.ParserProviders
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public interface IParserProvider
|
public interface IParserProvider
|
||||||
{
|
{
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// The identifier of this provider.
|
/// The identifier of this provider.
|
||||||
/// Used in the .toml configuration file to specify which parser to use.
|
/// Used in the .toml configuration file to specify which parser to use.
|
||||||
|
@ -21,12 +24,13 @@ namespace PolyFeed.ParserProviders
|
||||||
/// Sets the output feed that parsed output should be written to.
|
/// Sets the output feed that parsed output should be written to.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
/// <param name="feed">The output feed writer that output should be written to.</param>
|
/// <param name="feed">The output feed writer that output should be written to.</param>
|
||||||
void SetOutputFeed(AtomFeedWriter feed);
|
/// <param name="xml">The underlying XML writer. Try not to use this unless you *really* have to.</param>
|
||||||
|
void SetOutputFeed(AtomFeedWriter feed, XmlWriter xml);
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Parses a web response that's paired with a given <see cref="FeedSource" />.
|
/// Parses a web response that's paired with a given <see cref="FeedSource" />.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
/// <param name="source">The <see cref="FeedSource"/> object that the <paramref name="response"/> was generated from.</param>
|
/// <param name="source">The <see cref="FeedSource"/> object that the <paramref name="response"/> was generated from.</param>
|
||||||
/// <param name="response">The <see cref="WebResponse"/> that needs parsing.</param>
|
/// <param name="response">The <see cref="WebResponse"/> that needs parsing.</param>
|
||||||
void ParseWebResponse(FeedSource source, WebResponse response);
|
Task ParseWebResponse(FeedSource source, WebResponse response);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,7 +17,7 @@ using System.Runtime.CompilerServices;
|
||||||
// The form "{Major}.{Minor}.*" will automatically update the build and revision,
|
// The form "{Major}.{Minor}.*" will automatically update the build and revision,
|
||||||
// and "{Major}.{Minor}.{Build}.*" will update just the revision.
|
// and "{Major}.{Minor}.{Build}.*" will update just the revision.
|
||||||
|
|
||||||
[assembly: AssemblyVersion("1.0.*")]
|
[assembly: AssemblyVersion("0.1.1.*")]
|
||||||
|
|
||||||
// The following attributes are used to specify the signing key for the assembly,
|
// The following attributes are used to specify the signing key for the assembly,
|
||||||
// if desired. See the Mono documentation for more information about signing.
|
// if desired. See the Mono documentation for more information about signing.
|
||||||
|
|
Loading…
Reference in a new issue