mirror of
https://github.com/sbrl/PolyFeed.git
synced 2024-11-16 05:33:00 +00:00
Compare commits
7 commits
544bce4f54
...
205c7112c2
Author | SHA1 | Date | |
---|---|---|---|
205c7112c2 | |||
18ec61c80c | |||
d72f3256e9 | |||
5f3d1f824d | |||
6bb8da3660 | |||
89cac01006 | |||
7a70bf73a2 |
11 changed files with 347 additions and 109 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -1,3 +1,4 @@
|
||||||
|
*.atom
|
||||||
|
|
||||||
# Created by https://www.gitignore.io/api/visualstudio,monodevelop,csharp
|
# Created by https://www.gitignore.io/api/visualstudio,monodevelop,csharp
|
||||||
# Edit at https://www.gitignore.io/?templates=visualstudio,monodevelop,csharp
|
# Edit at https://www.gitignore.io/?templates=visualstudio,monodevelop,csharp
|
||||||
|
|
|
@ -9,28 +9,35 @@ using Fizzler.Systems.HtmlAgilityPack;
|
||||||
using HtmlAgilityPack;
|
using HtmlAgilityPack;
|
||||||
using Microsoft.SyndicationFeed;
|
using Microsoft.SyndicationFeed;
|
||||||
using Microsoft.SyndicationFeed.Atom;
|
using Microsoft.SyndicationFeed.Atom;
|
||||||
|
using PolyFeed.Helpers;
|
||||||
|
|
||||||
namespace PolyFeed
|
namespace PolyFeed
|
||||||
{
|
{
|
||||||
public class FeedBuilder
|
public class FeedBuilder
|
||||||
{
|
{
|
||||||
StringBuilder result = new StringBuilder();
|
MemoryStream stream = new MemoryStream();
|
||||||
XmlWriter xml = null;
|
XmlWriter xml = null;
|
||||||
AtomFeedWriter feed = null;
|
AtomFeedWriter feed = null;
|
||||||
|
|
||||||
public FeedBuilder() {
|
public FeedBuilder() {
|
||||||
xml = XmlWriter.Create(result);
|
xml = XmlWriter.Create(stream, new XmlWriterSettings() {
|
||||||
feed = new AtomFeedWriter(xml);
|
Indent = true,
|
||||||
|
Encoding = new UTF8Encoding(false),
|
||||||
|
WriteEndDocumentOnClose = true
|
||||||
|
});
|
||||||
|
feed = new AtomFeedWriter(xml, null, new AtomFormatter() { UseCDATA = true });
|
||||||
}
|
}
|
||||||
|
|
||||||
public async Task AddSource(FeedSource source) {
|
public async Task AddSource(FeedSource source) {
|
||||||
WebResponse response = await WebRequest.Create(source.Url).GetResponseAsync();
|
await Console.Error.WriteLineAsync("[Builder] Downloading content");
|
||||||
|
WebResponse response = await WebRequest.Create(source.Feed.Url).GetResponseAsync();
|
||||||
|
|
||||||
|
await Console.Error.WriteLineAsync("[Builder] Generating feed header");
|
||||||
|
|
||||||
// Write the header
|
// Write the header
|
||||||
await feed.WriteGenerator("Polyfeed", "https://gitlab.com/sbrl/PolyFeed.git", Program.GetProgramVersion());
|
await feed.WriteGenerator("Polyfeed", "https://github.com/sbrl/PolyFeed.git", Program.GetProgramVersion());
|
||||||
await feed.WriteId(source.Url);
|
await feed.WriteId(source.Feed.Url);
|
||||||
|
await feed.Write(new SyndicationLink(new Uri(source.Feed.Url), AtomLinkTypes.Self));
|
||||||
string lastModified = response.Headers.Get("last-modified");
|
string lastModified = response.Headers.Get("last-modified");
|
||||||
if (string.IsNullOrWhiteSpace(lastModified))
|
if (string.IsNullOrWhiteSpace(lastModified))
|
||||||
await feed.WriteUpdated(DateTimeOffset.Now);
|
await feed.WriteUpdated(DateTimeOffset.Now);
|
||||||
|
@ -39,64 +46,99 @@ namespace PolyFeed
|
||||||
|
|
||||||
string contentType = response.Headers.Get("content-type");
|
string contentType = response.Headers.Get("content-type");
|
||||||
|
|
||||||
switch (source.SourceType) {
|
switch (source.Feed.Type) {
|
||||||
case SourceType.HTML:
|
case SourceType.HTML:
|
||||||
await AddSourceHtml(source, response);
|
await AddSourceHtml(source, response);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
throw new NotImplementedException($"Error: The source type {source.SourceType} hasn't been implemented yet.");
|
throw new NotImplementedException($"Error: The source type {source.Feed.Type} hasn't been implemented yet.");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
await Console.Error.WriteLineAsync("[Builder] Done!");
|
||||||
}
|
}
|
||||||
|
|
||||||
private async Task AddSourceHtml(FeedSource source, WebResponse response) {
|
private async Task AddSourceHtml(FeedSource source, WebResponse response) {
|
||||||
|
await Console.Error.WriteLineAsync("[Builder/Html] Parsing Html");
|
||||||
|
|
||||||
|
// Parse the HTML
|
||||||
HtmlDocument html = new HtmlDocument();
|
HtmlDocument html = new HtmlDocument();
|
||||||
using (StreamReader reader = new StreamReader(response.GetResponseStream()))
|
using (StreamReader reader = new StreamReader(response.GetResponseStream()))
|
||||||
html.LoadHtml(await reader.ReadToEndAsync());
|
html.LoadHtml(await reader.ReadToEndAsync());
|
||||||
|
|
||||||
HtmlNode document = html.DocumentNode;
|
HtmlNode document = html.DocumentNode;
|
||||||
|
|
||||||
await feed.WriteTitle(ReferenceSubstitutor.Replace(source.Title, document));
|
document.AbsolutifyUris(new Uri(source.Feed.Url));
|
||||||
await feed.WriteSubtitle(ReferenceSubstitutor.Replace(source.Subtitle, document));
|
|
||||||
|
|
||||||
foreach (HtmlNode nextNode in document.QuerySelectorAll(source.EntrySelector)) {
|
|
||||||
HtmlNode urlNode = nextNode.QuerySelector(source.EntryUrlSelector);
|
|
||||||
string url = source.EntryUrlAttribute == string.Empty ?
|
|
||||||
urlNode.InnerText : urlNode.Attributes[source.EntryUrlAttribute].DeEntitizeValue;
|
|
||||||
|
|
||||||
|
|
||||||
SyndicationItem nextItem = new SyndicationItem() {
|
await Console.Error.WriteLineAsync("[Builder/Html] Generating feed content");
|
||||||
Id = url,
|
|
||||||
Title = ReferenceSubstitutor.Replace(source.EntryTitle, nextNode),
|
// Add the title
|
||||||
Description = ReferenceSubstitutor.Replace(source.EntryContent, nextNode)
|
await feed.WriteTitle(ReferenceSubstitutor.Replace(source.Feed.Title, document));
|
||||||
|
await feed.WriteSubtitle(ReferenceSubstitutor.Replace(source.Feed.Subtitle, document));
|
||||||
|
|
||||||
|
// Add the logo
|
||||||
|
if (source.Feed.Logo != null) {
|
||||||
|
HtmlNode logoNode = document.QuerySelector(source.Feed.Logo.Selector);
|
||||||
|
xml.WriteElementString("logo", logoNode.Attributes[source.Feed.Logo.Attribute].Value);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add the feed entries
|
||||||
|
foreach (HtmlNode nextNode in document.QuerySelectorAll(source.Entries.Selector)) {
|
||||||
|
HtmlNode urlNode = nextNode.QuerySelector(source.Entries.Url.Selector);
|
||||||
|
if (urlNode == null)
|
||||||
|
throw new ApplicationException("Error: Failed to match entry url selector against an element.");
|
||||||
|
|
||||||
|
string url = source.Entries.Url.Attribute == string.Empty ?
|
||||||
|
urlNode.InnerText : urlNode.Attributes[source.Entries.Url.Attribute].DeEntitizeValue;
|
||||||
|
|
||||||
|
Uri entryUri = new Uri(new Uri(source.Feed.Url), new Uri(url));
|
||||||
|
AtomEntry nextItem = new AtomEntry() {
|
||||||
|
Id = entryUri.ToString(),
|
||||||
|
Title = ReferenceSubstitutor.Replace(source.Entries.Title, nextNode),
|
||||||
|
Description = ReferenceSubstitutor.Replace(source.Entries.Content, nextNode),
|
||||||
|
ContentType = "html"
|
||||||
};
|
};
|
||||||
|
nextItem.AddLink(new SyndicationLink(entryUri, AtomLinkTypes.Alternate));
|
||||||
|
|
||||||
if (source.EntryPublishedSelector != string.Empty) {
|
if (source.Entries.Published != null) {
|
||||||
HtmlNode publishedNode = nextNode.QuerySelector(source.EntryPublishedSelector);
|
|
||||||
nextItem.Published = DateTime.Parse(
|
nextItem.Published = DateTime.Parse(
|
||||||
source.EntryPublishedAttribute == string.Empty
|
nextNode.QuerySelectorAttributeOrText(
|
||||||
? publishedNode.InnerText
|
source.Entries.Published
|
||||||
: publishedNode.Attributes[source.EntryPublishedAttribute].DeEntitizeValue
|
)
|
||||||
);
|
);
|
||||||
|
|
||||||
}
|
}
|
||||||
if (source.EntryPublishedSelector != string.Empty) {
|
if (source.Entries.Published != null) {
|
||||||
HtmlNode lastUpdatedNode = nextNode.QuerySelector(source.EntryLastUpdatedSelector);
|
nextItem.LastUpdated = DateTime.Parse(
|
||||||
nextItem.Published = DateTime.Parse(
|
nextNode.QuerySelectorAttributeOrText(
|
||||||
source.EntryLastUpdatedAttribute == string.Empty
|
source.Entries.LastUpdated
|
||||||
? lastUpdatedNode.InnerText
|
)
|
||||||
: lastUpdatedNode.Attributes[source.EntryLastUpdatedAttribute].DeEntitizeValue
|
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
else // It requires one, apparently
|
||||||
|
nextItem.LastUpdated = DateTimeOffset.Now;
|
||||||
|
|
||||||
|
SyndicationPerson author = new SyndicationPerson(
|
||||||
|
nextNode.QuerySelectorAttributeOrText(source.Entries.AuthorName).Trim(),
|
||||||
|
""
|
||||||
|
);
|
||||||
|
if(source.Entries.AuthorUrl != null)
|
||||||
|
author.Uri = nextNode.QuerySelectorAttributeOrText(source.Entries.AuthorUrl);
|
||||||
|
|
||||||
|
nextItem.AddContributor(author);
|
||||||
|
|
||||||
|
await feed.Write(nextItem);
|
||||||
|
|
||||||
await feed.Write(nextItem);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public string Render()
|
public async Task<string> Render()
|
||||||
{
|
{
|
||||||
xml.Flush();
|
await feed.Flush();
|
||||||
xml.WriteEndDocument();
|
xml.WriteEndDocument();
|
||||||
return result.ToString();
|
xml.Flush();
|
||||||
|
xml.Close();
|
||||||
|
return Encoding.UTF8.GetString(stream.ToArray());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,8 +4,29 @@ namespace PolyFeed
|
||||||
{
|
{
|
||||||
public enum SourceType { HTML, XML, JSON };
|
public enum SourceType { HTML, XML, JSON };
|
||||||
|
|
||||||
public class FeedSource
|
public class SelectorSettings
|
||||||
{
|
{
|
||||||
|
/// <summary>
|
||||||
|
/// A selector that matches against an element to select.
|
||||||
|
/// </summary>
|
||||||
|
public string Selector { get; set; }
|
||||||
|
/// <summary>
|
||||||
|
/// The name of the attribute to get the value of.
|
||||||
|
/// Set to an empty string to select the content of the element instead of the
|
||||||
|
/// content of an attribute.
|
||||||
|
/// </summary>
|
||||||
|
public string Attribute { get; set; }
|
||||||
|
|
||||||
|
public override string ToString()
|
||||||
|
{
|
||||||
|
return $"[SelectorSettings Selector = {Selector}, Attribute = {Attribute}]";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public class FeedSettings
|
||||||
|
{
|
||||||
|
public string Output { get; set; }
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// The url of the source document to parse.
|
/// The url of the source document to parse.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
|
@ -15,7 +36,9 @@ namespace PolyFeed
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// The type of source document to expect.
|
/// The type of source document to expect.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public SourceType SourceType { get; set; }
|
public string SourceType { get; set; }
|
||||||
|
public SourceType Type => (SourceType)Enum.Parse(typeof(SourceType), SourceType, true);
|
||||||
|
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// The title of the feed.
|
/// The title of the feed.
|
||||||
|
@ -29,22 +52,14 @@ namespace PolyFeed
|
||||||
/// <value>The subtitle.</value>
|
/// <value>The subtitle.</value>
|
||||||
public string Subtitle { get; set; }
|
public string Subtitle { get; set; }
|
||||||
|
|
||||||
|
|
||||||
#region Entries
|
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// A selector that matches against an element that contains the URL that an
|
/// Selector that matches against the feed logo url.
|
||||||
/// entry should link to.
|
|
||||||
/// Relative to the element selected by <see cref="EntrySelector" />.
|
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public string EntryUrlSelector { get; set; }
|
public SelectorSettings Logo { get; set; }
|
||||||
/// <summary>
|
}
|
||||||
/// The name of the attribute on the element selected by <see cref="EntryUrlSelector" />.
|
|
||||||
/// Set to an empty string to select the content of the element instead of the
|
|
||||||
/// content of an attribute.
|
|
||||||
/// </summary>
|
|
||||||
public string EntryUrlAttribute { get; set; } = "";
|
|
||||||
|
|
||||||
|
public class EntrySettings
|
||||||
|
{
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// The selector that specifies the location of nodes in the object model that
|
/// The selector that specifies the location of nodes in the object model that
|
||||||
/// should be added to the feed.
|
/// should be added to the feed.
|
||||||
|
@ -53,41 +68,42 @@ namespace PolyFeed
|
||||||
/// - XML: XPath (e.g. //element_name)
|
/// - XML: XPath (e.g. //element_name)
|
||||||
/// - JSON: Dotted object (e.g. items.fruit)
|
/// - JSON: Dotted object (e.g. items.fruit)
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public string EntrySelector { get; set; }
|
public string Selector { get; set; }
|
||||||
|
/// <summary>
|
||||||
|
/// Selector settings to get the URL that an entry should link to.
|
||||||
|
/// </summary>
|
||||||
|
public SelectorSettings Url { get; set; } = new SelectorSettings() { Attribute = "href" };
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// The title of an entry.
|
/// The title of an entry.
|
||||||
/// Selectors may be included in curly braces {} to substitute in content.
|
/// Selectors may be included in curly braces {} to substitute in content.
|
||||||
/// Such selectors are relative to the current feed entry.
|
/// Such selectors are relative to the current feed entry.
|
||||||
/// The format varies in the samem way as <see cref="EntrySelector" /> does.
|
/// The format varies in the same way as <see cref="Selector" /> does.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public string EntryTitle { get; set; }
|
public string Title { get; set; }
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Same as <see cref="EntryTitle" />, but for the body of an entry. HTML is allowed.
|
/// Same as <see cref="Title" />, but for the body of an entry. HTML is allowed.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public string EntryContent { get; set; }
|
public string Content { get; set; }
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// The selector for the node that contains the date published for an entry.
|
/// The selector for the date published for an entry.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public string EntryPublishedSelector { get; set; }
|
public SelectorSettings Published { get; set; }
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// The name of the attribute that contains the date published for an entry.
|
/// The selector for the date an entry was last updated.
|
||||||
/// Set to <see cref="string.Empty" /> to use the content of the node itself.
|
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public string EntryPublishedAttribute { get; set; }
|
public SelectorSettings LastUpdated { get; set; }
|
||||||
|
|
||||||
/// <summary>
|
public SelectorSettings AuthorName { get; set; }
|
||||||
/// Same as <see cref="EntryPublishedSelector" />, but for the last updated.
|
public SelectorSettings AuthorUrl { get; set; }
|
||||||
/// If not specified, the last updated will be omitted.
|
|
||||||
/// </summary>
|
|
||||||
public string EntryLastUpdatedSelector { get; set; }
|
|
||||||
/// <summary>
|
|
||||||
/// Same as <see cref="EntryPublishedAttribute" />.
|
|
||||||
/// </summary>
|
|
||||||
public string EntryLastUpdatedAttribute { get; set; }
|
|
||||||
|
|
||||||
#endregion
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public class FeedSource
|
||||||
|
{
|
||||||
|
public FeedSettings Feed { get; set; }
|
||||||
|
public EntrySettings Entries { get; set; }
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
64
PolyFeed/Helpers/HtmlHelpers.cs
Normal file
64
PolyFeed/Helpers/HtmlHelpers.cs
Normal file
|
@ -0,0 +1,64 @@
|
||||||
|
using System;
|
||||||
|
using System.Threading;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
using Fizzler.Systems.HtmlAgilityPack;
|
||||||
|
using HtmlAgilityPack;
|
||||||
|
|
||||||
|
namespace PolyFeed.Helpers
|
||||||
|
{
|
||||||
|
/// <summary>
/// Extension methods on <see cref="HtmlNode" /> for extracting values described by
/// a <see cref="SelectorSettings" /> and for rewriting links to absolute URIs.
/// </summary>
public static class HtmlHelpers
{
	/// <summary>
	/// Finds the first descendant matching <paramref name="settings" />' selector and
	/// returns the configured attribute's value, or the element's inner text when no
	/// attribute name is configured.
	/// </summary>
	/// <param name="htmlNode">The node to search beneath.</param>
	/// <param name="settings">The selector and (optional) attribute name to read.</param>
	/// <returns>The attribute value or the inner text of the matched element.</returns>
	/// <exception cref="ArgumentNullException">If either argument is null.</exception>
	/// <exception cref="ApplicationException">
	/// If the selector matches nothing, or the matched element lacks the requested attribute.
	/// </exception>
	public static string QuerySelectorAttributeOrText(this HtmlNode htmlNode, SelectorSettings settings)
	{
		HtmlNode selectedNode = SelectRequiredNode(htmlNode, settings);

		if (string.IsNullOrWhiteSpace(settings.Attribute))
			return selectedNode.InnerText;

		return ReadRequiredAttribute(selectedNode, settings);
	}

	/// <summary>
	/// Same as <see cref="QuerySelectorAttributeOrText" />, but falls back to the
	/// element's inner HTML instead of its inner text when no attribute name is configured.
	/// </summary>
	/// <param name="htmlNode">The node to search beneath.</param>
	/// <param name="settings">The selector and (optional) attribute name to read.</param>
	/// <returns>The attribute value or the inner HTML of the matched element.</returns>
	/// <exception cref="ArgumentNullException">If either argument is null.</exception>
	/// <exception cref="ApplicationException">
	/// If the selector matches nothing, or the matched element lacks the requested attribute.
	/// </exception>
	public static string QuerySelectorAttributeOrHtml(this HtmlNode htmlNode, SelectorSettings settings)
	{
		HtmlNode selectedNode = SelectRequiredNode(htmlNode, settings);

		if (string.IsNullOrWhiteSpace(settings.Attribute))
			return selectedNode.InnerHtml;

		return ReadRequiredAttribute(selectedNode, settings);
	}

	/// <summary>
	/// Searches for and converts all the links that are children of the current
	/// <see cref="HtmlNode" /> to absolute URIs.
	/// Only <c>a</c> and <c>img</c> elements are considered; when an element carries
	/// both <c>src</c> and <c>href</c>, <c>src</c> is the one rewritten.
	/// Elements with neither attribute, or whose value cannot be combined with
	/// <paramref name="baseUri" />, are left untouched.
	/// </summary>
	/// <param name="rootNode">The root node to search from.</param>
	/// <param name="baseUri">The base URI to use for conversion.</param>
	/// <returns>The number of nodes updated.</returns>
	/// <exception cref="ArgumentNullException">If either argument is null.</exception>
	public static int AbsolutifyUris(this HtmlNode rootNode, Uri baseUri)
	{
		if (rootNode == null)
			throw new ArgumentNullException(nameof(rootNode));
		if (baseUri == null)
			throw new ArgumentNullException(nameof(baseUri));

		int nodesUpdated = 0;

		// Deliberately sequential: the per-node work is tiny, and every iteration
		// writes into the same shared document.
		foreach (HtmlNode node in rootNode.QuerySelectorAll("a, img")) {
			// src takes precedence over href, as before.
			HtmlAttribute linkAttribute = node.Attributes["src"] ?? node.Attributes["href"];

			// e.g. <a name="top"> - nothing to rewrite.
			if (linkAttribute == null)
				continue;

			// A single malformed link must not abort generation of the whole feed.
			Uri absoluteUri;
			if (!Uri.TryCreate(baseUri, linkAttribute.Value, out absoluteUri))
				continue;

			linkAttribute.Value = absoluteUri.ToString();
			nodesUpdated++;
		}

		return nodesUpdated;
	}

	/// <summary>
	/// Runs the selector and fails loudly if nothing matches.
	/// </summary>
	private static HtmlNode SelectRequiredNode(HtmlNode htmlNode, SelectorSettings settings)
	{
		if (htmlNode == null)
			throw new ArgumentNullException(nameof(htmlNode));
		if (settings == null)
			throw new ArgumentNullException(nameof(settings));

		HtmlNode selectedNode = htmlNode.QuerySelector(settings.Selector);

		if (selectedNode == null)
			throw new ApplicationException($"Error: Selector {settings.Selector} failed to find any elements.");

		return selectedNode;
	}

	/// <summary>
	/// Reads the configured attribute, reporting a missing attribute as an
	/// <see cref="ApplicationException" /> rather than a null dereference.
	/// </summary>
	private static string ReadRequiredAttribute(HtmlNode selectedNode, SelectorSettings settings)
	{
		HtmlAttribute attribute = selectedNode.Attributes[settings.Attribute];

		if (attribute == null)
			throw new ApplicationException($"Error: The element matched by selector {settings.Selector} has no attribute named {settings.Attribute}.");

		return attribute.Value;
	}
}
|
||||||
|
}
|
|
@ -147,12 +147,15 @@
|
||||||
<Compile Include="SubstitutionLexer.cs" />
|
<Compile Include="SubstitutionLexer.cs" />
|
||||||
<Compile Include="Salamander.Core\LexerPool.cs" />
|
<Compile Include="Salamander.Core\LexerPool.cs" />
|
||||||
<Compile Include="ReferenceSubstitutor.cs" />
|
<Compile Include="ReferenceSubstitutor.cs" />
|
||||||
|
<Compile Include="SnakeCasePropertySelector.cs" />
|
||||||
|
<Compile Include="Helpers\HtmlHelpers.cs" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<None Include="packages.config" />
|
<None Include="packages.config" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<Folder Include="Salamander.Core\" />
|
<Folder Include="Salamander.Core\" />
|
||||||
|
<Folder Include="Helpers\" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
|
<Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
|
||||||
<Import Project="..\packages\NETStandard.Library.2.0.3\build\netstandard2.0\NETStandard.Library.targets" Condition="Exists('..\packages\NETStandard.Library.2.0.3\build\netstandard2.0\NETStandard.Library.targets')" />
|
<Import Project="..\packages\NETStandard.Library.2.0.3\build\netstandard2.0\NETStandard.Library.targets" Condition="Exists('..\packages\NETStandard.Library.2.0.3\build\netstandard2.0\NETStandard.Library.targets')" />
|
||||||
|
|
|
@ -13,7 +13,7 @@ namespace PolyFeed
|
||||||
public readonly string ProgramName = "PolyFeed";
|
public readonly string ProgramName = "PolyFeed";
|
||||||
public readonly string Description = "creates Atom feeds from websites that don't support it";
|
public readonly string Description = "creates Atom feeds from websites that don't support it";
|
||||||
|
|
||||||
public string ConfigFilepath = "feed.toml";
|
public string ConfigFilepath = null;
|
||||||
public string OutputFilepath = "feed.atom";
|
public string OutputFilepath = "feed.atom";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -38,20 +38,7 @@ namespace PolyFeed
|
||||||
{
|
{
|
||||||
case "-h":
|
case "-h":
|
||||||
case "--help":
|
case "--help":
|
||||||
Console.WriteLine($"{settings.ProgramName}, {GetProgramVersion()}");
|
showHelp();
|
||||||
Console.WriteLine(" By Starbeamrainbowlabs");
|
|
||||||
|
|
||||||
Console.WriteLine();
|
|
||||||
Console.WriteLine($"This program {settings.Description}.");
|
|
||||||
Console.WriteLine();
|
|
||||||
Console.WriteLine("Usage:");
|
|
||||||
Console.WriteLine($" ./{Path.GetFileName(Assembly.GetExecutingAssembly().Location)} [arguments]");
|
|
||||||
Console.WriteLine();
|
|
||||||
Console.WriteLine("Options:");
|
|
||||||
Console.WriteLine(" -h --help Displays this message");
|
|
||||||
Console.WriteLine(" -v --version Outputs the version number of this program");
|
|
||||||
Console.WriteLine(" -c --config Specifies the location of the feed configuration file to use to generate a feed (default: feed.toml)");
|
|
||||||
Console.WriteLine(" -o --output Specifies the location to write the output feed to (default: feed.atom)");
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
case "-v":
|
case "-v":
|
||||||
|
@ -71,37 +58,64 @@ namespace PolyFeed
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (settings.ConfigFilepath == null) {
|
||||||
|
Console.Error.WriteLine("Error: No configuration filepath detected. Try " +
|
||||||
|
"using --help to show usage information.");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
///// 2: Acquire environment variables /////
|
///// 2: Acquire environment variables /////
|
||||||
|
|
||||||
|
|
||||||
///// 3: Run program /////
|
///// 3: Run program /////
|
||||||
|
|
||||||
|
return run().Result;
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static async Task<string> run()
|
private static void showHelp()
|
||||||
{
|
{
|
||||||
FeedSource feedSource = new FeedSource();
|
Console.WriteLine($"{settings.ProgramName}, {GetProgramVersion()}");
|
||||||
TomlTable config = Toml.ReadFile(settings.ConfigFilepath, TomlSettings.Create());
|
Console.WriteLine(" By Starbeamrainbowlabs");
|
||||||
|
|
||||||
foreach (KeyValuePair<string, TomlObject> item in config) {
|
Console.WriteLine();
|
||||||
string key = Regex.Replace(
|
Console.WriteLine($"This program {settings.Description}.");
|
||||||
item.Key,
|
Console.WriteLine();
|
||||||
@"(^|_)[A-Za-z0-9]",
|
Console.WriteLine("Usage:");
|
||||||
(match) => match.Value.Replace("_", "").ToUpper()
|
Console.WriteLine($" ./{Path.GetFileName(Assembly.GetExecutingAssembly().Location)} [arguments]");
|
||||||
);
|
Console.WriteLine();
|
||||||
string value = item.Value.Get<TomlString>().Value;
|
Console.WriteLine("Options:");
|
||||||
feedSource.GetType().GetProperty(value).SetValue(
|
Console.WriteLine(" -h --help Displays this message");
|
||||||
feedSource,
|
Console.WriteLine(" -v --version Outputs the version number of this program");
|
||||||
value
|
Console.WriteLine(" -c --config Specifies the location of the TOML feed configuration file to use to generate a feed");
|
||||||
);
|
Console.WriteLine(" -o --output Specifies the location to write the output feed to (default: feed.atom)");
|
||||||
|
}
|
||||||
|
|
||||||
|
private static async Task<int> run()
|
||||||
|
{
|
||||||
|
TomlSettings parseSettings = TomlSettings.Create(s =>
|
||||||
|
s.ConfigurePropertyMapping(m => m.UseTargetPropertySelector(new SnakeCasePropertySelector()))
|
||||||
|
);
|
||||||
|
FeedSource feedSource = Toml.ReadFile<FeedSource>(settings.ConfigFilepath, parseSettings);
|
||||||
|
|
||||||
|
if (feedSource == null) {
|
||||||
|
Console.Error.WriteLine("Error: Somethine went wrong when parsing your settings file :-(");
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!string.IsNullOrWhiteSpace(feedSource.Feed.Output))
|
||||||
|
settings.OutputFilepath = feedSource.Feed.Output;
|
||||||
|
|
||||||
FeedBuilder feedBuilder = new FeedBuilder();
|
FeedBuilder feedBuilder = new FeedBuilder();
|
||||||
await feedBuilder.AddSource(feedSource);
|
try {
|
||||||
return await feedBuilder.Render();
|
await feedBuilder.AddSource(feedSource);
|
||||||
|
} catch (ApplicationException error) {
|
||||||
|
Console.Error.WriteLine(error.Message);
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
await Console.Error.WriteLineAsync($"[Output] Writing feed to {settings.OutputFilepath}");
|
||||||
|
File.WriteAllText(settings.OutputFilepath, await feedBuilder.Render());
|
||||||
|
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -15,10 +15,14 @@ namespace PolyFeed
|
||||||
SubstitutionLexer lexer = lexerPool.AcquireLexer();
|
SubstitutionLexer lexer = lexerPool.AcquireLexer();
|
||||||
lexer.Initialise(inputString);
|
lexer.Initialise(inputString);
|
||||||
|
|
||||||
|
bool useHtml = true;
|
||||||
|
|
||||||
foreach (LexerToken<SubstitutionToken> nextToken in lexer.TokenStream())
|
foreach (LexerToken<SubstitutionToken> nextToken in lexer.TokenStream())
|
||||||
{
|
{
|
||||||
switch (nextToken.Type) {
|
switch (nextToken.Type) {
|
||||||
case SubstitutionToken.BraceOpen:
|
case SubstitutionToken.BraceOpen:
|
||||||
|
useHtml = nextToken.Value.Length == 1;
|
||||||
|
|
||||||
lexer.SaveRuleStates();
|
lexer.SaveRuleStates();
|
||||||
lexer.EnableRule(SubstitutionToken.Identifier);
|
lexer.EnableRule(SubstitutionToken.Identifier);
|
||||||
lexer.DisableRule(SubstitutionToken.Text);
|
lexer.DisableRule(SubstitutionToken.Text);
|
||||||
|
@ -32,7 +36,12 @@ namespace PolyFeed
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case SubstitutionToken.Identifier:
|
case SubstitutionToken.Identifier:
|
||||||
result.Append(rootElement.QuerySelector(nextToken.Value));
|
HtmlNode targetNode = rootElement.QuerySelector(nextToken.Value);
|
||||||
|
if (targetNode == null) {
|
||||||
|
Console.Error.WriteLine($"Warning: Selector {nextToken.Value} failed to match any elements");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
result.Append(useHtml ? targetNode.InnerHtml : targetNode.InnerText);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
27
PolyFeed/SnakeCasePropertySelector.cs
Normal file
27
PolyFeed/SnakeCasePropertySelector.cs
Normal file
|
@ -0,0 +1,27 @@
|
||||||
|
using System;
|
||||||
|
using System.Reflection;
|
||||||
|
using System.Text.RegularExpressions;
|
||||||
|
using Nett;
|
||||||
|
|
||||||
|
namespace PolyFeed
|
||||||
|
{
|
||||||
|
/// <summary>
/// Maps snake_case configuration keys onto PascalCase property names
/// (e.g. <c>source_type</c> becomes <c>SourceType</c>) so that they can be
/// resolved against a target type.
/// </summary>
public class SnakeCasePropertySelector : ITargetPropertySelector
{
	/// <summary>
	/// Matches the first character of the key, and every character that directly
	/// follows an underscore (together with that underscore).
	/// </summary>
	private static readonly Regex segmentStart = new Regex(@"(^|_)[A-Za-z0-9]");

	/// <summary>
	/// Converts <paramref name="key" /> to PascalCase and looks up a public
	/// property of that name on <paramref name="target" />.
	/// </summary>
	/// <param name="key">The snake_case key to resolve.</param>
	/// <param name="target">The type to search for a matching property.</param>
	/// <returns>The matching property, or null if <paramref name="target" /> has none.</returns>
	public PropertyInfo TryGetTargetProperty(string key, Type target)
	{
		// Invariant casing: property names are identifiers, so the lookup must not
		// depend on the current culture (e.g. "i" upper-cases to "İ" under tr-TR).
		string transformedKey = segmentStart.Replace(
			key,
			(match) => match.Value.Replace("_", "").ToUpperInvariant()
		);

		return target.GetProperty(transformedKey);
	}
}
|
||||||
|
}
|
|
@ -24,8 +24,8 @@ namespace PolyFeed
|
||||||
AddRules(new List<LexerRule<SubstitutionToken>>() {
|
AddRules(new List<LexerRule<SubstitutionToken>>() {
|
||||||
new LexerRule<SubstitutionToken>(SubstitutionToken.Text, @"[^{}]+"),
|
new LexerRule<SubstitutionToken>(SubstitutionToken.Text, @"[^{}]+"),
|
||||||
new LexerRule<SubstitutionToken>(SubstitutionToken.Identifier, @"[^{}]+"),
|
new LexerRule<SubstitutionToken>(SubstitutionToken.Identifier, @"[^{}]+"),
|
||||||
new LexerRule<SubstitutionToken>(SubstitutionToken.BraceOpen, @"\{"),
|
new LexerRule<SubstitutionToken>(SubstitutionToken.BraceOpen, @"\{+"),
|
||||||
new LexerRule<SubstitutionToken>(SubstitutionToken.BraceClose, @"\}"),
|
new LexerRule<SubstitutionToken>(SubstitutionToken.BraceClose, @"\}+"),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
41
README.md
41
README.md
|
@ -2,4 +2,43 @@
|
||||||
|
|
||||||
> Create Atom feeds for websites that don't support it
|
> Create Atom feeds for websites that don't support it
|
||||||
|
|
||||||
Currently in alpha.
|
PolyFeed generates Atom feeds out of websites that don't have one, such as _Twitter_ or _Facebook_ (* cough * * cough *). It supports any platform on which C♯ .NET applications can run, including Linux and Windows.
|
||||||
|
|
||||||
|
|
||||||
|
## Install
|
||||||
|
|
||||||
|
### From a Release
|
||||||
|
Download and extract the [latest release](https://github.com/sbrl/PolyFeed/releases/latest). You're done!
|
||||||
|
|
||||||
|
### Building from Source
|
||||||
|
|
||||||
|
Clone this repository, and then build the code with `msbuild`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
msbuild /p:Configuration=Release
|
||||||
|
```
|
||||||
|
|
||||||
|
The build output will be written to `PolyFeed/bin/Release`.
|
||||||
|
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
PolyFeed uses [TOML](https://github.com/toml-lang/toml) configuration files to define Atom feeds. First, create a configuration file that specifies how PolyFeed should generate an Atom feed - or use [one of the examples](https://github.com/sbrl/PolyFeed/tree/master/examples).
|
||||||
|
|
||||||
|
Then, run PolyFeed over it:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
path/to/PolyFeed.exe --config path/to/config.toml
|
||||||
|
```
|
||||||
|
|
||||||
|
...it will generate the named `.atom` file automatically, keeping you up-to-date on its progress and any errors it encounters.
|
||||||
|
|
||||||
|
Use `PolyFeed.exe --help` to display the full range of command-line flags available.
|
||||||
|
|
||||||
|
## Contributing
|
||||||
|
Contributions are welcome - feel free to [open an issue](https://github.com/sbrl/PolyFeed/issues/new) or (even better) a [pull request](https://github.com/sbrl/PolyFeed/compare).
|
||||||
|
|
||||||
|
The [issue tracker](https://github.com/sbrl/PolyFeed/issues) is the place where all the tasks relating to the project are kept.
|
||||||
|
|
||||||
|
|
||||||
|
## Licence
|
||||||
|
PolyFeed is released under the _Mozilla Public License 2.0_. The full license text is included in the `LICENSE` file in this repository.
|
||||||
|
|
23
examples/twitter.toml
Normal file
23
examples/twitter.toml
Normal file
|
@ -0,0 +1,23 @@
|
||||||
|
[feed]
|
||||||
|
output = "euruicimages-Twitter.atom"
|
||||||
|
|
||||||
|
url = "https://mobile.twitter.com/euruicimages"
|
||||||
|
|
||||||
|
source_type = "html"
|
||||||
|
|
||||||
|
title = "{{.username}} on Twitter"
|
||||||
|
subtitle = "{{.details}}"
|
||||||
|
|
||||||
|
logo_url = { selector = ".avatar img", attribute = "src" }
|
||||||
|
|
||||||
|
[entries]
|
||||||
|
selector = ".tweet"
|
||||||
|
title = "Tweet by {{.username}} {{.tweet-social-context}}"
|
||||||
|
content = "<p><strong>{.avatar}{.fullname}:</strong></p>\n{.tweet-text}"
|
||||||
|
|
||||||
|
url = { selector = ".metadata a", attribute = "href" }
|
||||||
|
|
||||||
|
author_name = { selector = ".username" }
|
||||||
|
|
||||||
|
# published = { selector = "", attribute = "" }
|
||||||
|
# last_updated = { selector = "", attribute = "" }
|
Loading…
Reference in a new issue