mirror of
https://github.com/sbrl/PolyFeed.git
synced 2024-11-16 05:33:00 +00:00
Compare commits
No commits in common. "205c7112c26f0e6feb5d8a656b9bfebb2acddc1c" and "544bce4f54f868e94865bf02e977992c29698ff4" have entirely different histories.
205c7112c2
...
544bce4f54
11 changed files with 114 additions and 352 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -1,4 +1,3 @@
|
||||||
*.atom
|
|
||||||
|
|
||||||
# Created by https://www.gitignore.io/api/visualstudio,monodevelop,csharp
|
# Created by https://www.gitignore.io/api/visualstudio,monodevelop,csharp
|
||||||
# Edit at https://www.gitignore.io/?templates=visualstudio,monodevelop,csharp
|
# Edit at https://www.gitignore.io/?templates=visualstudio,monodevelop,csharp
|
||||||
|
|
|
@ -9,35 +9,28 @@ using Fizzler.Systems.HtmlAgilityPack;
|
||||||
using HtmlAgilityPack;
|
using HtmlAgilityPack;
|
||||||
using Microsoft.SyndicationFeed;
|
using Microsoft.SyndicationFeed;
|
||||||
using Microsoft.SyndicationFeed.Atom;
|
using Microsoft.SyndicationFeed.Atom;
|
||||||
using PolyFeed.Helpers;
|
|
||||||
|
|
||||||
namespace PolyFeed
|
namespace PolyFeed
|
||||||
{
|
{
|
||||||
public class FeedBuilder
|
public class FeedBuilder
|
||||||
{
|
{
|
||||||
MemoryStream stream = new MemoryStream();
|
StringBuilder result = new StringBuilder();
|
||||||
XmlWriter xml = null;
|
XmlWriter xml = null;
|
||||||
AtomFeedWriter feed = null;
|
AtomFeedWriter feed = null;
|
||||||
|
|
||||||
public FeedBuilder() {
|
public FeedBuilder() {
|
||||||
xml = XmlWriter.Create(stream, new XmlWriterSettings() {
|
xml = XmlWriter.Create(result);
|
||||||
Indent = true,
|
feed = new AtomFeedWriter(xml);
|
||||||
Encoding = new UTF8Encoding(false),
|
|
||||||
WriteEndDocumentOnClose = true
|
|
||||||
});
|
|
||||||
feed = new AtomFeedWriter(xml, null, new AtomFormatter() { UseCDATA = true });
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public async Task AddSource(FeedSource source) {
|
public async Task AddSource(FeedSource source) {
|
||||||
await Console.Error.WriteLineAsync("[Builder] Downloading content");
|
WebResponse response = await WebRequest.Create(source.Url).GetResponseAsync();
|
||||||
WebResponse response = await WebRequest.Create(source.Feed.Url).GetResponseAsync();
|
|
||||||
|
|
||||||
await Console.Error.WriteLineAsync("[Builder] Generating feed header");
|
|
||||||
|
|
||||||
// Write the header
|
// Write the header
|
||||||
await feed.WriteGenerator("Polyfeed", "https://github.com/sbrl/PolyFeed.git", Program.GetProgramVersion());
|
await feed.WriteGenerator("Polyfeed", "https://gitlab.com/sbrl/PolyFeed.git", Program.GetProgramVersion());
|
||||||
await feed.WriteId(source.Feed.Url);
|
await feed.WriteId(source.Url);
|
||||||
await feed.Write(new SyndicationLink(new Uri(source.Feed.Url), AtomLinkTypes.Self));
|
|
||||||
string lastModified = response.Headers.Get("last-modified");
|
string lastModified = response.Headers.Get("last-modified");
|
||||||
if (string.IsNullOrWhiteSpace(lastModified))
|
if (string.IsNullOrWhiteSpace(lastModified))
|
||||||
await feed.WriteUpdated(DateTimeOffset.Now);
|
await feed.WriteUpdated(DateTimeOffset.Now);
|
||||||
|
@ -46,99 +39,64 @@ namespace PolyFeed
|
||||||
|
|
||||||
string contentType = response.Headers.Get("content-type");
|
string contentType = response.Headers.Get("content-type");
|
||||||
|
|
||||||
switch (source.Feed.Type) {
|
switch (source.SourceType) {
|
||||||
case SourceType.HTML:
|
case SourceType.HTML:
|
||||||
await AddSourceHtml(source, response);
|
await AddSourceHtml(source, response);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
throw new NotImplementedException($"Error: The source type {source.Feed.Type} hasn't been implemented yet.");
|
throw new NotImplementedException($"Error: The source type {source.SourceType} hasn't been implemented yet.");
|
||||||
}
|
}
|
||||||
|
|
||||||
await Console.Error.WriteLineAsync("[Builder] Done!");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private async Task AddSourceHtml(FeedSource source, WebResponse response) {
|
private async Task AddSourceHtml(FeedSource source, WebResponse response) {
|
||||||
await Console.Error.WriteLineAsync("[Builder/Html] Parsing Html");
|
|
||||||
|
|
||||||
// Parse the HTML
|
|
||||||
HtmlDocument html = new HtmlDocument();
|
HtmlDocument html = new HtmlDocument();
|
||||||
using (StreamReader reader = new StreamReader(response.GetResponseStream()))
|
using (StreamReader reader = new StreamReader(response.GetResponseStream()))
|
||||||
html.LoadHtml(await reader.ReadToEndAsync());
|
html.LoadHtml(await reader.ReadToEndAsync());
|
||||||
|
|
||||||
HtmlNode document = html.DocumentNode;
|
HtmlNode document = html.DocumentNode;
|
||||||
|
|
||||||
document.AbsolutifyUris(new Uri(source.Feed.Url));
|
await feed.WriteTitle(ReferenceSubstitutor.Replace(source.Title, document));
|
||||||
|
await feed.WriteSubtitle(ReferenceSubstitutor.Replace(source.Subtitle, document));
|
||||||
|
|
||||||
|
foreach (HtmlNode nextNode in document.QuerySelectorAll(source.EntrySelector)) {
|
||||||
|
HtmlNode urlNode = nextNode.QuerySelector(source.EntryUrlSelector);
|
||||||
|
string url = source.EntryUrlAttribute == string.Empty ?
|
||||||
|
urlNode.InnerText : urlNode.Attributes[source.EntryUrlAttribute].DeEntitizeValue;
|
||||||
|
|
||||||
|
|
||||||
await Console.Error.WriteLineAsync("[Builder/Html] Generating feed content");
|
SyndicationItem nextItem = new SyndicationItem() {
|
||||||
|
Id = url,
|
||||||
// Add the title
|
Title = ReferenceSubstitutor.Replace(source.EntryTitle, nextNode),
|
||||||
await feed.WriteTitle(ReferenceSubstitutor.Replace(source.Feed.Title, document));
|
Description = ReferenceSubstitutor.Replace(source.EntryContent, nextNode)
|
||||||
await feed.WriteSubtitle(ReferenceSubstitutor.Replace(source.Feed.Subtitle, document));
|
|
||||||
|
|
||||||
// Add the logo
|
|
||||||
if (source.Feed.Logo != null) {
|
|
||||||
HtmlNode logoNode = document.QuerySelector(source.Feed.Logo.Selector);
|
|
||||||
xml.WriteElementString("logo", logoNode.Attributes[source.Feed.Logo.Attribute].Value);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add the feed entries
|
|
||||||
foreach (HtmlNode nextNode in document.QuerySelectorAll(source.Entries.Selector)) {
|
|
||||||
HtmlNode urlNode = nextNode.QuerySelector(source.Entries.Url.Selector);
|
|
||||||
if (urlNode == null)
|
|
||||||
throw new ApplicationException("Error: Failed to match entry url selector against an element.");
|
|
||||||
|
|
||||||
string url = source.Entries.Url.Attribute == string.Empty ?
|
|
||||||
urlNode.InnerText : urlNode.Attributes[source.Entries.Url.Attribute].DeEntitizeValue;
|
|
||||||
|
|
||||||
Uri entryUri = new Uri(new Uri(source.Feed.Url), new Uri(url));
|
|
||||||
AtomEntry nextItem = new AtomEntry() {
|
|
||||||
Id = entryUri.ToString(),
|
|
||||||
Title = ReferenceSubstitutor.Replace(source.Entries.Title, nextNode),
|
|
||||||
Description = ReferenceSubstitutor.Replace(source.Entries.Content, nextNode),
|
|
||||||
ContentType = "html"
|
|
||||||
};
|
};
|
||||||
nextItem.AddLink(new SyndicationLink(entryUri, AtomLinkTypes.Alternate));
|
|
||||||
|
|
||||||
if (source.Entries.Published != null) {
|
if (source.EntryPublishedSelector != string.Empty) {
|
||||||
|
HtmlNode publishedNode = nextNode.QuerySelector(source.EntryPublishedSelector);
|
||||||
nextItem.Published = DateTime.Parse(
|
nextItem.Published = DateTime.Parse(
|
||||||
nextNode.QuerySelectorAttributeOrText(
|
source.EntryPublishedAttribute == string.Empty
|
||||||
source.Entries.Published
|
? publishedNode.InnerText
|
||||||
)
|
: publishedNode.Attributes[source.EntryPublishedAttribute].DeEntitizeValue
|
||||||
);
|
);
|
||||||
|
|
||||||
}
|
}
|
||||||
if (source.Entries.Published != null) {
|
if (source.EntryPublishedSelector != string.Empty) {
|
||||||
nextItem.LastUpdated = DateTime.Parse(
|
HtmlNode lastUpdatedNode = nextNode.QuerySelector(source.EntryLastUpdatedSelector);
|
||||||
nextNode.QuerySelectorAttributeOrText(
|
nextItem.Published = DateTime.Parse(
|
||||||
source.Entries.LastUpdated
|
source.EntryLastUpdatedAttribute == string.Empty
|
||||||
)
|
? lastUpdatedNode.InnerText
|
||||||
|
: lastUpdatedNode.Attributes[source.EntryLastUpdatedAttribute].DeEntitizeValue
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
else // It requires one, apparently
|
|
||||||
nextItem.LastUpdated = DateTimeOffset.Now;
|
|
||||||
|
|
||||||
SyndicationPerson author = new SyndicationPerson(
|
|
||||||
nextNode.QuerySelectorAttributeOrText(source.Entries.AuthorName).Trim(),
|
|
||||||
""
|
|
||||||
);
|
|
||||||
if(source.Entries.AuthorUrl != null)
|
|
||||||
author.Uri = nextNode.QuerySelectorAttributeOrText(source.Entries.AuthorUrl);
|
|
||||||
|
|
||||||
nextItem.AddContributor(author);
|
|
||||||
|
|
||||||
await feed.Write(nextItem);
|
|
||||||
|
|
||||||
|
await feed.Write(nextItem);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public async Task<string> Render()
|
public string Render()
|
||||||
{
|
{
|
||||||
await feed.Flush();
|
|
||||||
xml.WriteEndDocument();
|
|
||||||
xml.Flush();
|
xml.Flush();
|
||||||
xml.Close();
|
xml.WriteEndDocument();
|
||||||
return Encoding.UTF8.GetString(stream.ToArray());
|
return result.ToString();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,29 +4,8 @@ namespace PolyFeed
|
||||||
{
|
{
|
||||||
public enum SourceType { HTML, XML, JSON };
|
public enum SourceType { HTML, XML, JSON };
|
||||||
|
|
||||||
public class SelectorSettings
|
public class FeedSource
|
||||||
{
|
{
|
||||||
/// <summary>
|
|
||||||
/// A selector that matches against an element to select.
|
|
||||||
/// </summary>
|
|
||||||
public string Selector { get; set; }
|
|
||||||
/// <summary>
|
|
||||||
/// The name of the attribute to get the value of.
|
|
||||||
/// Set to an empty string to select the content of the element instead of the
|
|
||||||
/// content of an attribute.
|
|
||||||
/// </summary>
|
|
||||||
public string Attribute { get; set; }
|
|
||||||
|
|
||||||
public override string ToString()
|
|
||||||
{
|
|
||||||
return $"[SelectorSettings Selector = {Selector}, Attribute = {Attribute}]";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public class FeedSettings
|
|
||||||
{
|
|
||||||
public string Output { get; set; }
|
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// The url of the source document to parse.
|
/// The url of the source document to parse.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
|
@ -36,9 +15,7 @@ namespace PolyFeed
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// The type of source document to expect.
|
/// The type of source document to expect.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public string SourceType { get; set; }
|
public SourceType SourceType { get; set; }
|
||||||
public SourceType Type => (SourceType)Enum.Parse(typeof(SourceType), SourceType, true);
|
|
||||||
|
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// The title of the feed.
|
/// The title of the feed.
|
||||||
|
@ -52,14 +29,22 @@ namespace PolyFeed
|
||||||
/// <value>The subtitle.</value>
|
/// <value>The subtitle.</value>
|
||||||
public string Subtitle { get; set; }
|
public string Subtitle { get; set; }
|
||||||
|
|
||||||
/// <summary>
|
|
||||||
/// Selector that matches against the feed logo url.
|
|
||||||
/// </summary>
|
|
||||||
public SelectorSettings Logo { get; set; }
|
|
||||||
}
|
|
||||||
|
|
||||||
public class EntrySettings
|
#region Entries
|
||||||
{
|
|
||||||
|
/// <summary>
|
||||||
|
/// A selector that matches against an element that contains the URL that an
|
||||||
|
/// entry should link to.
|
||||||
|
/// Relative to the element selected by <see cref="EntrySelector" />.
|
||||||
|
/// </summary>
|
||||||
|
public string EntryUrlSelector { get; set; }
|
||||||
|
/// <summary>
|
||||||
|
/// The name of the attribute on the element selected by <see cref="EntryUrlSelector" />.
|
||||||
|
/// Set to an empty string to select the content of the element instead of the
|
||||||
|
/// content of an attribute.
|
||||||
|
/// </summary>
|
||||||
|
public string EntryUrlAttribute { get; set; } = "";
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// The selector that specifies the location of nodes in the object model that
|
/// The selector that specifies the location of nodes in the object model that
|
||||||
/// should be added to the feed.
|
/// should be added to the feed.
|
||||||
|
@ -68,42 +53,41 @@ namespace PolyFeed
|
||||||
/// - XML: XPath (e.g. //element_name)
|
/// - XML: XPath (e.g. //element_name)
|
||||||
/// - JSON: Dotted object (e.g. items.fruit)
|
/// - JSON: Dotted object (e.g. items.fruit)
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public string Selector { get; set; }
|
public string EntrySelector { get; set; }
|
||||||
/// <summary>
|
|
||||||
/// Selector settings to get the URL that an entry should link to.
|
|
||||||
/// </summary>
|
|
||||||
public SelectorSettings Url { get; set; } = new SelectorSettings() { Attribute = "href" };
|
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// The title of an entry.
|
/// The title of an entry.
|
||||||
/// Selectors may be included in curly braces {} to substitute in content.
|
/// Selectors may be included in curly braces {} to substitute in content.
|
||||||
/// Such selectors are relative to the current feed entry.
|
/// Such selectors are relative to the current feed entry.
|
||||||
/// The format varies in the same way as <see cref="Selector" /> does.
|
/// The format varies in the same way as <see cref="EntrySelector" /> does.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public string Title { get; set; }
|
public string EntryTitle { get; set; }
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Same as <see cref="Title" />, but for the body of an entry. HTML is allowed.
|
/// Same as <see cref="EntryTitle" />, but for the body of an entry. HTML is allowed.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public string Content { get; set; }
|
public string EntryContent { get; set; }
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// The selector for the date published for an entry.
|
/// The selector for the node that contains the date published for an entry.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public SelectorSettings Published { get; set; }
|
public string EntryPublishedSelector { get; set; }
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// The selector for the date an entry was last updated.
|
/// The name of the attribute that contains the date published for an entry.
|
||||||
|
/// Set to <see cref="string.Empty" /> to use the content of the node itself.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public SelectorSettings LastUpdated { get; set; }
|
public string EntryPublishedAttribute { get; set; }
|
||||||
|
|
||||||
public SelectorSettings AuthorName { get; set; }
|
/// <summary>
|
||||||
public SelectorSettings AuthorUrl { get; set; }
|
/// Same as <see cref="EntryPublishedSelector" />, but for the last updated.
|
||||||
|
/// If not specified, the last updated will be omitted.
|
||||||
|
/// </summary>
|
||||||
|
public string EntryLastUpdatedSelector { get; set; }
|
||||||
|
/// <summary>
|
||||||
|
/// Same as <see cref="EntryPublishedAttribute" />.
|
||||||
|
/// </summary>
|
||||||
|
public string EntryLastUpdatedAttribute { get; set; }
|
||||||
|
|
||||||
}
|
#endregion
|
||||||
|
|
||||||
public class FeedSource
|
|
||||||
{
|
|
||||||
public FeedSettings Feed { get; set; }
|
|
||||||
public EntrySettings Entries { get; set; }
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,64 +0,0 @@
|
||||||
using System;
|
|
||||||
using System.Threading;
|
|
||||||
using System.Threading.Tasks;
|
|
||||||
using Fizzler.Systems.HtmlAgilityPack;
|
|
||||||
using HtmlAgilityPack;
|
|
||||||
|
|
||||||
namespace PolyFeed.Helpers
|
|
||||||
{
|
|
||||||
public static class HtmlHelpers
|
|
||||||
{
|
|
||||||
public static string QuerySelectorAttributeOrText(this HtmlNode htmlNode, SelectorSettings settings)
|
|
||||||
{
|
|
||||||
HtmlNode selectedNode = htmlNode.QuerySelector(settings.Selector);
|
|
||||||
|
|
||||||
if (selectedNode == null)
|
|
||||||
throw new ApplicationException($"Error: Selector {settings.Selector} failed to find any elements.");
|
|
||||||
|
|
||||||
if (string.IsNullOrWhiteSpace(settings.Attribute))
|
|
||||||
return selectedNode.InnerText;
|
|
||||||
|
|
||||||
return selectedNode.Attributes[settings.Attribute].Value;
|
|
||||||
}
|
|
||||||
public static string QuerySelectorAttributeOrHtml(this HtmlNode htmlNode, SelectorSettings settings)
|
|
||||||
{
|
|
||||||
HtmlNode selectedNode = htmlNode.QuerySelector(settings.Selector);
|
|
||||||
|
|
||||||
if (selectedNode == null)
|
|
||||||
throw new ApplicationException($"Error: Selector {settings.Selector} failed to find any elements.");
|
|
||||||
|
|
||||||
if (string.IsNullOrWhiteSpace(settings.Attribute))
|
|
||||||
return selectedNode.InnerHtml;
|
|
||||||
|
|
||||||
return selectedNode.Attributes[settings.Attribute].Value;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// <summary>
|
|
||||||
/// Searches for and converts all the links that are children of the current
|
|
||||||
/// <see cref="HtmlNode" /> to absolute URIs.
|
|
||||||
/// </summary>
|
|
||||||
/// <param name="rootNode">The root node to search from.</param>
|
|
||||||
/// <param name="baseUri">The base URI to use for conversion.</param>
|
|
||||||
/// <returns>The number of nodes updated.</returns>
|
|
||||||
public static int AbsolutifyUris(this HtmlNode rootNode, Uri baseUri)
|
|
||||||
{
|
|
||||||
int nodesUpdated = 0;
|
|
||||||
Parallel.ForEach(rootNode.QuerySelectorAll("a, img"), (HtmlNode node) => {
|
|
||||||
string attributeName = null;
|
|
||||||
if (node.Attributes["href"] != null) attributeName = "href";
|
|
||||||
if (node.Attributes["src"] != null) attributeName = "src";
|
|
||||||
|
|
||||||
if (node.Attributes[attributeName] == null)
|
|
||||||
return;
|
|
||||||
|
|
||||||
node.Attributes[attributeName].Value = new Uri(
|
|
||||||
baseUri,
|
|
||||||
node.Attributes[attributeName].Value
|
|
||||||
).ToString();
|
|
||||||
|
|
||||||
Interlocked.Increment(ref nodesUpdated);
|
|
||||||
});
|
|
||||||
return nodesUpdated;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -147,15 +147,12 @@
|
||||||
<Compile Include="SubstitutionLexer.cs" />
|
<Compile Include="SubstitutionLexer.cs" />
|
||||||
<Compile Include="Salamander.Core\LexerPool.cs" />
|
<Compile Include="Salamander.Core\LexerPool.cs" />
|
||||||
<Compile Include="ReferenceSubstitutor.cs" />
|
<Compile Include="ReferenceSubstitutor.cs" />
|
||||||
<Compile Include="SnakeCasePropertySelector.cs" />
|
|
||||||
<Compile Include="Helpers\HtmlHelpers.cs" />
|
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<None Include="packages.config" />
|
<None Include="packages.config" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<Folder Include="Salamander.Core\" />
|
<Folder Include="Salamander.Core\" />
|
||||||
<Folder Include="Helpers\" />
|
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
|
<Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
|
||||||
<Import Project="..\packages\NETStandard.Library.2.0.3\build\netstandard2.0\NETStandard.Library.targets" Condition="Exists('..\packages\NETStandard.Library.2.0.3\build\netstandard2.0\NETStandard.Library.targets')" />
|
<Import Project="..\packages\NETStandard.Library.2.0.3\build\netstandard2.0\NETStandard.Library.targets" Condition="Exists('..\packages\NETStandard.Library.2.0.3\build\netstandard2.0\NETStandard.Library.targets')" />
|
||||||
|
|
|
@ -13,7 +13,7 @@ namespace PolyFeed
|
||||||
public readonly string ProgramName = "PolyFeed";
|
public readonly string ProgramName = "PolyFeed";
|
||||||
public readonly string Description = "creates Atom feeds from websites that don't support it";
|
public readonly string Description = "creates Atom feeds from websites that don't support it";
|
||||||
|
|
||||||
public string ConfigFilepath = null;
|
public string ConfigFilepath = "feed.toml";
|
||||||
public string OutputFilepath = "feed.atom";
|
public string OutputFilepath = "feed.atom";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -38,7 +38,20 @@ namespace PolyFeed
|
||||||
{
|
{
|
||||||
case "-h":
|
case "-h":
|
||||||
case "--help":
|
case "--help":
|
||||||
showHelp();
|
Console.WriteLine($"{settings.ProgramName}, {GetProgramVersion()}");
|
||||||
|
Console.WriteLine(" By Starbeamrainbowlabs");
|
||||||
|
|
||||||
|
Console.WriteLine();
|
||||||
|
Console.WriteLine($"This program {settings.Description}.");
|
||||||
|
Console.WriteLine();
|
||||||
|
Console.WriteLine("Usage:");
|
||||||
|
Console.WriteLine($" ./{Path.GetFileName(Assembly.GetExecutingAssembly().Location)} [arguments]");
|
||||||
|
Console.WriteLine();
|
||||||
|
Console.WriteLine("Options:");
|
||||||
|
Console.WriteLine(" -h --help Displays this message");
|
||||||
|
Console.WriteLine(" -v --version Outputs the version number of this program");
|
||||||
|
Console.WriteLine(" -c --config Specifies the location of the feed configuration file to use to generate a feed (default: feed.toml)");
|
||||||
|
Console.WriteLine(" -o --output Specifies the location to write the output feed to (default: feed.atom)");
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
case "-v":
|
case "-v":
|
||||||
|
@ -58,66 +71,39 @@ namespace PolyFeed
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (settings.ConfigFilepath == null) {
|
|
||||||
Console.Error.WriteLine("Error: No configuration filepath detected. Try " +
|
|
||||||
"using --help to show usage information.");
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
///// 2: Acquire environment variables /////
|
///// 2: Acquire environment variables /////
|
||||||
|
|
||||||
|
|
||||||
///// 3: Run program /////
|
///// 3: Run program /////
|
||||||
|
|
||||||
return run().Result;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static void showHelp()
|
|
||||||
{
|
|
||||||
Console.WriteLine($"{settings.ProgramName}, {GetProgramVersion()}");
|
|
||||||
Console.WriteLine(" By Starbeamrainbowlabs");
|
|
||||||
|
|
||||||
Console.WriteLine();
|
|
||||||
Console.WriteLine($"This program {settings.Description}.");
|
|
||||||
Console.WriteLine();
|
|
||||||
Console.WriteLine("Usage:");
|
|
||||||
Console.WriteLine($" ./{Path.GetFileName(Assembly.GetExecutingAssembly().Location)} [arguments]");
|
|
||||||
Console.WriteLine();
|
|
||||||
Console.WriteLine("Options:");
|
|
||||||
Console.WriteLine(" -h --help Displays this message");
|
|
||||||
Console.WriteLine(" -v --version Outputs the version number of this program");
|
|
||||||
Console.WriteLine(" -c --config Specifies the location of the TOML feed configuration file to use to generate a feed");
|
|
||||||
Console.WriteLine(" -o --output Specifies the location to write the output feed to (default: feed.atom)");
|
|
||||||
}
|
|
||||||
|
|
||||||
private static async Task<int> run()
|
|
||||||
{
|
|
||||||
TomlSettings parseSettings = TomlSettings.Create(s =>
|
|
||||||
s.ConfigurePropertyMapping(m => m.UseTargetPropertySelector(new SnakeCasePropertySelector()))
|
|
||||||
);
|
|
||||||
FeedSource feedSource = Toml.ReadFile<FeedSource>(settings.ConfigFilepath, parseSettings);
|
|
||||||
|
|
||||||
if (feedSource == null) {
|
|
||||||
Console.Error.WriteLine("Error: Somethine went wrong when parsing your settings file :-(");
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!string.IsNullOrWhiteSpace(feedSource.Feed.Output))
|
|
||||||
settings.OutputFilepath = feedSource.Feed.Output;
|
|
||||||
|
|
||||||
FeedBuilder feedBuilder = new FeedBuilder();
|
|
||||||
try {
|
|
||||||
await feedBuilder.AddSource(feedSource);
|
|
||||||
} catch (ApplicationException error) {
|
|
||||||
Console.Error.WriteLine(error.Message);
|
|
||||||
return 2;
|
|
||||||
}
|
|
||||||
await Console.Error.WriteLineAsync($"[Output] Writing feed to {settings.OutputFilepath}");
|
|
||||||
File.WriteAllText(settings.OutputFilepath, await feedBuilder.Render());
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static async Task<string> run()
|
||||||
|
{
|
||||||
|
FeedSource feedSource = new FeedSource();
|
||||||
|
TomlTable config = Toml.ReadFile(settings.ConfigFilepath, TomlSettings.Create());
|
||||||
|
|
||||||
|
foreach (KeyValuePair<string, TomlObject> item in config) {
|
||||||
|
string key = Regex.Replace(
|
||||||
|
item.Key,
|
||||||
|
@"(^|_)[A-Za-z0-9]",
|
||||||
|
(match) => match.Value.Replace("_", "").ToUpper()
|
||||||
|
);
|
||||||
|
string value = item.Value.Get<TomlString>().Value;
|
||||||
|
feedSource.GetType().GetProperty(value).SetValue(
|
||||||
|
feedSource,
|
||||||
|
value
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
FeedBuilder feedBuilder = new FeedBuilder();
|
||||||
|
await feedBuilder.AddSource(feedSource);
|
||||||
|
return await feedBuilder.Render();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
#region Helper Methods
|
#region Helper Methods
|
||||||
|
|
||||||
|
|
|
@ -15,14 +15,10 @@ namespace PolyFeed
|
||||||
SubstitutionLexer lexer = lexerPool.AcquireLexer();
|
SubstitutionLexer lexer = lexerPool.AcquireLexer();
|
||||||
lexer.Initialise(inputString);
|
lexer.Initialise(inputString);
|
||||||
|
|
||||||
bool useHtml = true;
|
|
||||||
|
|
||||||
foreach (LexerToken<SubstitutionToken> nextToken in lexer.TokenStream())
|
foreach (LexerToken<SubstitutionToken> nextToken in lexer.TokenStream())
|
||||||
{
|
{
|
||||||
switch (nextToken.Type) {
|
switch (nextToken.Type) {
|
||||||
case SubstitutionToken.BraceOpen:
|
case SubstitutionToken.BraceOpen:
|
||||||
useHtml = nextToken.Value.Length == 1;
|
|
||||||
|
|
||||||
lexer.SaveRuleStates();
|
lexer.SaveRuleStates();
|
||||||
lexer.EnableRule(SubstitutionToken.Identifier);
|
lexer.EnableRule(SubstitutionToken.Identifier);
|
||||||
lexer.DisableRule(SubstitutionToken.Text);
|
lexer.DisableRule(SubstitutionToken.Text);
|
||||||
|
@ -36,12 +32,7 @@ namespace PolyFeed
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case SubstitutionToken.Identifier:
|
case SubstitutionToken.Identifier:
|
||||||
HtmlNode targetNode = rootElement.QuerySelector(nextToken.Value);
|
result.Append(rootElement.QuerySelector(nextToken.Value));
|
||||||
if (targetNode == null) {
|
|
||||||
Console.Error.WriteLine($"Warning: Selector {nextToken.Value} failed to match any elements");
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
result.Append(useHtml ? targetNode.InnerHtml : targetNode.InnerText);
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,27 +0,0 @@
|
||||||
using System;
|
|
||||||
using System.Reflection;
|
|
||||||
using System.Text.RegularExpressions;
|
|
||||||
using Nett;
|
|
||||||
|
|
||||||
namespace PolyFeed
|
|
||||||
{
|
|
||||||
public class SnakeCasePropertySelector : ITargetPropertySelector
|
|
||||||
{
|
|
||||||
public SnakeCasePropertySelector()
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
public PropertyInfo TryGetTargetProperty(string key, Type target)
|
|
||||||
{
|
|
||||||
string transformedKey = Regex.Replace(
|
|
||||||
key,
|
|
||||||
@"(^|_)[A-Za-z0-9]",
|
|
||||||
(match) => match.Value.Replace("_", "").ToUpper()
|
|
||||||
);
|
|
||||||
|
|
||||||
//Console.WriteLine($"{key} -> {transformedKey}");
|
|
||||||
|
|
||||||
return target.GetProperty(transformedKey);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -24,8 +24,8 @@ namespace PolyFeed
|
||||||
AddRules(new List<LexerRule<SubstitutionToken>>() {
|
AddRules(new List<LexerRule<SubstitutionToken>>() {
|
||||||
new LexerRule<SubstitutionToken>(SubstitutionToken.Text, @"[^{}]+"),
|
new LexerRule<SubstitutionToken>(SubstitutionToken.Text, @"[^{}]+"),
|
||||||
new LexerRule<SubstitutionToken>(SubstitutionToken.Identifier, @"[^{}]+"),
|
new LexerRule<SubstitutionToken>(SubstitutionToken.Identifier, @"[^{}]+"),
|
||||||
new LexerRule<SubstitutionToken>(SubstitutionToken.BraceOpen, @"\{+"),
|
new LexerRule<SubstitutionToken>(SubstitutionToken.BraceOpen, @"\{"),
|
||||||
new LexerRule<SubstitutionToken>(SubstitutionToken.BraceClose, @"\}+"),
|
new LexerRule<SubstitutionToken>(SubstitutionToken.BraceClose, @"\}"),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
41
README.md
41
README.md
|
@ -2,43 +2,4 @@
|
||||||
|
|
||||||
> Create Atom feeds for websites that don't support it
|
> Create Atom feeds for websites that don't support it
|
||||||
|
|
||||||
PolyFeed generates Atom feeds out of websites that don't have one, such as _Twitter_ or _Facebook_ (* cough * * cough *). It supports any platform that C♯ .NET applications can run on, including Linux and Windows.
|
Currently in alpha.
|
||||||
|
|
||||||
|
|
||||||
## Install
|
|
||||||
|
|
||||||
### From a Release
|
|
||||||
Download and extract the [latest release](https://github.com/sbrl/PolyFeed/releases/latest). You're done!
|
|
||||||
|
|
||||||
### Building from Source
|
|
||||||
|
|
||||||
Clone this repository, and then build the code with `msbuild`:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
msbuild /p:Configuration=Release
|
|
||||||
```
|
|
||||||
|
|
||||||
The build output will be written to `PolyFeed/bin/Release`.
|
|
||||||
|
|
||||||
|
|
||||||
## Usage
|
|
||||||
PolyFeed uses [TOML](https://github.com/toml-lang/toml) configuration files to define Atom feeds. First, create a configuration file that specifies how PolyFeed should generate an Atom feed - or use [one of the examples](https://github.com/sbrl/PolyFeed/tree/master/examples).
|
|
||||||
|
|
||||||
Then, run PolyFeed over it:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
path/to/PolyFeed.exe --config path/to/config.toml
|
|
||||||
```
|
|
||||||
|
|
||||||
...it will generate the named `.atom` file automatically, keeping you up-to-date on its progress and any errors it encounters.
|
|
||||||
|
|
||||||
Use `PolyFeed.exe --help` to display the full range of command-line flags available.
|
|
||||||
|
|
||||||
## Contributing
|
|
||||||
Contributions are welcome - feel free to [open an issue](https://github.com/sbrl/PolyFeed/issues/new) or (even better) a [pull request](https://github.com/sbrl/PolyFeed/compare).
|
|
||||||
|
|
||||||
The [issue tracker](https://github.com/sbrl/PolyFeed/issues) is the place where all the tasks relating to the project are kept.
|
|
||||||
|
|
||||||
|
|
||||||
## Licence
|
|
||||||
PolyFeed is released under the _Mozilla Public License 2.0_. The full license text is included in the `LICENSE` file in this repository.
|
|
||||||
|
|
|
@ -1,23 +0,0 @@
|
||||||
[feed]
|
|
||||||
output = "euruicimages-Twitter.atom"
|
|
||||||
|
|
||||||
url = "https://mobile.twitter.com/euruicimages"
|
|
||||||
|
|
||||||
source_type = "html"
|
|
||||||
|
|
||||||
title = "{{.username}} on Twitter"
|
|
||||||
subtitle = "{{.details}}"
|
|
||||||
|
|
||||||
logo_url = { selector = ".avatar img", attribute = "src" }
|
|
||||||
|
|
||||||
[entries]
|
|
||||||
selector = ".tweet"
|
|
||||||
title = "Tweet by {{.username}} {{.tweet-social-context}}"
|
|
||||||
content = "<p><strong>{.avatar}{.fullname}:</strong></p>\n{.tweet-text}"
|
|
||||||
|
|
||||||
url = { selector = ".metadata a", attribute = "href" }
|
|
||||||
|
|
||||||
author_name = { selector = ".username" }
|
|
||||||
|
|
||||||
# published = { selector = "", attribute = "" }
|
|
||||||
# last_updated = { selector = "", attribute = "" }
|
|
Loading…
Reference in a new issue