From 5f3d1f824d6f11acc45528a8b0cfa7903a51352f Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs
Date: Mon, 29 Jul 2019 19:36:43 +0100
Subject: [PATCH] Absolutify urls before generating feeds

---
 PolyFeed/FeedBuilder.cs         |  3 +++
 PolyFeed/Helpers/HtmlHelpers.cs | 42 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+)

diff --git a/PolyFeed/FeedBuilder.cs b/PolyFeed/FeedBuilder.cs
index ea2429d..8724a52 100644
--- a/PolyFeed/FeedBuilder.cs
+++ b/PolyFeed/FeedBuilder.cs
@@ -67,6 +67,9 @@ namespace PolyFeed
 
 			HtmlNode document = html.DocumentNode;
 
+			document.AbsolutifyUris(new Uri(source.Feed.Url));
+
+
 			await Console.Error.WriteLineAsync("[Builder/Html] Generating feed content");
 
 			// Add the title
diff --git a/PolyFeed/Helpers/HtmlHelpers.cs b/PolyFeed/Helpers/HtmlHelpers.cs
index 6ffc4d3..0d093e7 100644
--- a/PolyFeed/Helpers/HtmlHelpers.cs
+++ b/PolyFeed/Helpers/HtmlHelpers.cs
@@ -1,4 +1,6 @@
 using System;
+using System.Threading;
+using System.Threading.Tasks;
 using Fizzler.Systems.HtmlAgilityPack;
 using HtmlAgilityPack;
 
@@ -30,5 +32,45 @@ namespace PolyFeed.Helpers
 
 			return selectedNode.Attributes[settings.Attribute].Value;
 		}
+
+		/// <summary>
+		/// Converts the href/src attribute of every a and img element beneath the current <see cref="HtmlNode" />
+		/// to an absolute URI.
+		/// </summary>
+		/// <remarks>
+		/// Elements that have neither attribute, and values that cannot be resolved against
+		/// <paramref name="baseUri" />, are left untouched and not counted. Nodes are visited
+		/// sequentially: they all belong to one shared document tree.
+		/// </remarks>
+		/// <param name="rootNode">The root node to search from.</param>
+		/// <param name="baseUri">The base URI to use for conversion.</param>
+		/// <returns>The number of nodes updated.</returns>
+		/// <exception cref="ArgumentNullException">
+		/// <paramref name="rootNode" /> or <paramref name="baseUri" /> is null.
+		/// </exception>
+		public static int AbsolutifyUris(this HtmlNode rootNode, Uri baseUri)
+		{
+			if (rootNode == null) throw new ArgumentNullException(nameof(rootNode));
+			if (baseUri == null) throw new ArgumentNullException(nameof(baseUri));
+
+			int nodesUpdated = 0;
+			foreach (HtmlNode node in rootNode.QuerySelectorAll("a, img"))
+			{
+				// src takes precedence when an element carries both attributes.
+				HtmlAttribute attribute = node.Attributes["src"] ?? node.Attributes["href"];
+				if (attribute == null)
+					continue;
+
+				// TryCreate rather than the Uri constructor, so that a single malformed
+				// link is skipped instead of aborting the whole conversion.
+				Uri absoluteUri;
+				if (!Uri.TryCreate(baseUri, attribute.Value, out absoluteUri))
+					continue;
+
+				attribute.Value = absoluteUri.ToString();
+				nodesUpdated++;
+			}
+			return nodesUpdated;
+		}
 	}
 }