There has to be a simpler way of implementing the HTML tag insertion. Let's try this again....

This commit is contained in:
Starbeamrainbowlabs 2018-09-22 18:48:33 +01:00
parent 2aba3a9d86
commit 3d4ca5fd4c
Signed by: sbrl
GPG key ID: 1BE5172E637709C2
2 changed files with 33 additions and 35 deletions

View file

@ -5,6 +5,7 @@ using System.Linq;
using System.Net; using System.Net;
using System.Threading.Tasks; using System.Threading.Tasks;
using Newtonsoft.Json; using Newtonsoft.Json;
using UnidecodeSharpFork;
namespace LibSearchBox namespace LibSearchBox
{ {
@ -221,42 +222,23 @@ namespace LibSearchBox
currentLength += nextSnippet.Item2 - nextSnippet.Item1; currentLength += nextSnippet.Item2 - nextSnippet.Item1;
} }
List<string> snippetsText = new List<string>(snippets.Select(((int, int) snippet) => { List<string> snippetsText = new List<string>(
string result = source.Substring(snippet.Item1, snippet.Item2 - snippet.Item1); snippets.Select(((int, int) snippet) => {
string result = source.Substring(snippet.Item1, snippet.Item2 - snippet.Item1);
if (string.IsNullOrWhiteSpace(result.Trim())) if (settings.Html) {
return ""; result = WebUtility.HtmlEncode(result);
string resultSearchable = result.Unidecode().ToLower();
if (settings.Html) { foreach ((int, string) nextToken in tokenizer.IterateTokens()) {
List<string> parts = new List<string>() { result }; // TODO: Insert html tags here
int remainingStartIndex = snippet.Item1; throw new NotImplementedException("HTML tag insertion hasn't been implemented yet");
foreach ((int, int) tokenDef in tokenLocations) {
if (tokenDef.Item1 - remainingStartIndex < result.Length) {
string remainingString = parts.Last();
parts.RemoveAt(parts.Count - 1); // Remove the last element
// The bit before the token
string nextPart = WebUtility.HtmlEncode(
remainingString.Substring(0, tokenDef.Item1 - remainingStartIndex)
);
// The token itself
nextPart += $"<span class='token'>{WebUtility.HtmlEncode(remainingString.Substring(tokenDef.Item1 - remainingStartIndex, tokenDef.Item2))}</span>";
parts.Add(nextPart);
// The bit after the token - this will be processed by the next loop, so it doesn't need to be HTML-encoded (yet)
parts.Add(remainingString.Substring((tokenDef.Item1 - remainingStartIndex) + tokenDef.Item2));
// Update our marker as to where we've got up to
remainingStartIndex = (tokenDef.Item1 - remainingStartIndex) + tokenDef.Item2;
} }
} }
// HTML-encode the last part return result;
parts[parts.Count - 1] = WebUtility.HtmlEncode(parts[parts.Count - 1]); })
.Where((string snippet) => !string.IsNullOrWhiteSpace(snippet))
result = string.Join("", parts); );
}
return result;
}).Where((string snippet) => !string.IsNullOrWhiteSpace(snippet)));
// Add the separator at the beginning and end if we aren't at the bounds of the source document // Add the separator at the beginning and end if we aren't at the bounds of the source document
if (snippets.First().Item1 > 0) if (snippets.First().Item1 > 0)

View file

@ -1,4 +1,5 @@
using System; using System;
using System.Collections.Generic;
namespace LibSearchBox.Utilities namespace LibSearchBox.Utilities
{ {
@ -14,5 +15,20 @@ namespace LibSearchBox.Utilities
} }
return str; return str;
} }
/// <summary>
/// Finds the starting index of every non-overlapping occurrence of
/// <paramref name="value"/> inside <paramref name="str"/>.
/// </summary>
/// <remarks>
/// Adapted from https://stackoverflow.com/a/2641383/1460422
/// Matching is ordinal (char-by-char), so every returned index is a plain char
/// offset into <paramref name="str"/> and a match always spans exactly
/// <c>value.Length</c> chars. After a hit the search resumes just past the
/// match, so overlapping occurrences are not reported ("aaa" / "aa" gives 0 only).
/// Arguments are validated when the method is called; the indexes themselves
/// are produced lazily as the result is enumerated.
/// </remarks>
/// <param name="str">The string to search in.</param>
/// <param name="value">The substring to look for. May not be null or empty.</param>
/// <returns>The zero-based start indexes of each match, in ascending order.</returns>
/// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="value"/> is null or empty.</exception>
public static IEnumerable<int> AllIndexesOf(this string str, string value)
{
	if (str == null)
		throw new ArgumentNullException(nameof(str));
	if (String.IsNullOrEmpty(value))
		throw new ArgumentException("Error: The string to find may not be empty.", nameof(value));

	// The iterator lives in a separate method so that the checks above run
	// at call time rather than on the first MoveNext().
	return AllIndexesOfIterator(str, value);
}

// Lazily yields each ordinal match position; expects pre-validated arguments.
private static IEnumerable<int> AllIndexesOfIterator(string str, string value)
{
	int index = str.IndexOf(value, 0, StringComparison.Ordinal);
	while (index != -1) {
		yield return index;
		// index + value.Length is at most str.Length for an ordinal match,
		// which is still a valid start position for IndexOf.
		index = str.IndexOf(value, index + value.Length, StringComparison.Ordinal);
	}
}
} }
} }