There has to be a simpler way of implementing the HTML tag insertion. Let's try this again....

This commit is contained in:
Starbeamrainbowlabs 2018-09-22 18:48:33 +01:00
parent 2aba3a9d86
commit 3d4ca5fd4c
Signed by: sbrl
GPG key ID: 1BE5172E637709C2
2 changed files with 33 additions and 35 deletions

View file

@ -5,6 +5,7 @@ using System.Linq;
using System.Net;
using System.Threading.Tasks;
using Newtonsoft.Json;
using UnidecodeSharpFork;
namespace LibSearchBox
{
@ -221,42 +222,23 @@ namespace LibSearchBox
currentLength += nextSnippet.Item2 - nextSnippet.Item1;
}
List<string> snippetsText = new List<string>(snippets.Select(((int, int) snippet) => {
string result = source.Substring(snippet.Item1, snippet.Item2 - snippet.Item1);
if (string.IsNullOrWhiteSpace(result.Trim()))
return "";
if (settings.Html) {
List<string> parts = new List<string>() { result };
int remainingStartIndex = snippet.Item1;
foreach ((int, int) tokenDef in tokenLocations) {
if (tokenDef.Item1 - remainingStartIndex < result.Length) {
string remainingString = parts.Last();
parts.RemoveAt(parts.Count - 1); // Remove the last element
// The bit before the token
string nextPart = WebUtility.HtmlEncode(
remainingString.Substring(0, tokenDef.Item1 - remainingStartIndex)
);
// The token itself
nextPart += $"<span class='token'>{WebUtility.HtmlEncode(remainingString.Substring(tokenDef.Item1 - remainingStartIndex, tokenDef.Item2))}</span>";
parts.Add(nextPart);
// The bit after the token - this will be processed by the next loop iteration, so it doesn't need to be HTML-encoded (yet)
parts.Add(remainingString.Substring((tokenDef.Item1 - remainingStartIndex) + tokenDef.Item2));
// Update our marker as to where we've got up to
remainingStartIndex = (tokenDef.Item1 - remainingStartIndex) + tokenDef.Item2;
}
List<string> snippetsText = new List<string>(
snippets.Select(((int, int) snippet) => {
string result = source.Substring(snippet.Item1, snippet.Item2 - snippet.Item1);
if (settings.Html) {
result = WebUtility.HtmlEncode(result);
string resultSearchable = result.Unidecode().ToLower();
foreach ((int, string) nextToken in tokenizer.IterateTokens()) {
// TODO: Insert html tags here
throw new NotImplementedException("HTML tag insertion hasn't been implemented yet");
}
}
// HTML-encode the last part
parts[parts.Count - 1] = WebUtility.HtmlEncode(parts[parts.Count - 1]);
result = string.Join("", parts);
}
return result;
}).Where((string snippet) => !string.IsNullOrWhiteSpace(snippet)));
return result;
})
.Where((string snippet) => !string.IsNullOrWhiteSpace(snippet))
);
// Add the separator at the beginning and end if we aren't at the bounds of the source document
if (snippets.First().Item1 > 0)

View file

@ -1,4 +1,5 @@
using System;
using System.Collections.Generic;
namespace LibSearchBox.Utilities
{
@ -14,5 +15,20 @@ namespace LibSearchBox.Utilities
}
return str;
}
// From https://stackoverflow.com/a/2641383/1460422
/// <summary>
/// Finds the starting index of every non-overlapping occurrence of
/// <paramref name="value"/> inside <paramref name="str"/>, scanning left to right.
/// </summary>
/// <remarks>
/// Matching is ordinal (char-by-char), so a match always spans exactly
/// <c>value.Length</c> chars and the search can safely resume straight after it.
/// Arguments are validated when the method is called; the search itself is lazy
/// and only runs as the returned sequence is enumerated.
/// </remarks>
/// <param name="str">The string to search in.</param>
/// <param name="value">The substring to look for. May not be null or empty.</param>
/// <returns>The zero-based start indexes of each match, in ascending order.</returns>
/// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="value"/> is null or empty.</exception>
public static IEnumerable<int> AllIndexesOf(this string str, string value)
{
	// Validate here rather than inside the iterator: an iterator body doesn't
	// execute until the first MoveNext(), which would defer these exceptions
	// to wherever the caller happens to enumerate the result.
	if (str == null)
		throw new ArgumentNullException(nameof(str));
	if (String.IsNullOrEmpty(value))
		throw new ArgumentException("Error: The string to find may not be empty.", nameof(value));

	return FindMatches();

	IEnumerable<int> FindMatches()
	{
		int index = 0;
		// Ordinal comparison: the culture-sensitive default can ignore characters
		// or match spans whose length differs from value.Length, which would
		// break the "skip past the match" step below.
		while ((index = str.IndexOf(value, index, StringComparison.Ordinal)) != -1) {
			yield return index;
			// Skip the whole match so that occurrences never overlap
			index += value.Length;
		}
	}
}
}
}