Add support for --limit & --offset, and fix crash when querying for a token that a matching page does not contain

This commit is contained in:
Starbeamrainbowlabs 2018-09-22 14:25:10 +01:00
parent 53e4403356
commit b41f7f524a
Signed by: sbrl
GPG key ID: 1BE5172E637709C2
2 changed files with 28 additions and 7 deletions

View file

@ -37,6 +37,8 @@ namespace SearchBoxCLI
private static string SearchIndexFilepath = string.Empty; private static string SearchIndexFilepath = string.Empty;
private static TextReader Source = Console.In; private static TextReader Source = Console.In;
private static TextReader SourceOld = null, SourceNew = null; private static TextReader SourceOld = null, SourceNew = null;
private static int ResultsLimit = -1;
private static int ResultsOffset = 0;
private static OutputModes OutputMode = OutputModes.Text; private static OutputModes OutputMode = OutputModes.Text;
public static int Main(string[] args) public static int Main(string[] args)
@ -82,6 +84,14 @@ namespace SearchBoxCLI
SearchIndexFilepath = args[++i]; SearchIndexFilepath = args[++i];
break; break;
case "limit":
ResultsLimit = int.Parse(args[++i]);
break;
case "offset":
ResultsOffset = int.Parse(args[++i]);
break;
case "format": case "format":
OutputMode = (OutputModes)Enum.Parse(typeof(OutputModes), args[++i], true); OutputMode = (OutputModes)Enum.Parse(typeof(OutputModes), args[++i], true);
break; break;
@ -126,7 +136,7 @@ namespace SearchBoxCLI
Console.WriteLine(" update Update a named document in a search index."); Console.WriteLine(" update Update a named document in a search index.");
Console.WriteLine(); Console.WriteLine();
Console.WriteLine("Options:"); Console.WriteLine("Options:");
Console.WriteLine(" --format Sets the format of the output. Possible values: text (default), json"); Console.WriteLine(" --format Sets the format of the output. Possible values: text (default), json {query,index}");
Console.WriteLine(" --source, -s Specifies the path to the source document {index, add}"); Console.WriteLine(" --source, -s Specifies the path to the source document {index, add}");
Console.WriteLine(" --old-source Specifies the path to the old version of the source document to update {update}"); Console.WriteLine(" --old-source Specifies the path to the old version of the source document to update {update}");
Console.WriteLine(" --new-source Specifies the path to the new version of the source document to update {update}"); Console.WriteLine(" --new-source Specifies the path to the new version of the source document to update {update}");
@ -134,6 +144,8 @@ namespace SearchBoxCLI
Console.WriteLine(" --index Specifies the location of the search index to use {add, remove, update}"); Console.WriteLine(" --index Specifies the location of the search index to use {add, remove, update}");
Console.WriteLine(" --tags Sets the tags to associate with the document. {add, update}"); Console.WriteLine(" --tags Sets the tags to associate with the document. {add, update}");
Console.WriteLine(" --batch Enters a mode where the operations to process are specified via the source (by default stdin; change with --source as usual) - one per line in the format \"{filename}|{name}|{tags}\" {add}"); Console.WriteLine(" --batch Enters a mode where the operations to process are specified via the source (by default stdin; change with --source as usual) - one per line in the format \"{filename}|{name}|{tags}\" {add}");
Console.WriteLine(" --limit Limits the number of results returned, -1 = no limit {query}");
Console.WriteLine(" --offset Skips the specified number of results from the beginning of the results list {query}");
Console.WriteLine(); Console.WriteLine();
Console.WriteLine("Examples:"); Console.WriteLine("Examples:");
Console.WriteLine(" cat books/complex_knots.txt | ./SearchBox.exe add --name \"Complex Knots: How to do and undo them\""); Console.WriteLine(" cat books/complex_knots.txt | ./SearchBox.exe add --name \"Complex Knots: How to do and undo them\"");
@ -229,7 +241,10 @@ namespace SearchBoxCLI
File.ReadAllText(SearchIndexFilepath) File.ReadAllText(SearchIndexFilepath)
); );
List<SearchResult> results = searchBox.Query(Extras[0], new QuerySettings()); IEnumerable<SearchResult> resultsRaw = searchBox.Query(Extras[0], new QuerySettings()).Skip(ResultsOffset);
List<SearchResult> results = new List<SearchResult>(
ResultsLimit > 0 ? resultsRaw.Take(ResultsLimit) : resultsRaw
);
switch (OutputMode) switch (OutputMode)
{ {
@ -237,8 +252,11 @@ namespace SearchBoxCLI
Console.WriteLine(JsonConvert.SerializeObject(results)); Console.WriteLine(JsonConvert.SerializeObject(results));
break; break;
case OutputModes.Text: case OutputModes.Text:
foreach (SearchResult nextResult in results) int i = 0;
Console.WriteLine(nextResult); foreach (SearchResult nextResult in results) {
Console.WriteLine($"#{i}: {nextResult}");
i++;
}
break; break;
} }
return 0; return 0;

View file

@ -96,7 +96,7 @@ namespace LibSearchBox
ConcurrentBag<SearchResult> resultsRaw = new ConcurrentBag<SearchResult>(); ConcurrentBag<SearchResult> resultsRaw = new ConcurrentBag<SearchResult>();
Parallel.ForEach(matchingPages, (KeyValuePair<int, ConcurrentDictionary<string, int>> pageDef) => { Parallel.ForEach(matchingPages, (KeyValuePair<int, ConcurrentDictionary<string, int>> pageDef) => {
int rank = pageDef.Value.Values.Sum(); // FUTURE: Linq is slow. Is theree a faster way of doing this? int rank = pageDef.Value.Values.Sum(); // FUTURE: Linq is slow. Is there a faster way of doing this?
if (!metaTable.TryGetValue(pageDef.Key, out DocumentMeta metaInfo)) { if (!metaTable.TryGetValue(pageDef.Key, out DocumentMeta metaInfo)) {
if (Verbose) Console.Error.WriteLine($"Warning: Failed to fetch meta info for page id {pageDef.Key}"); if (Verbose) Console.Error.WriteLine($"Warning: Failed to fetch meta info for page id {pageDef.Key}");
} }
@ -112,8 +112,11 @@ namespace LibSearchBox
} }
List<SearchOffset> offsets = new List<SearchOffset>(); List<SearchOffset> offsets = new List<SearchOffset>();
foreach (Tuple<int, string> token in tokenizer.IterateTokens()) foreach (Tuple<int, string> token in tokenizer.IterateTokens()) {
offsets.AddRange(index.Query(token.Item2)[pageDef.Key].Select((int offset) => new SearchOffset(token.Item2, offset))); ConcurrentDictionary<int, List<int>> tokenQuery = index.Query(token.Item2);
if (!tokenQuery.ContainsKey(pageDef.Key)) continue; // Don't bother if this page doesn't contain this token
offsets.AddRange(tokenQuery[pageDef.Key].Select((int offset) => new SearchOffset(token.Item2, offset)));
}
offsets.Sort((SearchOffset x, SearchOffset y) => x.Offset - y.Offset); offsets.Sort((SearchOffset x, SearchOffset y) => x.Offset - y.Offset);
resultsRaw.Add(new SearchResult( resultsRaw.Add(new SearchResult(