Add support for --limit & --offset, and fix crash when a query token is missing from a matching page

This commit is contained in:
Starbeamrainbowlabs 2018-09-22 14:25:10 +01:00
parent 53e4403356
commit b41f7f524a
Signed by: sbrl
GPG key ID: 1BE5172E637709C2
2 changed files with 28 additions and 7 deletions

View file

@ -37,6 +37,8 @@ namespace SearchBoxCLI
private static string SearchIndexFilepath = string.Empty;
private static TextReader Source = Console.In;
private static TextReader SourceOld = null, SourceNew = null;
private static int ResultsLimit = -1;
private static int ResultsOffset = 0;
private static OutputModes OutputMode = OutputModes.Text;
public static int Main(string[] args)
@ -82,6 +84,14 @@ namespace SearchBoxCLI
SearchIndexFilepath = args[++i];
break;
case "limit":
ResultsLimit = int.Parse(args[++i]);
break;
case "offset":
ResultsOffset = int.Parse(args[++i]);
break;
case "format":
OutputMode = (OutputModes)Enum.Parse(typeof(OutputModes), args[++i], true);
break;
@ -126,7 +136,7 @@ namespace SearchBoxCLI
Console.WriteLine(" update Update a named document in a search index.");
Console.WriteLine();
Console.WriteLine("Options:");
Console.WriteLine(" --format Sets the format of the output. Possible values: text (default), json");
Console.WriteLine(" --format Sets the format of the output. Possible values: text (default), json {query,index}");
Console.WriteLine(" --source, -s Specifies the path to the source document {index, add}");
Console.WriteLine(" --old-source Specifies the path to the old version of the source document to update {update}");
Console.WriteLine(" --new-source Specifies the path to the new version of the source document to update {update}");
@ -134,6 +144,8 @@ namespace SearchBoxCLI
Console.WriteLine(" --index Specifies the location of the search index to use {add, remove, update}");
Console.WriteLine(" --tags Sets the tags to associate with the document. {add, update}");
Console.WriteLine(" --batch Enters a mode where the operations to process are specified via the source (by default stdin; change with --source as usual) - one per line in the format \"{filename}|{name}|{tags}\" {add}");
Console.WriteLine(" --limit Limits the number of results returned, -1 = no limit {query}");
Console.WriteLine(" --offset Skips the specified number of results from the beginning of the results list {query}");
Console.WriteLine();
Console.WriteLine("Examples:");
Console.WriteLine(" cat books/complex_knots.txt | ./SearchBox.exe add --name \"Complex Knots: How to do and undo them\"");
@ -229,7 +241,10 @@ namespace SearchBoxCLI
File.ReadAllText(SearchIndexFilepath)
);
List<SearchResult> results = searchBox.Query(Extras[0], new QuerySettings());
IEnumerable<SearchResult> resultsRaw = searchBox.Query(Extras[0], new QuerySettings()).Skip(ResultsOffset);
List<SearchResult> results = new List<SearchResult>(
ResultsLimit > 0 ? resultsRaw.Take(ResultsLimit) : resultsRaw
);
switch (OutputMode)
{
@ -237,8 +252,11 @@ namespace SearchBoxCLI
Console.WriteLine(JsonConvert.SerializeObject(results));
break;
case OutputModes.Text:
foreach (SearchResult nextResult in results)
Console.WriteLine(nextResult);
int i = 0;
foreach (SearchResult nextResult in results) {
Console.WriteLine($"#{i}: {nextResult}");
i++;
}
break;
}
return 0;

View file

@ -96,7 +96,7 @@ namespace LibSearchBox
ConcurrentBag<SearchResult> resultsRaw = new ConcurrentBag<SearchResult>();
Parallel.ForEach(matchingPages, (KeyValuePair<int, ConcurrentDictionary<string, int>> pageDef) => {
int rank = pageDef.Value.Values.Sum(); // FUTURE: Linq is slow. Is theree a faster way of doing this?
int rank = pageDef.Value.Values.Sum(); // FUTURE: Linq is slow. Is there a faster way of doing this?
if (!metaTable.TryGetValue(pageDef.Key, out DocumentMeta metaInfo)) {
if (Verbose) Console.Error.WriteLine($"Warning: Failed to fetch meta info for page id {pageDef.Key}");
}
@ -112,8 +112,11 @@ namespace LibSearchBox
}
List<SearchOffset> offsets = new List<SearchOffset>();
foreach (Tuple<int, string> token in tokenizer.IterateTokens())
offsets.AddRange(index.Query(token.Item2)[pageDef.Key].Select((int offset) => new SearchOffset(token.Item2, offset)));
foreach (Tuple<int, string> token in tokenizer.IterateTokens()) {
ConcurrentDictionary<int, List<int>> tokenQuery = index.Query(token.Item2);
if (!tokenQuery.ContainsKey(pageDef.Key)) continue; // Don't bother if this page doesn't contain this token
offsets.AddRange(tokenQuery[pageDef.Key].Select((int offset) => new SearchOffset(token.Item2, offset)));
}
offsets.Sort((SearchOffset x, SearchOffset y) => x.Offset - y.Offset);
resultsRaw.Add(new SearchResult(