SearchBox/SearchBox-CLI/Program.cs

333 lines
9.8 KiB
C#

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using Newtonsoft.Json;
using SBRL.Utilities;
using LibSearchBox;
namespace SearchBoxCLI
{
/// <summary>
/// The top-level operations the CLI can be asked to perform. The mode is
/// taken from the first positional (non-option) command-line argument.
/// </summary>
enum OperatingModes
{
    /// <summary>Query a pre-existing inverted search index.</summary>
    Query = 0,

    /// <summary>Generate a raw index of the source document.</summary>
    Index = 1,

    /// <summary>Add a named document to a search index.</summary>
    Add = 2,

    /// <summary>Remove a named document from a search index.</summary>
    Remove = 3,

    /// <summary>Update a named document in a search index.</summary>
    Update = 4,

    /// <summary>Generate a context string for a document (the "context" mode on the command line).</summary>
    GenerateContext = 5
}
/// <summary>
/// The output formats selectable with the --format option.
/// </summary>
enum OutputModes
{
    /// <summary>Serialise the output as JSON.</summary>
    Json = 0,

    /// <summary>Plain text output (the default).</summary>
    Text = 1,

    /// <summary>HTML output (context generation only, according to the help text).</summary>
    Html = 2
}
class MainClass {
// Positional (non-option) arguments collected by Main. The first one is
// consumed as the operating mode.
private static List<string> Extras = new List<string>();
// The operation to perform; overwritten by Main from the first positional argument.
private static OperatingModes Mode = OperatingModes.Query;
// Output format, set with --format. Defaults to plain text.
private static OutputModes OutputMode = OutputModes.Text;
// Set by --batch. Read by HandleAdd to take the documents to add from Source, one per line.
private static bool Batch = false;
// Document name, set with --name / -n. If still empty when --source or
// --new-source is parsed, it falls back to that filename.
private static string Name = string.Empty;
// Tags from --tags, split on commas (with optional trailing whitespace).
// Has no initialiser, so it stays null when --tags is not supplied.
private static IEnumerable<string> Tags;
// Path to the serialised search index, set with --index.
private static string SearchIndexFilepath = string.Empty;
// Where document text is read from: stdin unless --source replaces it with a file reader.
private static TextReader Source = Console.In;
// Readers opened by --old-source / --new-source. Nothing in this file reads
// them after they are assigned.
private static TextReader SourceOld = null, SourceNew = null;
// The query string, set with --query.
private static string Query = string.Empty;
// Maximum number of query results, set with --limit. HandleQuery only
// applies the limit when this is greater than zero.
private static int ResultsLimit = -1;
// Number of leading query results to skip, set with --offset.
private static int ResultsOffset = 0;
/// <summary>
/// Program entry point. Parses the command-line options into the static
/// settings fields, takes the first positional argument as the operating
/// mode, and dispatches to the matching handler.
/// </summary>
/// <param name="args">The raw command-line arguments.</param>
/// <returns>
/// The exit code: the handler's return value on dispatch, 1 for usage
/// errors (unknown option, missing or invalid option value, unknown mode,
/// no mode given) and for --help, or 128 for a recognised mode that has
/// no handler.
/// </returns>
public static int Main(string[] args)
{
    for (int i = 0; i < args.Length; i++)
    {
        // Anything that doesn't look like an option is a positional argument.
        if (!args[i].StartsWith("-", StringComparison.Ordinal)) {
            Extras.Add(args[i]);
            continue;
        }

        string option = args[i];
        string value;
        int number;
        switch (option.TrimStart('-')) {
            case "s":
            case "source":
                if (!TryReadOptionValue(args, ref i, out value)) return 1;
                Source = new StreamReader(value);
                // An explicit --name given earlier takes precedence over the filename.
                Name = Name.Length > 0 ? Name : value;
                break;
            case "batch":
                Batch = true;
                break;
            case "old-source":
                if (!TryReadOptionValue(args, ref i, out value)) return 1;
                SourceOld = new StreamReader(value);
                break;
            case "new-source":
                if (!TryReadOptionValue(args, ref i, out value)) return 1;
                SourceNew = new StreamReader(value);
                Name = Name.Length > 0 ? Name : value;
                break;
            case "tags":
                if (!TryReadOptionValue(args, ref i, out value)) return 1;
                Tags = Regex.Split(value, @",\s*");
                break;
            case "n":
            case "name":
                if (!TryReadOptionValue(args, ref i, out value)) return 1;
                Name = value;
                break;
            case "index":
                if (!TryReadOptionValue(args, ref i, out value)) return 1;
                SearchIndexFilepath = value;
                break;
            case "limit":
                if (!TryReadIntOption(args, ref i, out number)) return 1;
                ResultsLimit = number;
                break;
            case "offset":
                if (!TryReadIntOption(args, ref i, out number)) return 1;
                ResultsOffset = number;
                break;
            case "query":
                if (!TryReadOptionValue(args, ref i, out value)) return 1;
                Query = value;
                break;
            case "format":
                if (!TryReadOptionValue(args, ref i, out value)) return 1;
                OutputModes parsedFormat;
                // IsDefined rejects out-of-range numeric strings such as "7",
                // which TryParse would otherwise accept.
                if (!Enum.TryParse(value, true, out parsedFormat)
                    || !Enum.IsDefined(typeof(OutputModes), parsedFormat)) {
                    Console.Error.WriteLine($"Error: Unknown output format {value}.");
                    return 1;
                }
                OutputMode = parsedFormat;
                break;
            case "help":
                return HandleHelp();
            default:
                Console.Error.WriteLine($"Error: Unknown property {option}.");
                return 1;
        }
    }

    if (Extras.Count < 1) return HandleHelp();

    string modeText = Extras[0];
    Extras.RemoveAt(0);
    // "context" is the command-line alias for GenerateContext. Compare the
    // whole word so that a literal "generatecontext" is left intact.
    if (string.Equals(modeText, "context", StringComparison.OrdinalIgnoreCase))
        modeText = "generatecontext";

    OperatingModes parsedMode;
    if (!Enum.TryParse(modeText, true, out parsedMode)
        || !Enum.IsDefined(typeof(OperatingModes), parsedMode)) {
        Console.Error.WriteLine($"Error: Unknown mode {modeText}.");
        return 1;
    }
    Mode = parsedMode;

    switch (Mode) {
        case OperatingModes.Index: return HandleIndex();
        case OperatingModes.Add: return HandleAdd();
        case OperatingModes.Remove: return HandleRemove();
        case OperatingModes.Query: return HandleQuery();
        case OperatingModes.GenerateContext: return HandleContextGeneration();
        default:
            // Reached for modes that parse but have no handler (e.g. Update).
            Console.Error.WriteLine($"Error: Don't know how to handle mode {Mode}.");
            return 128;
    }
}

/// <summary>
/// Reads the value that follows the option at index <paramref name="i"/>,
/// advancing <paramref name="i"/> past it. Prints an error instead of
/// running off the end of the array when the value is missing.
/// </summary>
/// <returns>true if a value was read; false if the option was the last argument.</returns>
private static bool TryReadOptionValue(string[] args, ref int i, out string value)
{
    if (i + 1 >= args.Length) {
        Console.Error.WriteLine($"Error: No value specified for {args[i]}.");
        value = null;
        return false;
    }
    value = args[++i];
    return true;
}

/// <summary>
/// Reads the value that follows the option at index <paramref name="i"/>
/// and parses it as an integer, printing an error if it is missing or is
/// not a valid integer.
/// </summary>
/// <returns>true if an integer was read; false otherwise.</returns>
private static bool TryReadIntOption(string[] args, ref int i, out int result)
{
    string optionName = args[i];
    string raw;
    result = 0;
    if (!TryReadOptionValue(args, ref i, out raw))
        return false;
    if (!int.TryParse(raw, out result)) {
        Console.Error.WriteLine($"Error: {optionName} expects an integer, but got '{raw}'.");
        return false;
    }
    return true;
}
/// <summary>
/// Prints the usage message (modes, options and an example) to standard output.
/// </summary>
/// <returns>Always 1, which the caller uses as the process exit code.</returns>
private static int HandleHelp()
{
    Console.WriteLine("SearchBox");
    Console.WriteLine("---------");
    Console.WriteLine("A standalone full-text search engine. Operates on plain text sources.");
    Console.WriteLine();
    Console.WriteLine("Usage:");
    Console.WriteLine(" ./SearchBox.exe {mode} [options]");
    Console.WriteLine();
    Console.WriteLine("Modes:");
    Console.WriteLine(" query Query a pre-existing inverted search index");
    Console.WriteLine(" context Generate a context string similar to a search result on the internet");
    Console.WriteLine(" index Generate a raw index of the source document.");
    Console.WriteLine(" add Add a named document to a search index.");
    Console.WriteLine(" remove Remove a named document from a search index.");
    Console.WriteLine(" update Update a named document in a search index.");
    Console.WriteLine();
    // The {…} suffix on each option lists the modes that read it; keep these
    // in step with the checks in the individual handlers.
    Console.WriteLine("Options:");
    Console.WriteLine(" --batch Enters a mode where the operations to process are specified via the source (by default stdin; change with --source as usual) - one per line in the format \"{filename}|{name}|{tags}\" {add}");
    Console.WriteLine(" --format Sets the format of the output. Possible values: text (default), json, html (context generation only) {query, index, context}");
    Console.WriteLine(" --help Shows this help message");
    Console.WriteLine(" --index Specifies the location of the search index to use {query, context, add, remove, update}");
    Console.WriteLine(" --name, -n Sets the name of the source document {add, remove, context}");
    Console.WriteLine(" --new-source Specifies the path to the new version of the source document to update {update}");
    Console.WriteLine(" --limit Limits the number of results returned, -1 = no limit {query}");
    Console.WriteLine(" --offset Skips the specified number of results from the beginning of the results list {query}");
    Console.WriteLine(" --old-source Specifies the path to the old version of the source document to update {update}");
    Console.WriteLine(" --query Specifies the query string {query, context}");
    Console.WriteLine(" --source, -s Specifies the path to the source document {index, add, context}");
    Console.WriteLine(" --tags Sets the tags to associate with the document. {add, update}");
    Console.WriteLine();
    Console.WriteLine("Examples:");
    // The add mode refuses to run without --index, so the example includes it.
    Console.WriteLine(" cat books/complex_knots.txt | ./SearchBox.exe add --index index.json --name \"Complex Knots: How to do and undo them\"");
    Console.WriteLine();
    return 1;
}
/// <summary>
/// Adds one document (or, with --batch, every document listed in the
/// source) to the search index at <c>SearchIndexFilepath</c>, creating a
/// new index if the file does not exist yet, and writes the index back.
/// </summary>
/// <remarks>
/// In batch mode each source line has the form
/// <c>{filename}|{name}|{tags}</c>; lines with an empty filename are
/// skipped. If any listed file is missing or any line is malformed, the
/// messages are printed and the index file is left unwritten.
/// </remarks>
/// <returns>0 on success, 1 on a usage or batch-input error.</returns>
private static int HandleAdd()
{
    if (Name == string.Empty && !Batch) {
        Console.Error.WriteLine("Error: The document name must be specified when reading from stdin!");
        return 1;
    }
    if (SearchIndexFilepath == string.Empty) {
        Console.Error.WriteLine("Error: No search index file path specified.");
        return 1;
    }
    // --------------------------------------
    SearchBox searchBox = File.Exists(SearchIndexFilepath)
        ? JsonConvert.DeserializeObject<SearchBox>(File.ReadAllText(SearchIndexFilepath))
        : new SearchBox();

    if (!Batch) {
        // NOTE(review): Tags is null when --tags was not given - confirm AddDocument accepts that.
        searchBox.AddDocument(Name, Tags, Source.ReadToEnd());
    }
    else {
        try
        {
            // NOTE(review): this calls AddDocument from several threads at once -
            // confirm SearchBox.AddDocument is safe for concurrent use.
            Parallel.ForEach(LineIterator.GetLines(Source), (string nextLine) => {
                string[] parts = nextLine.Split('|');
                string filename = parts[0].Trim();
                if (filename.Length == 0)
                    return;
                if (parts.Length < 3)
                    throw new FormatException($"Error: Malformed batch line \"{nextLine}\" - expected {{filename}}|{{name}}|{{tags}}.");
                searchBox.AddDocument(
                    parts[1].Trim(),
                    Regex.Split(parts[2], @",\s*"),
                    File.ReadAllText(filename)
                );
                Console.Error.WriteLine($"[Searchbox] [add] {filename}");
            });
        } catch (FileNotFoundException error) {
            Console.Error.WriteLine(error.Message);
            return 1;
        } catch (AggregateException error) {
            // Parallel.ForEach wraps exceptions thrown by the loop body in an
            // AggregateException, so missing files arrive here rather than in
            // the catch above. Anything that is not an expected input error
            // is rethrown by Handle().
            error.Flatten().Handle(inner => {
                if (!(inner is FileNotFoundException) && !(inner is FormatException))
                    return false;
                Console.Error.WriteLine(inner.Message);
                return true;
            });
            return 1;
        }
    }

    File.WriteAllText(SearchIndexFilepath, JsonConvert.SerializeObject(searchBox));
    Console.Error.WriteLine($"[Searchbox] [add] {Name} -> {SearchIndexFilepath}");
    return 0;
}
/// <summary>
/// Removes the document called <c>Name</c> from the search index stored at
/// <c>SearchIndexFilepath</c> and writes the index back to the same file.
/// </summary>
/// <returns>0 on success, 1 if the document name or index path is missing.</returns>
private static int HandleRemove()
{
    if (string.IsNullOrEmpty(Name)) {
        Console.Error.WriteLine("Error: The document name must be specified when removing a document!");
        return 1;
    }
    // Same guard as the other index-based modes: without it an empty path
    // reaches File.ReadAllText and fails with an ArgumentException.
    if (string.IsNullOrEmpty(SearchIndexFilepath)) {
        Console.Error.WriteLine("Error: No search index file path specified.");
        return 1;
    }
    // --------------------------------------
    SearchBox searchBox = JsonConvert.DeserializeObject<SearchBox>(
        File.ReadAllText(SearchIndexFilepath)
    );
    searchBox.RemoveDocument(Name);
    File.WriteAllText(SearchIndexFilepath, JsonConvert.SerializeObject(searchBox));
    Console.Error.WriteLine($"[Searchbox] [remove] {Name} <- {SearchIndexFilepath}");
    return 0;
}
/// <summary>
/// Runs <c>Query</c> against the search index stored at
/// <c>SearchIndexFilepath</c> and prints the results to standard output
/// as JSON or as numbered text lines, after applying the offset and limit.
/// </summary>
/// <returns>
/// 0 on success, 1 if the query or index path is missing or the requested
/// output format is not supported for queries.
/// </returns>
private static int HandleQuery()
{
    if (string.IsNullOrEmpty(Query)) {
        Console.Error.WriteLine("Error: No query specified!");
        return 1;
    }
    if (SearchIndexFilepath == string.Empty) {
        Console.Error.WriteLine("Error: No search index file path specified.");
        return 1;
    }
    // Only JSON and text are implemented below. Reject HTML up front rather
    // than printing nothing and reporting success.
    if (OutputMode == OutputModes.Html) {
        Console.Error.WriteLine("Error: HTML output for queries is not supported.");
        return 1;
    }

    SearchBox searchBox = JsonConvert.DeserializeObject<SearchBox>(
        File.ReadAllText(SearchIndexFilepath)
    );
    IEnumerable<SearchResult> resultsRaw = searchBox.Query(Query, new QuerySettings()).Skip(ResultsOffset);
    // A limit of zero or below means "no limit".
    List<SearchResult> results = new List<SearchResult>(
        ResultsLimit > 0 ? resultsRaw.Take(ResultsLimit) : resultsRaw
    );

    switch (OutputMode)
    {
        case OutputModes.Json:
            Console.WriteLine(JsonConvert.SerializeObject(results));
            break;
        case OutputModes.Text:
            int i = 0;
            foreach (SearchResult nextResult in results) {
                Console.WriteLine($"#{i}: {nextResult}");
                i++;
            }
            break;
    }
    return 0;
}
/// <summary>
/// Prints a context string for the document called <c>Name</c>, built from
/// the text read from <c>Source</c> and the current <c>Query</c>, using the
/// search index stored at <c>SearchIndexFilepath</c>.
/// </summary>
/// <returns>
/// 0 on success, 1 if the name, query or index path is missing or JSON
/// output was requested.
/// </returns>
private static int HandleContextGeneration()
{
    // Find the first missing required setting, checked in a fixed order.
    string complaint =
        string.IsNullOrEmpty(Name) ? "Error: No document name specified." :
        string.IsNullOrEmpty(Query) ? "Error: No query specified." :
        SearchIndexFilepath == string.Empty ? "Error: No search index file path specified." :
        null;
    if (complaint != null) {
        Console.Error.WriteLine(complaint);
        return 1;
    }

    string serialisedIndex = File.ReadAllText(SearchIndexFilepath);
    SearchBox searchBox = JsonConvert.DeserializeObject<SearchBox>(serialisedIndex);
    ContextSettings generationSettings = new ContextSettings();

    if (OutputMode == OutputModes.Json) {
        Console.Error.WriteLine("Error: JSON output for context generation is not supported.");
        return 1;
    }
    // Any other value leaves the setting at whatever ContextSettings defaults to.
    if (OutputMode == OutputModes.Html)
        generationSettings.Html = true;
    else if (OutputMode == OutputModes.Text)
        generationSettings.Html = false;

    string documentText = Source.ReadToEnd();
    var context = searchBox.GenerateContext(Name, documentText, Query, generationSettings);
    Console.WriteLine(context);
    return 0;
}
/// <summary>
/// Builds an index from the whole of <c>Source</c> and prints it to
/// standard output as JSON or as text.
/// </summary>
/// <returns>0 on success, 1 if the requested output format is not supported for indexing.</returns>
private static int HandleIndex()
{
    // Only JSON and text are implemented below. Reject HTML before reading
    // the source rather than printing nothing and reporting success.
    if (OutputMode == OutputModes.Html) {
        Console.Error.WriteLine("Error: HTML output for index generation is not supported.");
        return 1;
    }

    Index index = new Index(Source.ReadToEnd());
    switch (OutputMode)
    {
        case OutputModes.Json:
            Console.WriteLine(JsonConvert.SerializeObject(index));
            break;
        case OutputModes.Text:
            Console.WriteLine(index);
            break;
    }
    return 0;
}
}
}