SearchBox/SearchBox-CLI/Program.cs

259 lines
7.3 KiB
C#

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using Newtonsoft.Json;
using LibSearchBox;
using System.Text.RegularExpressions;
using Newtonsoft.Json.Serialization;
using System.Threading.Tasks;
using SBRL.Utilities;
namespace SearchBoxCLI
{
/// <summary>
/// The operations the command-line tool can be asked to perform.
/// The mode is chosen by the first positional (non-option) argument and is
/// matched case-insensitively against these member names.
/// </summary>
enum OperatingModes
{
    /// <summary>Query a pre-existing search index. This is the initial value of the mode field.</summary>
    Query = 0,

    /// <summary>Generate a raw index of the source document and print it.</summary>
    Index = 1,

    /// <summary>Add a named document to a search index.</summary>
    Add = 2,

    /// <summary>Remove a named document from a search index.</summary>
    Remove = 3,

    /// <summary>
    /// Update a named document in a search index.
    /// NOTE(review): listed in the help text, but Main has no handler wired up for it.
    /// </summary>
    Update = 4
}
/// <summary>
/// The formats in which query results and raw indexes are written to standard output.
/// </summary>
enum OutputModes
{
    /// <summary>Serialise the result with JsonConvert before printing it.</summary>
    Json = 0,

    /// <summary>Print the result object(s) directly via Console.WriteLine.</summary>
    Text = 1
}
/// <summary>
/// Entry point of the SearchBox command-line tool. Parses the command-line
/// arguments into the static fields below, then dispatches to the handler
/// for the requested operating mode.
/// </summary>
class MainClass
{
    /// <summary>Positional (non-option) arguments in the order given. The first one selects the mode.</summary>
    private static List<string> Extras = new List<string>();

    /// <summary>The operating mode selected by the first positional argument.</summary>
    private static OperatingModes Mode = OperatingModes.Query;

    /// <summary>Set by --batch: the source lists one "{filename}|{name}|{tags}" operation per line.</summary>
    private static bool Batch = false;

    /// <summary>Document name (--name); falls back to the --source / --new-source filename when not set first.</summary>
    private static string Name = string.Empty;

    /// <summary>Tags given via --tags, split on commas. Stays null when the option is absent.</summary>
    private static IEnumerable<string> Tags;

    /// <summary>Path to the search index file (--index).</summary>
    private static string SearchIndexFilepath = string.Empty;

    /// <summary>Where the source document is read from; standard input unless --source is given.</summary>
    private static TextReader Source = Console.In;

    /// <summary>Readers opened by --old-source and --new-source.</summary>
    private static TextReader SourceOld = null, SourceNew = null;

    /// <summary>Output format used by the query and index modes.</summary>
    private static OutputModes OutputMode = OutputModes.Json;

    /// <summary>
    /// Parses the command line and runs the requested mode.
    /// </summary>
    /// <param name="args">The raw command-line arguments.</param>
    /// <returns>
    /// The process exit code: the selected handler's result, 1 for usage
    /// errors (and for the help text), or 128 when the mode has no handler.
    /// </returns>
    public static int Main(string[] args)
    {
        for (int i = 0; i < args.Length; i++)
        {
            // Anything that doesn't look like an option is a positional argument.
            if (!args[i].StartsWith("-", StringComparison.Ordinal))
            {
                Extras.Add(args[i]);
                continue;
            }

            string value;
            switch (args[i].TrimStart('-'))
            {
                case "s":
                case "source":
                    if (!TryReadOptionValue(args, ref i, out value)) return 1;
                    Source = new StreamReader(value);
                    // An explicit name given earlier wins over the filename.
                    Name = Name.Length > 0 ? Name : value;
                    break;
                case "batch":
                    Batch = true;
                    break;
                case "old-source":
                    if (!TryReadOptionValue(args, ref i, out value)) return 1;
                    SourceOld = new StreamReader(value);
                    break;
                case "new-source":
                    if (!TryReadOptionValue(args, ref i, out value)) return 1;
                    SourceNew = new StreamReader(value);
                    Name = Name.Length > 0 ? Name : value;
                    break;
                case "tags":
                    if (!TryReadOptionValue(args, ref i, out value)) return 1;
                    Tags = Regex.Split(value, @",\s*");
                    break;
                case "n":
                case "name":
                    if (!TryReadOptionValue(args, ref i, out value)) return 1;
                    Name = value;
                    break;
                case "index":
                    if (!TryReadOptionValue(args, ref i, out value)) return 1;
                    SearchIndexFilepath = value;
                    break;
                case "help":
                    return HandleHelp();
                default:
                    Console.Error.WriteLine($"Error: Unknown property {args[i]}.");
                    return 1;
            }
        }

        if (Extras.Count < 1) return HandleHelp();

        // The first positional argument names the mode; the rest stay in Extras.
        string modeText = Extras[0];
        Extras.RemoveAt(0);

        // TryParse instead of Parse so that a mistyped mode produces a
        // readable error rather than an unhandled ArgumentException.
        OperatingModes parsedMode;
        if (!Enum.TryParse(modeText, true, out parsedMode))
        {
            Console.Error.WriteLine($"Error: Unknown mode {modeText}.");
            return 1;
        }
        Mode = parsedMode;

        switch (Mode)
        {
            case OperatingModes.Index: return HandleIndex();
            case OperatingModes.Add: return HandleAdd();
            case OperatingModes.Remove: return HandleRemove();
            case OperatingModes.Query: return HandleQuery();
            default:
                // Reached for modes without a handler (e.g. update). Same exit
                // code as before, but no longer silent.
                Console.Error.WriteLine($"Error: The mode {Mode} is not currently supported.");
                return 128;
        }
    }

    /// <summary>
    /// Consumes the value that follows the option at <paramref name="i"/>.
    /// </summary>
    /// <param name="args">The raw command-line arguments.</param>
    /// <param name="i">Index of the option; advanced onto the value on success.</param>
    /// <param name="value">Receives the option's value, or null when it is missing.</param>
    /// <returns>True when a value was present; false (after printing an error) otherwise.</returns>
    private static bool TryReadOptionValue(string[] args, ref int i, out string value)
    {
        if (i + 1 >= args.Length)
        {
            Console.Error.WriteLine($"Error: The option {args[i]} requires a value.");
            value = null;
            return false;
        }
        value = args[++i];
        return true;
    }

    /// <summary>
    /// Loads the search index from <see cref="SearchIndexFilepath"/>, printing
    /// an error when the path is missing, the file does not exist, or the
    /// file does not deserialise to an index.
    /// </summary>
    /// <returns>The deserialised index, or null when it could not be loaded.</returns>
    private static SearchBox LoadSearchBox()
    {
        if (SearchIndexFilepath == string.Empty)
        {
            Console.Error.WriteLine("Error: No search index file path specified.");
            return null;
        }
        if (!File.Exists(SearchIndexFilepath))
        {
            Console.Error.WriteLine($"Error: The search index file {SearchIndexFilepath} does not exist.");
            return null;
        }

        SearchBox searchBox = JsonConvert.DeserializeObject<SearchBox>(
            File.ReadAllText(SearchIndexFilepath)
        );
        // Guard against a null result (e.g. an empty index file) so callers
        // get a message instead of a NullReferenceException.
        if (searchBox == null)
            Console.Error.WriteLine($"Error: The search index file {SearchIndexFilepath} does not contain a search index.");
        return searchBox;
    }

    /// <summary>
    /// Prints the usage text to standard output.
    /// </summary>
    /// <returns>Always 1.</returns>
    private static int HandleHelp()
    {
        Console.WriteLine("SearchBox");
        Console.WriteLine("---------");
        Console.WriteLine("A standalone full-text search engine. Operates on plain text sources.");
        Console.WriteLine();
        Console.WriteLine("Usage:");
        Console.WriteLine(" ./SearchBox.exe {mode} [options]");
        Console.WriteLine(" ./SearchBox.exe query \"{query string}\" [options]");
        Console.WriteLine();
        Console.WriteLine("Modes:");
        Console.WriteLine(" query Query a pre-existing inverted search index");
        Console.WriteLine(" index Generate a raw index of the source document.");
        Console.WriteLine(" add Add a named document to a search index.");
        Console.WriteLine(" remove Remove a named document from a search index.");
        Console.WriteLine(" update Update a named document in a search index.");
        Console.WriteLine();
        Console.WriteLine("Options:");
        Console.WriteLine(" --source, -s Specifies the path to the source document {index, add}");
        Console.WriteLine(" --old-source Specifies the path to the old version of the source document to update {update}");
        Console.WriteLine(" --new-source Specifies the path to the new version of the source document to update {update}");
        Console.WriteLine(" --name, -n Sets the name of the source document {add, remove}");
        Console.WriteLine(" --index Specifies the location of the search index to use {add, remove, update}");
        Console.WriteLine(" --tags Sets the tags to associate with the document. {add, update}");
        Console.WriteLine(" --batch Enters a mode where the operations to process are specified via the source (by default stdin; change with --source as usual) - one per line in the format \"{filename}|{name}|{tags}\" {add}");
        Console.WriteLine();
        Console.WriteLine("Examples:");
        Console.WriteLine(" cat books/complex_knots.txt | ./SearchBox.exe add --name \"Complex Knots: How to do and undo them\"");
        Console.WriteLine();
        return 1;
    }

    /// <summary>
    /// Adds one document (or, in batch mode, every document listed in the
    /// source) to the search index and writes the index back to disk. A new
    /// index is created when the index file does not exist yet.
    /// </summary>
    /// <returns>0 on success, 1 on a usage error or a failed batch entry.</returns>
    private static int HandleAdd()
    {
        if (Name == string.Empty && !Batch)
        {
            Console.Error.WriteLine("Error: The document name must be specified when reading from stdin!");
            return 1;
        }
        if (SearchIndexFilepath == string.Empty)
        {
            Console.Error.WriteLine("Error: No search index file path specified.");
            return 1;
        }
        // --------------------------------------
        SearchBox searchBox;
        if (!File.Exists(SearchIndexFilepath))
            searchBox = new SearchBox();
        else
            searchBox = JsonConvert.DeserializeObject<SearchBox>(File.ReadAllText(SearchIndexFilepath));

        if (!Batch)
        {
            searchBox.AddDocument(Name, Tags, Source.ReadToEnd());
        }
        else
        {
            try
            {
                // NOTE(review): this calls AddDocument from several threads at
                // once - confirm that SearchBox.AddDocument is safe for that.
                Parallel.ForEach(LineIterator.GetLines(Source), (string nextLine) =>
                {
                    string[] parts = nextLine.Split('|');
                    string filename = parts[0].Trim();
                    if (filename.Length == 0)
                        return; // Skip blank lines / entries without a filename

                    if (parts.Length < 3)
                        throw new FormatException(
                            "Error: Malformed batch line \"" + nextLine + "\" - expected \"{filename}|{name}|{tags}\"."
                        );

                    searchBox.AddDocument(
                        parts[1].Trim(),
                        Regex.Split(parts[2], @",\s*"),
                        File.ReadAllText(filename)
                    );
                    Console.Error.WriteLine($"[Searchbox] [add] {filename}");
                });
            }
            catch (AggregateException error)
            {
                // Parallel.ForEach wraps exceptions thrown by the loop body in
                // an AggregateException, so the expected failures have to be
                // picked out of it. Anything unexpected is rethrown by Handle().
                error.Flatten().Handle((Exception inner) =>
                {
                    if (!(inner is FileNotFoundException) && !(inner is FormatException))
                        return false;
                    Console.Error.WriteLine(inner.Message);
                    return true;
                });
                return 1;
            }
        }

        File.WriteAllText(SearchIndexFilepath, JsonConvert.SerializeObject(searchBox));
        Console.Error.WriteLine($"[Searchbox] [add] {Name} -> {SearchIndexFilepath}");
        return 0;
    }

    /// <summary>
    /// Removes the document called <see cref="Name"/> from the search index
    /// and writes the index back to disk.
    /// </summary>
    /// <returns>0 on success, 1 when the name or the index is missing.</returns>
    private static int HandleRemove()
    {
        if (Name == string.Empty)
        {
            Console.Error.WriteLine("Error: The document name must be specified when removing a document!");
            return 1;
        }
        // --------------------------------------
        SearchBox searchBox = LoadSearchBox();
        if (searchBox == null) return 1;

        searchBox.RemoveDocument(Name);
        File.WriteAllText(SearchIndexFilepath, JsonConvert.SerializeObject(searchBox));
        Console.Error.WriteLine($"[Searchbox] [remove] {Name} <- {SearchIndexFilepath}");
        return 0;
    }

    /// <summary>
    /// Runs the first remaining positional argument as a query against the
    /// search index and prints the results in the current output mode.
    /// </summary>
    /// <returns>0 on success, 1 when the query or the index is missing.</returns>
    private static int HandleQuery()
    {
        if (Extras.Count < 1)
        {
            Console.Error.WriteLine("Error: No query specified!");
            return 1;
        }

        SearchBox searchBox = LoadSearchBox();
        if (searchBox == null) return 1;

        List<SearchResult> results = searchBox.Query(Extras[0], new QuerySettings());
        switch (OutputMode)
        {
            case OutputModes.Json:
                Console.WriteLine(JsonConvert.SerializeObject(results));
                break;
            case OutputModes.Text:
                foreach (SearchResult nextResult in results)
                    Console.WriteLine(nextResult);
                break;
        }
        return 0;
    }

    /// <summary>
    /// Builds a raw index of the whole source document and prints it in the
    /// current output mode.
    /// </summary>
    /// <returns>Always 0.</returns>
    private static int HandleIndex()
    {
        Index index = new Index(Source.ReadToEnd());
        switch (OutputMode)
        {
            case OutputModes.Json:
                Console.WriteLine(JsonConvert.SerializeObject(index));
                break;
            case OutputModes.Text:
                Console.WriteLine(index);
                break;
        }
        return 0;
    }
}
}