259 lines
7.3 KiB
C#
259 lines
7.3 KiB
C#
using System;
|
|
using System.Collections.Generic;
|
|
using System.IO;
|
|
using System.Linq;
|
|
using Newtonsoft.Json;
|
|
|
|
using LibSearchBox;
|
|
using System.Text.RegularExpressions;
|
|
using Newtonsoft.Json.Serialization;
|
|
using System.Threading.Tasks;
|
|
using SBRL.Utilities;
|
|
|
|
namespace SearchBoxCLI
|
|
{
|
|
/// <summary>
/// The operations the command-line tool can be asked to perform. The mode is
/// named by the first positional argument and parsed case-insensitively.
/// </summary>
enum OperatingModes
{
    /// <summary>Query a pre-existing search index.</summary>
    Query = 0,

    /// <summary>Generate a raw index of the source document.</summary>
    Index = 1,

    /// <summary>Add a named document to a search index.</summary>
    Add = 2,

    /// <summary>Remove a named document from a search index.</summary>
    Remove = 3,

    /// <summary>
    /// Update a named document in a search index. Described by the help text,
    /// but no handler method for it exists in this file.
    /// </summary>
    Update = 4
}
|
|
|
|
/// <summary>
/// The formats in which the query and index handlers can write their result
/// to standard output.
/// </summary>
enum OutputModes
{
    /// <summary>Write the result serialised as JSON.</summary>
    Json = 0,

    /// <summary>Write the result using its plain-text representation.</summary>
    Text = 1
}
|
|
|
|
class MainClass {
|
|
/// <summary>Arguments that do not start with "-", in the order they were given.</summary>
private static List<string> Extras = new List<string>();

/// <summary>The operation to perform; parsed from the first positional argument.</summary>
private static OperatingModes Mode = OperatingModes.Query;

/// <summary>Set by --batch: the source lists one document per line instead of being the document.</summary>
private static bool Batch;

/// <summary>Document name (--name); falls back to the source filename when a source file is given first.</summary>
private static string Name = "";

/// <summary>Tags given via --tags, split on commas. Stays null when the option is absent.</summary>
private static IEnumerable<string> Tags;

/// <summary>Path of the search index file (--index); empty when not specified.</summary>
private static string SearchIndexFilepath = "";

/// <summary>Where the source text is read from: standard input unless --source names a file.</summary>
private static TextReader Source = Console.In;

/// <summary>Reader over the file given via --old-source; null when the option is absent.</summary>
private static TextReader SourceOld;

/// <summary>Reader over the file given via --new-source; null when the option is absent.</summary>
private static TextReader SourceNew;

/// <summary>Result format used by the query and index handlers. Nothing in this file changes it from Json.</summary>
private static OutputModes OutputMode = OutputModes.Json;
|
|
|
|
/// <summary>
/// Program entry point. Parses the command-line options into the static
/// settings fields, then dispatches to the handler for the operating mode
/// named by the first positional argument.
/// </summary>
/// <param name="args">The raw command-line arguments.</param>
/// <returns>
/// The exit code of the selected handler. 1 when help is shown, or for an
/// unknown option, an option that is missing its value, or an unknown mode.
/// 128 when the mode has no handler (currently: update).
/// </returns>
public static int Main(string[] args)
{
    for (int i = 0; i < args.Length; i++)
    {
        // Anything that doesn't look like an option is a positional argument.
        if (!args[i].StartsWith("-", StringComparison.Ordinal)) {
            Extras.Add(args[i]);
            continue;
        }

        string optionValue;
        switch (args[i].TrimStart('-')) {
            case "s":
            case "source":
                if (!TryTakeOptionValue(args, ref i, out optionValue)) return 1;
                Source = new StreamReader(optionValue);
                // An explicit --name always wins over the filename.
                Name = Name.Length > 0 ? Name : optionValue;
                break;

            case "batch":
                Batch = true;
                break;

            case "old-source":
                if (!TryTakeOptionValue(args, ref i, out optionValue)) return 1;
                SourceOld = new StreamReader(optionValue);
                break;

            case "new-source":
                if (!TryTakeOptionValue(args, ref i, out optionValue)) return 1;
                SourceNew = new StreamReader(optionValue);
                Name = Name.Length > 0 ? Name : optionValue;
                break;

            case "tags":
                if (!TryTakeOptionValue(args, ref i, out optionValue)) return 1;
                Tags = Regex.Split(optionValue, @",\s*");
                break;

            case "n":
            case "name":
                if (!TryTakeOptionValue(args, ref i, out optionValue)) return 1;
                Name = optionValue;
                break;

            case "index":
                if (!TryTakeOptionValue(args, ref i, out optionValue)) return 1;
                SearchIndexFilepath = optionValue;
                break;

            case "help":
                return HandleHelp();

            default:
                Console.Error.WriteLine($"Error: Unknown property {args[i]}.");
                return 1;
        }
    }

    if (Extras.Count < 1) return HandleHelp();

    // The first positional argument selects the mode; the rest stay in Extras.
    string modeText = Extras[0];
    Extras.RemoveAt(0);

    // TryParse + IsDefined rejects both unknown names and out-of-range numbers
    // with a readable message instead of an unhandled ArgumentException.
    OperatingModes parsedMode;
    if (!Enum.TryParse(modeText, true, out parsedMode)
        || !Enum.IsDefined(typeof(OperatingModes), parsedMode)) {
        Console.Error.WriteLine($"Error: Unknown mode {modeText}.");
        return 1;
    }
    Mode = parsedMode;

    switch (Mode) {
        case OperatingModes.Index: return HandleIndex();
        case OperatingModes.Add: return HandleAdd();
        case OperatingModes.Remove: return HandleRemove();
        case OperatingModes.Query: return HandleQuery();
        case OperatingModes.Update:
            // No handler exists for this mode; say so rather than exiting silently.
            Console.Error.WriteLine("Error: The update mode is not implemented.");
            return 128;
    }

    return 128;
}

/// <summary>
/// Fetches the value that follows the option at <paramref name="index"/>,
/// advancing <paramref name="index"/> onto it. Writes an error to stderr and
/// leaves the index untouched when the option is the last argument.
/// </summary>
/// <param name="args">The raw command-line arguments.</param>
/// <param name="index">Position of the option; moved to the value on success.</param>
/// <param name="value">The option's value, or null on failure.</param>
/// <returns>true if a value was available; otherwise false.</returns>
private static bool TryTakeOptionValue(string[] args, ref int index, out string value)
{
    if (index + 1 >= args.Length) {
        Console.Error.WriteLine($"Error: Option {args[index]} requires a value.");
        value = null;
        return false;
    }

    value = args[++index];
    return true;
}
|
|
|
|
/// <summary>
/// Prints the usage text (modes, options and an example) to standard output.
/// </summary>
/// <returns>Always 1.</returns>
private static int HandleHelp()
{
    // One entry per output line; an empty entry prints a blank line.
    string[] helpLines = {
        "SearchBox",
        "---------",
        "A standalone full-text search engine. Operates on plain text sources.",
        "",
        "Usage:",
        " ./SearchBox.exe {mode} [options]",
        " ./SearchBox.exe query \"{query string}\" [options]",
        "",
        "Modes:",
        " query Query a pre-existing inverted search index",
        " index Generate a raw index of the source document.",
        " add Add a named document to a search index.",
        " remove Remove a named document from a search index.",
        " update Update a named document in a search index.",
        "",
        "Options:",
        " --source, -s Specifies the path to the source document {index, add}",
        " --old-source Specifies the path to the old version of the source document to update {update}",
        " --new-source Specifies the path to the new version of the source document to update {update}",
        " --name, -n Sets the name of the source document {add, remove}",
        // The query handler reads the index file too, so query is listed here.
        " --index Specifies the location of the search index to use {query, add, remove, update}",
        " --tags Sets the tags to associate with the document. {add, update}",
        " --batch Enters a mode where the operations to process are specified via the source (by default stdin; change with --source as usual) - one per line in the format \"{filename}|{name}|{tags}\" {add}",
        "",
        "Examples:",
        // The add mode refuses to run without --index, so the example includes it.
        " cat books/complex_knots.txt | ./SearchBox.exe add --index index.json --name \"Complex Knots: How to do and undo them\"",
        ""
    };

    foreach (string helpLine in helpLines)
        Console.WriteLine(helpLine);

    return 1;
}
|
|
|
|
/// <summary>
/// Adds one document (or, in batch mode, one document per source line) to the
/// search index at <c>SearchIndexFilepath</c>, creating the index if the file
/// does not exist, and writes the index back to that file.
/// </summary>
/// <remarks>
/// In batch mode each source line has the form <c>{filename}|{name}|{tags}</c>;
/// lines whose filename part is blank are skipped.
/// </remarks>
/// <returns>
/// 0 on success; 1 if the name or index path is missing, or if a batch line is
/// malformed or names a file that cannot be found (the index is then not written).
/// </returns>
private static int HandleAdd()
{
    if (Name == string.Empty && !Batch)
    {
        Console.Error.WriteLine("Error: The document name must be specified when reading from stdin!");
        return 1;
    }
    if (SearchIndexFilepath == string.Empty)
    {
        Console.Error.WriteLine("Error: No search index file path specified.");
        return 1;
    }

    // --------------------------------------

    SearchBox searchBox = null;
    if (File.Exists(SearchIndexFilepath))
        searchBox = JsonConvert.DeserializeObject<SearchBox>(File.ReadAllText(SearchIndexFilepath));
    // Deserialising an empty (or literal "null") file produces null rather
    // than an index, so start from a fresh index in that case as well.
    if (searchBox == null)
        searchBox = new SearchBox();

    if (!Batch)
    {
        searchBox.AddDocument(Name, Tags, Source.ReadToEnd());
    }
    else
    {
        try
        {
            // NOTE(review): AddDocument is called from several threads at once
            // here - confirm that SearchBox.AddDocument is safe for concurrent use.
            Parallel.ForEach(LineIterator.GetLines(Source), (string nextLine) => {
                string[] parts = nextLine.Split('|');
                string filename = parts[0].Trim();
                if (filename.Length == 0)
                    return;

                if (parts.Length < 3)
                    throw new FormatException($"Error: Invalid batch line (expected filename|name|tags): {nextLine}");

                searchBox.AddDocument(
                    parts[1].Trim(),
                    Regex.Split(parts[2], @",\s*"),
                    File.ReadAllText(filename)
                );
                Console.Error.WriteLine($"[Searchbox] [add] {filename}");
            });
        }
        catch (FileNotFoundException error)
        {
            Console.Error.WriteLine(error.Message);
            return 1;
        }
        catch (AggregateException error)
        {
            // Parallel.ForEach wraps exceptions thrown by the loop body in an
            // AggregateException, so missing files and malformed lines arrive here.
            bool onlyExpectedErrors = true;
            foreach (Exception inner in error.Flatten().InnerExceptions)
            {
                if (inner is FileNotFoundException || inner is FormatException)
                    Console.Error.WriteLine(inner.Message);
                else
                    onlyExpectedErrors = false;
            }

            // Anything else is a genuine fault: let it propagate unchanged.
            if (!onlyExpectedErrors)
                throw;
            return 1;
        }
    }

    File.WriteAllText(SearchIndexFilepath, JsonConvert.SerializeObject(searchBox));

    Console.Error.WriteLine($"[Searchbox] [add] {Name} -> {SearchIndexFilepath}");

    return 0;
}
|
|
|
|
|
|
|
|
/// <summary>
/// Removes the document called <c>Name</c> from the search index stored at
/// <c>SearchIndexFilepath</c> and writes the index back to that file.
/// </summary>
/// <returns>
/// 0 on success; 1 if the document name or index path is missing, or the
/// index file does not exist or does not contain an index.
/// </returns>
private static int HandleRemove()
{
    if (Name == string.Empty) {
        Console.Error.WriteLine("Error: The document name must be specified when removing a document!");
        return 1;
    }
    // Same validation as the add handler: without it File.ReadAllText throws
    // on the empty default path.
    if (SearchIndexFilepath == string.Empty) {
        Console.Error.WriteLine("Error: No search index file path specified.");
        return 1;
    }
    if (!File.Exists(SearchIndexFilepath)) {
        Console.Error.WriteLine($"Error: The search index file {SearchIndexFilepath} does not exist.");
        return 1;
    }

    // --------------------------------------

    SearchBox searchBox = JsonConvert.DeserializeObject<SearchBox>(
        File.ReadAllText(SearchIndexFilepath)
    );
    // An empty (or literal "null") file deserialises to null.
    if (searchBox == null) {
        Console.Error.WriteLine($"Error: The search index file {SearchIndexFilepath} does not contain a search index.");
        return 1;
    }

    searchBox.RemoveDocument(Name);

    File.WriteAllText(SearchIndexFilepath, JsonConvert.SerializeObject(searchBox));

    Console.Error.WriteLine($"[Searchbox] [remove] {Name} <- {SearchIndexFilepath}");

    return 0;
}
|
|
|
|
/// <summary>
/// Runs the query given as the first remaining positional argument against
/// the search index stored at <c>SearchIndexFilepath</c> and writes the
/// results to standard output in the format selected by <c>OutputMode</c>.
/// </summary>
/// <returns>
/// 0 on success; 1 if no query or index path was given, or the index file
/// does not exist or does not contain an index.
/// </returns>
private static int HandleQuery()
{
    if (Extras.Count < 1) {
        Console.Error.WriteLine("Error: No query specified!");
        return 1;
    }
    // Same validation as the add handler: without it File.ReadAllText throws
    // on the empty default path.
    if (SearchIndexFilepath == string.Empty) {
        Console.Error.WriteLine("Error: No search index file path specified.");
        return 1;
    }
    if (!File.Exists(SearchIndexFilepath)) {
        Console.Error.WriteLine($"Error: The search index file {SearchIndexFilepath} does not exist.");
        return 1;
    }

    SearchBox searchBox = JsonConvert.DeserializeObject<SearchBox>(
        File.ReadAllText(SearchIndexFilepath)
    );
    // An empty (or literal "null") file deserialises to null.
    if (searchBox == null) {
        Console.Error.WriteLine($"Error: The search index file {SearchIndexFilepath} does not contain a search index.");
        return 1;
    }

    List<SearchResult> results = searchBox.Query(Extras[0], new QuerySettings());

    switch (OutputMode)
    {
        case OutputModes.Json:
            Console.WriteLine(JsonConvert.SerializeObject(results));
            break;
        case OutputModes.Text:
            foreach (SearchResult nextResult in results)
                Console.WriteLine(nextResult);
            break;
    }

    return 0;
}
|
|
|
|
/// <summary>
/// Reads the whole source text, builds an <c>Index</c> from it and writes
/// that index to standard output in the format selected by <c>OutputMode</c>.
/// </summary>
/// <returns>Always 0.</returns>
private static int HandleIndex()
{
    string sourceText = Source.ReadToEnd();
    Index rawIndex = new Index(sourceText);

    if (OutputMode == OutputModes.Json)
    {
        string serialised = JsonConvert.SerializeObject(rawIndex);
        Console.WriteLine(serialised);
    }
    else if (OutputMode == OutputModes.Text)
    {
        Console.WriteLine(rawIndex);
    }

    return 0;
}
|
|
}
|
|
}
|