333 lines
9.8 KiB
C#
333 lines
9.8 KiB
C#
using System;
|
|
using System.Collections.Generic;
|
|
using System.IO;
|
|
using System.Linq;
|
|
using System.Text.RegularExpressions;
|
|
using System.Threading.Tasks;
|
|
|
|
using Newtonsoft.Json;
|
|
|
|
using SBRL.Utilities;
|
|
using LibSearchBox;
|
|
|
|
namespace SearchBoxCLI
|
|
{
|
|
/// <summary>
/// The operation the program carries out. It is selected by the first
/// positional (non-option) command-line argument.
/// </summary>
enum OperatingModes
{
    /// <summary>Query a pre-existing search index.</summary>
    Query = 0,

    /// <summary>Generate a raw index of the source document.</summary>
    Index = 1,

    /// <summary>Add a named document to a search index.</summary>
    Add = 2,

    /// <summary>Remove a named document from a search index.</summary>
    Remove = 3,

    /// <summary>Update a named document in a search index.</summary>
    Update = 4,

    /// <summary>
    /// Generate a context string for a document and query. On the command
    /// line this mode is spelled "context".
    /// </summary>
    GenerateContext = 5
}
|
|
|
|
/// <summary>
/// The format results are written in. It is selected with the --format option.
/// </summary>
enum OutputModes
{
    /// <summary>JSON output.</summary>
    Json = 0,

    /// <summary>Plain-text output; the default.</summary>
    Text = 1,

    /// <summary>HTML output.</summary>
    Html = 2
}
|
|
|
|
class MainClass {
|
|
// Positional (non-option) arguments, in the order they appeared. The first
// one names the operating mode.
private static List<string> Extras = new List<string>();

// The operation to perform and the format to print results in.
private static OperatingModes Mode = OperatingModes.Query;
private static OutputModes OutputMode = OutputModes.Text;

// Set by --batch: documents to add are listed one per line in the source.
private static bool Batch = false;

// Document name (--name / -n). Falls back to the --source or --new-source
// filename when no explicit name was given before that option.
private static string Name = string.Empty;

// Document tags (--tags). Stays null when the option is not supplied.
private static IEnumerable<string> Tags;

// Path of the search index file (--index).
private static string SearchIndexFilepath = string.Empty;

// Where document text is read from: stdin unless --source replaces it.
private static TextReader Source = Console.In;

// Readers opened by --old-source and --new-source.
private static TextReader SourceOld = null;
private static TextReader SourceNew = null;

// Query string (--query), and the window applied to the result list
// (--limit / --offset). A limit that is not positive means "no limit".
private static string Query = string.Empty;
private static int ResultsLimit = -1;
private static int ResultsOffset = 0;
|
|
|
|
/// <summary>
/// Program entry point. Parses the command-line options into the static
/// settings fields, then dispatches to the handler for the requested mode.
/// </summary>
/// <param name="args">The raw command-line arguments.</param>
/// <returns>
/// The exit code of the handler that ran; 1 when the arguments are invalid
/// or the help text was shown; 128 when the mode has no handler.
/// </returns>
public static int Main(string[] args)
{
    for (int i = 0; i < args.Length; i++)
    {
        // Anything that does not look like an option is a positional argument.
        if (!args[i].StartsWith("-", StringComparison.Ordinal))
        {
            Extras.Add(args[i]);
            continue;
        }

        string value;
        int number;
        switch (args[i].TrimStart('-'))
        {
            case "s":
            case "source":
                if (!TryReadOptionValue(args, ref i, out value)) return 1;
                Source = new StreamReader(value);
                // An explicit --name given earlier wins over the filename.
                Name = Name.Length > 0 ? Name : value;
                break;

            case "batch":
                Batch = true;
                break;

            case "old-source":
                if (!TryReadOptionValue(args, ref i, out value)) return 1;
                SourceOld = new StreamReader(value);
                break;

            case "new-source":
                if (!TryReadOptionValue(args, ref i, out value)) return 1;
                SourceNew = new StreamReader(value);
                Name = Name.Length > 0 ? Name : value;
                break;

            case "tags":
                if (!TryReadOptionValue(args, ref i, out value)) return 1;
                Tags = Regex.Split(value, @",\s*");
                break;

            case "n":
            case "name":
                if (!TryReadOptionValue(args, ref i, out value)) return 1;
                Name = value;
                break;

            case "index":
                if (!TryReadOptionValue(args, ref i, out value)) return 1;
                SearchIndexFilepath = value;
                break;

            case "limit":
                if (!TryReadIntOption(args, ref i, out number)) return 1;
                ResultsLimit = number;
                break;

            case "offset":
                if (!TryReadIntOption(args, ref i, out number)) return 1;
                ResultsOffset = number;
                break;

            case "query":
                if (!TryReadOptionValue(args, ref i, out value)) return 1;
                Query = value;
                break;

            case "format":
                if (!TryReadOptionValue(args, ref i, out value)) return 1;
                OutputModes parsedFormat;
                // Enum.TryParse also accepts numeric strings, so reject
                // numbers that do not map to a declared member.
                if (!Enum.TryParse<OutputModes>(value, true, out parsedFormat)
                    || !Enum.IsDefined(typeof(OutputModes), parsedFormat))
                {
                    Console.Error.WriteLine($"Error: Unknown output format {value}. Possible values: text, json, html.");
                    return 1;
                }
                OutputMode = parsedFormat;
                break;

            case "help":
                return HandleHelp();

            default:
                Console.Error.WriteLine($"Error: Unknown property {args[i]}.");
                return 1;
        }
    }

    if (Extras.Count < 1) return HandleHelp();

    string modeText = Extras[0];
    Extras.RemoveAt(0);

    // "context" is the command-line alias for GenerateContext. Compare the
    // whole word: a substring replace would corrupt the literal
    // "generatecontext" into "generategeneratecontext".
    if (string.Equals(modeText, "context", StringComparison.OrdinalIgnoreCase))
        modeText = "generatecontext";

    OperatingModes parsedMode;
    if (!Enum.TryParse<OperatingModes>(modeText, true, out parsedMode)
        || !Enum.IsDefined(typeof(OperatingModes), parsedMode))
    {
        Console.Error.WriteLine($"Error: Unknown mode {modeText}.");
        return 1;
    }
    Mode = parsedMode;

    switch (Mode)
    {
        case OperatingModes.Index: return HandleIndex();
        case OperatingModes.Add: return HandleAdd();
        case OperatingModes.Remove: return HandleRemove();
        case OperatingModes.Query: return HandleQuery();
        case OperatingModes.GenerateContext: return HandleContextGeneration();
        default:
            Console.Error.WriteLine($"Error: Don't know how to handle mode {Mode}.");
            return 128;
    }
}

/// <summary>
/// Consumes the argument following the option at <paramref name="i"/> as
/// that option's value, printing an error if there is none.
/// </summary>
/// <param name="args">The raw command-line arguments.</param>
/// <param name="i">Index of the option; advanced to the value on success.</param>
/// <param name="value">The option's value, or null when it is missing.</param>
/// <returns>True if a value was available; false otherwise.</returns>
private static bool TryReadOptionValue(string[] args, ref int i, out string value)
{
    if (i + 1 >= args.Length)
    {
        Console.Error.WriteLine($"Error: Option {args[i]} requires a value.");
        value = null;
        return false;
    }

    value = args[++i];
    return true;
}

/// <summary>
/// Consumes the argument following the option at <paramref name="i"/> and
/// parses it as an integer, printing an error if it is missing or invalid.
/// </summary>
/// <param name="args">The raw command-line arguments.</param>
/// <param name="i">Index of the option; advanced to the value on success.</param>
/// <param name="number">The parsed integer, or 0 on failure.</param>
/// <returns>True if an integer was read; false otherwise.</returns>
private static bool TryReadIntOption(string[] args, ref int i, out int number)
{
    string optionName = args[i];
    string text;
    number = 0;

    if (!TryReadOptionValue(args, ref i, out text))
        return false;

    if (int.TryParse(text, out number))
        return true;

    Console.Error.WriteLine($"Error: Option {optionName} expects an integer, but got '{text}'.");
    return false;
}
|
|
|
|
/// <summary>
/// Writes the usage text to standard output.
/// </summary>
/// <returns>Always 1.</returns>
private static int HandleHelp()
{
    Console.WriteLine("SearchBox");
    Console.WriteLine("---------");
    Console.WriteLine("A standalone full-text search engine. Operates on plain text sources.");
    Console.WriteLine();
    Console.WriteLine("Usage:");
    Console.WriteLine(" ./SearchBox.exe {mode} [options]");
    Console.WriteLine();
    Console.WriteLine("Modes:");
    Console.WriteLine(" query Query a pre-existing inverted search index");
    Console.WriteLine(" context Generate a context string similar to a search result on the internet");
    Console.WriteLine(" index Generate a raw index of the source document.");
    Console.WriteLine(" add Add a named document to a search index.");
    Console.WriteLine(" remove Remove a named document from a search index.");
    Console.WriteLine(" update Update a named document in a search index.");
    Console.WriteLine();
    Console.WriteLine("Options:");
    Console.WriteLine(" --batch Enters a mode where the operations to process are specified via the source (by default stdin; change with --source as usual) - one per line in the format \"{filename}|{name}|{tags}\" {add}");
    Console.WriteLine(" --format Sets the format of the output. Possible values: text (default), json, html (context generation only) {query, index, context}");
    // The query and context handlers also refuse to run without an index path.
    Console.WriteLine(" --index Specifies the location of the search index to use {add, remove, query, context, update}");
    // Context generation requires a document name; "title" is not a mode.
    Console.WriteLine(" --name, -n Sets the name of the source document {add, remove, context}");
    Console.WriteLine(" --new-source Specifies the path to the new version of the source document to update {update}");
    Console.WriteLine(" --limit Limits the number of results returned, -1 = no limit {query}");
    Console.WriteLine(" --offset Skips the specified number of results from the beginning of the results list {query}");
    Console.WriteLine(" --old-source Specifies the path to the old version of the source document to update {update}");
    Console.WriteLine(" --query Specifies the query string {query, context}");
    Console.WriteLine(" --source, -s Specifies the path to the source document {index, add,context}");
    Console.WriteLine(" --tags Sets the tags to associate with the document. {add, update}");
    Console.WriteLine();
    Console.WriteLine("Examples:");
    // The add handler rejects a missing --index, so the example must pass one.
    Console.WriteLine(" cat books/complex_knots.txt | ./SearchBox.exe add --index books.index.json --name \"Complex Knots: How to do and undo them\"");
    Console.WriteLine();

    return 1;
}
|
|
|
|
/// <summary>
/// Adds one document (or, in batch mode, every document listed in the
/// source) to the search index file, creating the index if the file does
/// not exist yet.
/// </summary>
/// <returns>0 on success; 1 if a required setting is missing, a batch line
/// is malformed, or a batch file cannot be found.</returns>
private static int HandleAdd()
{
    if (Name == string.Empty && !Batch)
    {
        Console.Error.WriteLine("Error: The document name must be specified when reading from stdin!");
        return 1;
    }
    if (SearchIndexFilepath == string.Empty)
    {
        Console.Error.WriteLine("Error: No search index file path specified.");
        return 1;
    }

    // --------------------------------------

    SearchBox searchBox;
    if (!File.Exists(SearchIndexFilepath))
        searchBox = new SearchBox();
    else
        searchBox = JsonConvert.DeserializeObject<SearchBox>(File.ReadAllText(SearchIndexFilepath));

    if (!Batch)
    {
        searchBox.AddDocument(Name, Tags, Source.ReadToEnd());
    }
    else
    {
        try
        {
            // NOTE(review): AddDocument is called from several threads at
            // once here; this assumes SearchBox tolerates that - TODO confirm.
            Parallel.ForEach(LineIterator.GetLines(Source), (string nextLine) => {
                string[] parts = nextLine.Split('|');
                string filename = parts[0].Trim();
                if (filename.Length == 0)
                    return;

                // Each line must be "{filename}|{name}|{tags}"; report a
                // short line instead of indexing past the end of the array.
                if (parts.Length < 3)
                    throw new FormatException($"Error: Malformed batch line (expected \"{{filename}}|{{name}}|{{tags}}\"): {nextLine}");

                searchBox.AddDocument(
                    parts[1].Trim(),
                    Regex.Split(parts[2], @",\s*"),
                    File.ReadAllText(filename)
                );
                Console.Error.WriteLine($"[Searchbox] [add] {filename}");
            });
        }
        catch (AggregateException error)
        {
            // Parallel.ForEach wraps exceptions from the loop body in an
            // AggregateException, so the expected failures are unwrapped
            // here. Handle() rethrows anything the callback rejects.
            error.Flatten().Handle((Exception inner) => {
                if (!(inner is FileNotFoundException) && !(inner is FormatException))
                    return false;
                Console.Error.WriteLine(inner.Message);
                return true;
            });
            return 1;
        }
        catch (FileNotFoundException error)
        {
            Console.Error.WriteLine(error.Message);
            return 1;
        }
    }

    File.WriteAllText(SearchIndexFilepath, JsonConvert.SerializeObject(searchBox));

    Console.Error.WriteLine($"[Searchbox] [add] {Name} -> {SearchIndexFilepath}");

    return 0;
}
|
|
|
|
|
|
|
|
/// <summary>
/// Removes the document called <c>Name</c> from the search index file and
/// writes the updated index back to the same path.
/// </summary>
/// <returns>0 on success; 1 if the document name or the index path is
/// missing.</returns>
private static int HandleRemove()
{
    if (string.IsNullOrEmpty(Name))
    {
        Console.Error.WriteLine("Error: The document name must be specified when removing a document!");
        return 1;
    }
    // Same guard as the other handlers that load the index: without it an
    // empty path would reach File.ReadAllText and throw.
    if (SearchIndexFilepath == string.Empty)
    {
        Console.Error.WriteLine("Error: No search index file path specified.");
        return 1;
    }

    // --------------------------------------

    SearchBox searchBox = JsonConvert.DeserializeObject<SearchBox>(
        File.ReadAllText(SearchIndexFilepath)
    );

    searchBox.RemoveDocument(Name);

    File.WriteAllText(SearchIndexFilepath, JsonConvert.SerializeObject(searchBox));

    Console.Error.WriteLine($"[Searchbox] [remove] {Name} <- {SearchIndexFilepath}");

    return 0;
}
|
|
|
|
/// <summary>
/// Runs <c>Query</c> against the search index file and prints the results,
/// after skipping <c>ResultsOffset</c> entries and, when
/// <c>ResultsLimit</c> is positive, keeping at most that many.
/// </summary>
/// <returns>0 on success; 1 if the query or index path is missing, or the
/// requested output format is not supported for queries.</returns>
private static int HandleQuery()
{
    if (string.IsNullOrEmpty(Query))
    {
        Console.Error.WriteLine("Error: No query specified!");
        return 1;
    }
    if (SearchIndexFilepath == string.Empty)
    {
        Console.Error.WriteLine("Error: No search index file path specified.");
        return 1;
    }
    // Only JSON and text are printed below; refuse HTML explicitly instead
    // of printing nothing and reporting success.
    if (OutputMode == OutputModes.Html)
    {
        Console.Error.WriteLine("Error: HTML output for queries is not supported.");
        return 1;
    }

    SearchBox searchBox = JsonConvert.DeserializeObject<SearchBox>(
        File.ReadAllText(SearchIndexFilepath)
    );

    IEnumerable<SearchResult> resultsRaw = searchBox.Query(Query, new QuerySettings()).Skip(ResultsOffset);
    // A limit that is not positive means "no limit".
    List<SearchResult> results = new List<SearchResult>(
        ResultsLimit > 0 ? resultsRaw.Take(ResultsLimit) : resultsRaw
    );

    switch (OutputMode)
    {
        case OutputModes.Json:
            Console.WriteLine(JsonConvert.SerializeObject(results));
            break;
        case OutputModes.Text:
            int i = 0;
            foreach (SearchResult nextResult in results)
            {
                Console.WriteLine($"#{i}: {nextResult}");
                i++;
            }
            break;
    }
    return 0;
}
|
|
|
|
/// <summary>
/// Prints a context string for the document called <c>Name</c>, built from
/// the source text and <c>Query</c>, as plain text or HTML.
/// </summary>
/// <returns>0 on success; 1 if the document name, query or index path is
/// missing, or if JSON output was requested.</returns>
private static int HandleContextGeneration()
{
    // Guard clauses: every setting this mode depends on must be present.
    if (Name == null || Name.Length == 0)
    {
        Console.Error.WriteLine("Error: No document name specified.");
        return 1;
    }

    if (Query == null || Query.Length == 0)
    {
        Console.Error.WriteLine("Error: No query specified.");
        return 1;
    }

    if (SearchIndexFilepath == "")
    {
        Console.Error.WriteLine("Error: No search index file path specified.");
        return 1;
    }

    string indexJson = File.ReadAllText(SearchIndexFilepath);
    SearchBox searchBox = JsonConvert.DeserializeObject<SearchBox>(indexJson);

    // Translate the requested output format into generation settings.
    ContextSettings generationSettings = new ContextSettings();
    if (OutputMode == OutputModes.Json)
    {
        Console.Error.WriteLine("Error: JSON output for context generation is not supported.");
        return 1;
    }
    else if (OutputMode == OutputModes.Html)
    {
        generationSettings.Html = true;
    }
    else if (OutputMode == OutputModes.Text)
    {
        generationSettings.Html = false;
    }

    string sourceText = Source.ReadToEnd();
    Console.WriteLine(searchBox.GenerateContext(Name, sourceText, Query, generationSettings));

    return 0;
}
|
|
|
|
/// <summary>
/// Builds an index of the whole source text and prints it as JSON or text.
/// </summary>
/// <returns>0 on success; 1 if the requested output format is not
/// supported for indexing.</returns>
private static int HandleIndex()
{
    // Only JSON and text are printed below; refuse HTML before consuming
    // the source instead of printing nothing and reporting success.
    if (OutputMode == OutputModes.Html)
    {
        Console.Error.WriteLine("Error: HTML output for indexing is not supported.");
        return 1;
    }

    Index index = new Index(Source.ReadToEnd());
    switch (OutputMode)
    {
        case OutputModes.Json:
            Console.WriteLine(JsonConvert.SerializeObject(index));
            break;
        case OutputModes.Text:
            Console.WriteLine(index);
            break;
    }

    return 0;
}
|
|
}
|
|
}
|