2018-09-02 15:36:37 +00:00
using System ;
2018-09-09 20:02:40 +00:00
using System.Collections.Generic ;
using System.IO ;
using System.Linq ;
2018-09-11 13:27:25 +00:00
using System.Text.RegularExpressions ;
2018-09-11 22:02:30 +00:00
using System.Threading.Tasks ;
2018-09-22 16:12:46 +00:00
using Newtonsoft.Json ;
2018-09-11 22:02:30 +00:00
using SBRL.Utilities ;
2018-09-22 16:12:46 +00:00
using LibSearchBox ;
2018-09-09 20:02:40 +00:00
2018-09-02 15:36:37 +00:00
namespace SearchBoxCLI
{
2018-09-09 20:02:40 +00:00
/// <summary>
/// The top-level action requested on the command line. The first
/// non-option argument is parsed (case-insensitively) into one of these.
/// </summary>
enum OperatingModes
{
    /// <summary>Query an existing search index.</summary>
    Query = 0,
    /// <summary>Build a raw index of a single source document.</summary>
    Index = 1,
    /// <summary>Add a document to a search index.</summary>
    Add = 2,
    /// <summary>Remove a named document from a search index.</summary>
    Remove = 3,
    /// <summary>Update a document in a search index (no handler is dispatched for this mode in Main).</summary>
    Update = 4,
    /// <summary>Generate a context string for a document and query; selected with the mode word "context".</summary>
    GenerateContext = 5
}
/// <summary>
/// The output format selected with the --format option.
/// </summary>
enum OutputModes
{
    /// <summary>Serialise the result as JSON.</summary>
    Json = 0,
    /// <summary>Plain text output; this is the initial value of the OutputMode field.</summary>
    Text = 1,
    /// <summary>HTML output; the help text describes it as context generation only.</summary>
    Html = 2
}
class MainClass {
2018-09-22 11:11:47 +00:00
/// <summary>Arguments that did not start with "-"; the first one is taken as the operating mode.</summary>
private static List<string> Extras = new List<string>();

/// <summary>The operating mode; overwritten in Main from the first extra argument.</summary>
private static OperatingModes Mode = OperatingModes.Query;

/// <summary>The output format, set by --format.</summary>
private static OutputModes OutputMode = OutputModes.Text;

/// <summary>Whether --batch was passed.</summary>
private static bool Batch;

/// <summary>The document name (--name / -n); falls back to the --source or --new-source filename when not yet set.</summary>
private static string Name = "";

/// <summary>The tags given with --tags, split on commas. Stays null when --tags is not passed.</summary>
private static IEnumerable<string> Tags;

/// <summary>The path to the search index file (--index).</summary>
private static string SearchIndexFilepath = "";

/// <summary>Where document text is read from: stdin unless --source is given.</summary>
private static TextReader Source = Console.In;

/// <summary>Reader opened by --old-source. Not read by any handler visible in this file.</summary>
private static TextReader SourceOld;

/// <summary>Reader opened by --new-source. Not read by any handler visible in this file.</summary>
private static TextReader SourceNew;

/// <summary>The query string (--query).</summary>
private static string Query = "";

/// <summary>Maximum number of query results (--limit); values of zero or below mean no limit is applied.</summary>
private static int ResultsLimit = -1;

/// <summary>Number of leading query results to skip (--offset).</summary>
private static int ResultsOffset;
2018-09-09 20:02:40 +00:00
/// <summary>
/// Program entry point. Parses the command-line options into the static
/// settings fields, takes the first non-option argument as the operating
/// mode, and dispatches to the matching handler.
/// </summary>
/// <param name="args">The raw command-line arguments.</param>
/// <returns>
/// The process exit code: the handler's return value, 1 for a usage error
/// (unknown option, missing or invalid option value, unknown mode) or when
/// help is shown, and 128 for a mode that has no handler.
/// </returns>
public static int Main(string[] args)
{
    for (int i = 0; i < args.Length; i++)
    {
        // Anything that doesn't look like an option is collected for later;
        // the first such argument is the operating mode.
        if (!args[i].StartsWith("-")) {
            Extras.Add(args[i]);
            continue;
        }

        // Remember the option as typed: i advances once its value is consumed.
        string option = args[i];
        string value;
        switch (option.TrimStart('-')) {
            case "s":
            case "source":
                if (!TryTakeOptionValue(args, ref i, out value)) return 1;
                Source = new StreamReader(value);
                // An explicitly given name always wins over the filename.
                Name = Name.Length > 0 ? Name : value;
                break;

            case "batch":
                Batch = true;
                break;

            case "old-source":
                if (!TryTakeOptionValue(args, ref i, out value)) return 1;
                SourceOld = new StreamReader(value);
                break;

            case "new-source":
                if (!TryTakeOptionValue(args, ref i, out value)) return 1;
                SourceNew = new StreamReader(value);
                Name = Name.Length > 0 ? Name : value;
                break;

            case "tags":
                if (!TryTakeOptionValue(args, ref i, out value)) return 1;
                Tags = Regex.Split(value, @",\s*");
                break;

            case "n":
            case "name":
                if (!TryTakeOptionValue(args, ref i, out value)) return 1;
                Name = value;
                break;

            case "index":
                if (!TryTakeOptionValue(args, ref i, out value)) return 1;
                SearchIndexFilepath = value;
                break;

            case "limit":
                if (!TryTakeOptionValue(args, ref i, out value)) return 1;
                if (!int.TryParse(value, out ResultsLimit)) {
                    Console.Error.WriteLine($"Error: Invalid number {value} for {option}.");
                    return 1;
                }
                break;

            case "offset":
                if (!TryTakeOptionValue(args, ref i, out value)) return 1;
                if (!int.TryParse(value, out ResultsOffset)) {
                    Console.Error.WriteLine($"Error: Invalid number {value} for {option}.");
                    return 1;
                }
                break;

            case "query":
                if (!TryTakeOptionValue(args, ref i, out value)) return 1;
                Query = value;
                break;

            case "format":
                if (!TryTakeOptionValue(args, ref i, out value)) return 1;
                // TryParse also accepts numeric text, so additionally require a defined member.
                if (!Enum.TryParse(value, true, out OutputMode) || !Enum.IsDefined(typeof(OutputModes), OutputMode)) {
                    Console.Error.WriteLine($"Error: Unknown output format {value}.");
                    return 1;
                }
                break;

            case "help":
                return HandleHelp();

            default:
                Console.Error.WriteLine($"Error: Unknown property {option}.");
                return 1;
        }
    }

    if (Extras.Count < 1) return HandleHelp();

    string requestedMode = Extras[0];
    Extras.RemoveAt(0);

    // "context" is the user-facing alias for the GenerateContext mode. Match the
    // whole word so that a literal "generatecontext" is not rewritten a second time.
    string modeText = requestedMode.Equals("context", StringComparison.OrdinalIgnoreCase)
        ? "generatecontext"
        : requestedMode;

    if (!Enum.TryParse(modeText, true, out Mode) || !Enum.IsDefined(typeof(OperatingModes), Mode)) {
        Console.Error.WriteLine($"Error: Unknown mode {requestedMode}.");
        return 1;
    }

    switch (Mode) {
        case OperatingModes.Index: return HandleIndex();
        case OperatingModes.Add: return HandleAdd();
        case OperatingModes.Remove: return HandleRemove();
        case OperatingModes.Query: return HandleQuery();
        case OperatingModes.GenerateContext: return HandleContextGeneration();
        default:
            Console.Error.WriteLine($"Error: Don't know how to handle mode {Mode}.");
            return 128;
    }
}

/// <summary>
/// Consumes the argument following the option at <paramref name="index"/> as
/// that option's value, printing an error to stderr when there is none.
/// </summary>
/// <param name="args">The raw command-line arguments.</param>
/// <param name="index">Index of the option; advanced by one when a value is taken.</param>
/// <param name="value">Receives the option's value, or null when it is missing.</param>
/// <returns>True if a value was available, false otherwise.</returns>
private static bool TryTakeOptionValue(string[] args, ref int index, out string value)
{
    if (index + 1 >= args.Length) {
        Console.Error.WriteLine($"Error: Missing value for option {args[index]}.");
        value = null;
        return false;
    }
    value = args[++index];
    return true;
}
2018-09-11 13:27:25 +00:00
/// <summary>
/// Prints the usage text (modes, options and an example) to stdout.
/// </summary>
/// <returns>Always 1.</returns>
private static int HandleHelp()
{
    // One entry per output line; an empty entry produces a blank line.
    string[] helpLines = {
        "SearchBox",
        "---------",
        "A standalone full-text search engine. Operates on plain text sources.",
        string.Empty,
        "Usage:",
        " ./SearchBox.exe {mode} [options]",
        string.Empty,
        "Modes:",
        " query Query a pre-existing inverted search index",
        " context Generate a context string similar to a search result on the internet",
        " index Generate a raw index of the source document.",
        " add Add a named document to a search index.",
        " remove Remove a named document from a search index.",
        " update Update a named document in a search index.",
        string.Empty,
        "Options:",
        " --batch Enters a mode where the operations to process are specified via the source (by default stdin; change with --source as usual) - one per line in the format \"{filename}|{name}|{tags}\" {add}",
        " --format Sets the format of the output. Possible values: text (default), json, html (context generation only) {query, index, context}",
        " --index Specifies the location of the search index to use {add, remove, update}",
        " --name, -n Sets the name of the source document {add, remove, title}",
        " --new-source Specifies the path to the new version of the source document to update {update}",
        " --limit Limits the number of results returned, -1 = no limit {query}",
        " --offset Skips the specified number of results from the beginning of the results list {query}",
        " --old-source Specifies the path to the old version of the source document to update {update}",
        " --query Specifies the query string {query, context}",
        " --source, -s Specifies the path to the source document {index, add,context}",
        " --tags Sets the tags to associate with the document. {add, update}",
        string.Empty,
        "Examples:",
        " cat books/complex_knots.txt | ./SearchBox.exe add --name \"Complex Knots: How to do and undo them\"",
        string.Empty
    };

    foreach (string helpLine in helpLines)
        Console.WriteLine(helpLine);

    return 1;
}
/// <summary>
/// Adds one document (or, in batch mode, one document per input line) to
/// the search index at SearchIndexFilepath, creating the index if the file
/// does not exist, then writes the index back to disk.
/// </summary>
/// <remarks>
/// In batch mode each line of Source has the form "{filename}|{name}|{tags}".
/// Lines with an empty filename are skipped. A missing file or a malformed
/// line aborts the run without writing the index.
/// </remarks>
/// <returns>0 on success, 1 on a usage or batch input error.</returns>
private static int HandleAdd()
{
    if (string.IsNullOrEmpty(Name) && !Batch) {
        Console.Error.WriteLine("Error: The document name must be specified when reading from stdin!");
        return 1;
    }
    if (string.IsNullOrEmpty(SearchIndexFilepath)) {
        Console.Error.WriteLine("Error: No search index file path specified.");
        return 1;
    }

    // --------------------------------------

    SearchBox searchBox = null;
    if (File.Exists(SearchIndexFilepath))
        searchBox = JsonConvert.DeserializeObject<SearchBox>(File.ReadAllText(SearchIndexFilepath));
    // Deserialisation can yield null (e.g. for an empty index file); start a fresh index then.
    if (searchBox == null)
        searchBox = new SearchBox();

    if (!Batch) {
        searchBox.AddDocument(Name, Tags, Source.ReadToEnd());
    }
    else {
        try
        {
            // NOTE(review): documents are added from several threads at once; this
            // relies on SearchBox.AddDocument being safe to call concurrently - confirm.
            Parallel.ForEach(LineIterator.GetLines(Source), (string nextLine) => {
                string[] parts = nextLine.Split('|');
                string filename = parts[0].Trim();
                if (filename.Length == 0)
                    return;
                if (parts.Length < 3)
                    throw new FormatException("Error: Malformed batch line (expected 3 '|'-separated fields): " + nextLine);

                searchBox.AddDocument(
                    parts[1].Trim(),
                    Regex.Split(parts[2], @",\s*"),
                    File.ReadAllText(filename)
                );
                Console.Error.WriteLine($"[Searchbox] [add] {filename}");
            });
        }
        catch (FileNotFoundException error) {
            Console.Error.WriteLine(error.Message);
            return 1;
        }
        catch (AggregateException error) {
            // Parallel.ForEach wraps exceptions thrown by the loop body in an
            // AggregateException, so input errors have to be unwrapped here.
            var failures = error.Flatten().InnerExceptions;
            if (!failures.All(inner => inner is FileNotFoundException || inner is FormatException))
                throw;

            foreach (Exception failure in failures)
                Console.Error.WriteLine(failure.Message);
            return 1;
        }
    }

    File.WriteAllText(SearchIndexFilepath, JsonConvert.SerializeObject(searchBox));
    Console.Error.WriteLine($"[Searchbox] [add] {Name} -> {SearchIndexFilepath}");
    return 0;
}
2018-09-11 18:12:56 +00:00
2018-09-11 13:27:25 +00:00
/// <summary>
/// Removes the document called Name from the search index stored at
/// SearchIndexFilepath and writes the index back to the same file.
/// </summary>
/// <returns>0 on success, 1 if the document name or the index path is missing.</returns>
private static int HandleRemove()
{
    if (string.IsNullOrEmpty(Name)) {
        Console.Error.WriteLine("Error: The document name must be specified when removing a document!");
        return 1;
    }
    // Same guard as the other index-based handlers: without it an empty path
    // reaches File.ReadAllText and fails with an unhelpful exception.
    if (string.IsNullOrEmpty(SearchIndexFilepath)) {
        Console.Error.WriteLine("Error: No search index file path specified.");
        return 1;
    }

    // --------------------------------------

    SearchBox searchBox = JsonConvert.DeserializeObject<SearchBox>(
        File.ReadAllText(SearchIndexFilepath)
    );

    searchBox.RemoveDocument(Name);

    File.WriteAllText(SearchIndexFilepath, JsonConvert.SerializeObject(searchBox));

    Console.Error.WriteLine($"[Searchbox] [remove] {Name} <- {SearchIndexFilepath}");
    return 0;
}
2018-09-21 22:36:03 +00:00
/// <summary>
/// Runs Query against the search index at SearchIndexFilepath and prints the
/// results to stdout, after skipping ResultsOffset results and keeping at
/// most ResultsLimit of them (no limit when ResultsLimit is zero or below).
/// </summary>
/// <returns>
/// 0 on success, 1 if the query or index path is missing or the selected
/// output format is not supported for queries.
/// </returns>
private static int HandleQuery()
{
    if (string.IsNullOrEmpty(Query)) {
        Console.Error.WriteLine("Error: No query specified!");
        return 1;
    }
    if (string.IsNullOrEmpty(SearchIndexFilepath)) {
        Console.Error.WriteLine("Error: No search index file path specified.");
        return 1;
    }
    // Only JSON and text are implemented below; report anything else rather
    // than printing nothing and claiming success.
    if (OutputMode != OutputModes.Json && OutputMode != OutputModes.Text) {
        Console.Error.WriteLine($"Error: {OutputMode} output for queries is not supported.");
        return 1;
    }

    SearchBox searchBox = JsonConvert.DeserializeObject<SearchBox>(
        File.ReadAllText(SearchIndexFilepath)
    );

    IEnumerable<SearchResult> resultsRaw = searchBox.Query(Query, new QuerySettings()).Skip(ResultsOffset);
    List<SearchResult> results = new List<SearchResult>(
        ResultsLimit > 0 ? resultsRaw.Take(ResultsLimit) : resultsRaw
    );

    switch (OutputMode)
    {
        case OutputModes.Json:
            Console.WriteLine(JsonConvert.SerializeObject(results));
            break;
        case OutputModes.Text:
            // Results are numbered from zero, relative to the offset.
            int i = 0;
            foreach (SearchResult nextResult in results) {
                Console.WriteLine($"#{i}: {nextResult}");
                i++;
            }
            break;
    }
    return 0;
}
2018-09-22 16:12:46 +00:00
/// <summary>
/// Prints a context string for the document called Name: the document text
/// is read from Source and handed, together with Query, to
/// SearchBox.GenerateContext on the index loaded from SearchIndexFilepath.
/// </summary>
/// <returns>
/// 0 on success, 1 if the name, query or index path is missing, or if JSON
/// output was requested.
/// </returns>
private static int HandleContextGeneration()
{
    // Find the first missing argument, checked in the order name, query, index path.
    string problem = null;
    if (string.IsNullOrEmpty(Name))
        problem = "Error: No document name specified.";
    else if (string.IsNullOrEmpty(Query))
        problem = "Error: No query specified.";
    else if (SearchIndexFilepath == string.Empty)
        problem = "Error: No search index file path specified.";

    if (problem != null) {
        Console.Error.WriteLine(problem);
        return 1;
    }

    string indexJson = File.ReadAllText(SearchIndexFilepath);
    SearchBox searchBox = JsonConvert.DeserializeObject<SearchBox>(indexJson);

    ContextSettings generationSettings = new ContextSettings();
    if (OutputMode == OutputModes.Json) {
        Console.Error.WriteLine("Error: JSON output for context generation is not supported.");
        return 1;
    }
    if (OutputMode == OutputModes.Html)
        generationSettings.Html = true;
    else if (OutputMode == OutputModes.Text)
        generationSettings.Html = false;

    string documentText = Source.ReadToEnd();
    var context = searchBox.GenerateContext(Name, documentText, Query, generationSettings);
    Console.WriteLine(context);
    return 0;
}
2018-09-11 13:27:25 +00:00
/// <summary>
/// Builds an Index from the whole of Source and prints it to stdout, either
/// serialised as JSON or via its text representation.
/// </summary>
/// <returns>0 on success, 1 if the selected output format is not supported for indexing.</returns>
private static int HandleIndex()
{
    // Only JSON and text are implemented; report anything else instead of
    // consuming the source, printing nothing and returning success.
    if (OutputMode != OutputModes.Json && OutputMode != OutputModes.Text) {
        Console.Error.WriteLine($"Error: {OutputMode} output for index generation is not supported.");
        return 1;
    }

    Index index = new Index(Source.ReadToEnd());
    switch (OutputMode)
    {
        case OutputModes.Json:
            Console.WriteLine(JsonConvert.SerializeObject(index));
            break;
        case OutputModes.Text:
            Console.WriteLine(index);
            break;
    }
    return 0;
}
}
}