Hook up empty SearchBox class

This commit is contained in:
Starbeamrainbowlabs 2018-09-11 14:27:25 +01:00
parent 39d1d6f90d
commit 959aa219ce
Signed by: sbrl
GPG Key ID: 1BE5172E637709C2
10 changed files with 180 additions and 71 deletions

View File

@ -4,8 +4,8 @@ using System.IO;
using System.Linq; using System.Linq;
using Newtonsoft.Json; using Newtonsoft.Json;
using SearchBox; using LibSearchBox;
using System.Text.RegularExpressions;
namespace SearchBoxCLI namespace SearchBoxCLI
{ {
@ -13,9 +13,9 @@ namespace SearchBoxCLI
{ {
Query, Query,
Index, Index,
InvertedIndexAdd, Add,
InvertedIndexRemove, Remove,
InvertedIndexUpdate Update
} }
enum OutputModes enum OutputModes
@ -27,9 +27,10 @@ namespace SearchBoxCLI
class MainClass { class MainClass {
private static OperatingModes Mode = OperatingModes.Query; private static OperatingModes Mode = OperatingModes.Query;
private static string Name = string.Empty; private static string Name = string.Empty;
private static string InvIndexFilepath = string.Empty; private static IEnumerable<string> Tags;
private static string IdMapFilepath = string.Empty; private static string SearchIndexFilepath = string.Empty;
private static TextReader Source = Console.In; private static TextReader Source = Console.In;
private static TextReader SourceOld = null, SourceNew = null;
private static OutputModes OutputMode = OutputModes.Json; private static OutputModes OutputMode = OutputModes.Json;
public static int Main(string[] args) public static int Main(string[] args)
@ -43,7 +44,20 @@ namespace SearchBoxCLI
case "source": case "source":
string sourceFilename = args[++i]; string sourceFilename = args[++i];
Source = new StreamReader(sourceFilename); Source = new StreamReader(sourceFilename);
Name = sourceFilename; Name = Name.Length > 0 ? Name : sourceFilename;
break;
case "old-source":
SourceOld = new StreamReader(args[++i]);
break;
case "new-source":
string newSourceFilename = args[++i];
SourceNew = new StreamReader(newSourceFilename);
Name = Name.Length > 0 ? Name : newSourceFilename;
break;
case "tags":
Tags = Regex.Split(args[++i], @",\s+");
break; break;
case "n": case "n":
@ -51,91 +65,105 @@ namespace SearchBoxCLI
Name = args[++i]; Name = args[++i];
break; break;
case "invindex": case "index":
InvIndexFilepath = args[++i]; SearchIndexFilepath = args[++i];
break;
case "idmap":
break; break;
case "help": case "help":
Console.WriteLine("SearchBox"); return HandleHelp();
Console.WriteLine("---------");
Console.WriteLine("A standalone full-text search engine.");
Console.WriteLine();
Console.WriteLine("Usage:");
Console.WriteLine(" ./SearchBox.exe {mode} [options]");
Console.WriteLine();
Console.WriteLine("Modes:");
Console.WriteLine(" query Query a pre-existing inverted search index");
Console.WriteLine(" index Generate a raw index of the source document.");
Console.WriteLine(" add Add a named document to an inverted search index.");
Console.WriteLine(" remove Remove a named document from an inverted search index.");
Console.WriteLine();
Console.WriteLine("Options:");
Console.WriteLine(" --source, -s Specifies the source document {index, add}");
Console.WriteLine(" --name, -n Sets the name of the source document {add, remove}");
Console.WriteLine(" --idmap, -i Specifies the location of the id map, which is used to map document names onto their nuemical ids {add, remove}");
Console.WriteLine(" --invindex Specifies the location of the inverted search index to use {add, remove}");
Console.WriteLine(" ");
break;
default: default:
Console.Error.WriteLine($"Error: Unknown property {args[i]}."); Console.Error.WriteLine($"Error: Unknown property {args[i]}.");
return 1; return 1;
} }
} }
if (extras.Count < 1) return HandleHelp();
string modeText = extras.First(); extras.RemoveAt(0); string modeText = extras.First(); extras.RemoveAt(0);
Mode = (OperatingModes)Enum.Parse(typeof(OperatingModes), modeText); Mode = (OperatingModes)Enum.Parse(typeof(OperatingModes), modeText);
switch (Mode) { switch (Mode) {
case OperatingModes.Index: return HandleIndex(); case OperatingModes.Index: return HandleIndex();
case OperatingModes.InvertedIndexAdd: return HandleInvIndexAdd(); case OperatingModes.Add: return HandleAdd();
case OperatingModes.Remove: return HandleRemove();
} }
return 0; return 0;
} }
private static int HandleInvIndexAdd() private static int HandleHelp()
{
Console.WriteLine("SearchBox");
Console.WriteLine("---------");
Console.WriteLine("A standalone full-text search engine. Operates on plain text sources.");
Console.WriteLine();
Console.WriteLine("Usage:");
Console.WriteLine(" ./SearchBox.exe {mode} [options]");
Console.WriteLine();
Console.WriteLine("Modes:");
Console.WriteLine(" query Query a pre-existing inverted search index");
Console.WriteLine(" index Generate a raw index of the source document.");
Console.WriteLine(" add Add a named document to a search index.");
Console.WriteLine(" remove Remove a named document from a search index.");
Console.WriteLine(" update Update a named document in a search index.");
Console.WriteLine();
Console.WriteLine("Options:");
Console.WriteLine(" --source, -s Specifies the path to the source document {index, add}");
Console.WriteLine(" --old-source Specifies the path to the old version of the source document to update {update}");
Console.WriteLine(" --new-source Specifies the path to the new version of the source document to update {update}");
Console.WriteLine(" --name, -n Sets the name of the source document {add, remove}");
Console.WriteLine(" --index Specifies the location of the search index to use {add, remove, update}");
Console.WriteLine(" --tags Sets the tags to associate with the document. {add, update}");
Console.WriteLine();
Console.WriteLine("Examples:");
Console.WriteLine(" cat books/complex_knots.txt | ./SearchBox.exe add --name \"Complex Knots: How to do and undo them\"");
Console.WriteLine();
return 1;
}
private static int HandleAdd()
{ {
if (Name == string.Empty) { if (Name == string.Empty) {
Console.Error.WriteLine("Error: The document name must be specified when reading from stdin!"); Console.Error.WriteLine("Error: The document name must be specified when reading from stdin!");
return 1; return 1;
} }
if (InvIndexFilepath == string.Empty) {
Console.Error.WriteLine("Error: No inverted index filepath specified."); // --------------------------------------
return 1;
} SearchBox searchBox;
if (IdMapFilepath == string.Empty) { if (!File.Exists(SearchIndexFilepath))
Console.Error.WriteLine("Error: No id map filepath specified."); searchBox = new SearchBox();
else
searchBox = JsonConvert.DeserializeObject<SearchBox>(File.ReadAllText(SearchIndexFilepath));
searchBox.AddDocument(Name, Tags, Source.ReadToEnd());
File.WriteAllText(SearchIndexFilepath, JsonConvert.SerializeObject(searchBox));
return 0;
}
private static int HandleRemove()
{
if (Name == string.Empty) {
Console.Error.WriteLine("Error: The document name must be specified when removing a document!");
return 1; return 1;
} }
// -------------------------------------- // --------------------------------------
if (!File.Exists(InvIndexFilepath)) SearchBox searchBox = JsonConvert.DeserializeObject<SearchBox>(
File.WriteAllText(InvIndexFilepath, "[]"); File.ReadAllText(SearchIndexFilepath)
if (!File.Exists(IdMapFilepath))
File.WriteAllText(InvIndexFilepath, "{}");
IdMap idMap = JsonConvert.DeserializeObject<IdMap>(File.ReadAllText(IdMapFilepath));
int newId = idMap.GetId(Name == string.Empty ? Source);
InvertedIndex invertedIndex = JsonConvert.DeserializeObject<InvertedIndex>(
File.ReadAllText(InvIndexFilepath)
); );
Index newIndex = new Index(Source.ReadToEnd());
invertedIndex.AddIndex(newId, newIndex);
File.WriteAllText(InvIndexFilepath, JsonConvert.SerializeObject(invertedIndex)); searchBox.RemoveDocument(Name);
File.WriteAllText(IdMapFilepath, JsonConvert.SerializeObject(idMap));
File.WriteAllText(SearchIndexFilepath, JsonConvert.SerializeObject(searchBox));
return 0; return 0;
} }
public static int HandleIndex() private static int HandleIndex()
{ {
Index index = new Index(Source.ReadToEnd()); Index index = new Index(Source.ReadToEnd());
switch (OutputMode) switch (OutputMode)

23
SearchBox/DocumentMeta.cs Normal file
View File

@ -0,0 +1,23 @@
using System;
using System.Collections.Generic;
namespace LibSearchBox
{
/// <summary>
/// Metadata describing a single document: its title and the tags attached to it.
/// </summary>
public class DocumentMeta
{
    /// <summary>The title of the document.</summary>
    public string Title { get; set; }

    /// <summary>
    /// The tags attached to the document. Assigned once in the constructor and
    /// never null; use <see cref="ReplaceTags"/> to swap the contents.
    /// </summary>
    public List<string> Tags { get; private set; }

    /// <summary>
    /// Creates the metadata record for a document.
    /// </summary>
    /// <param name="inTitle">The title of the document.</param>
    /// <param name="inTags">The initial tags. A null sequence is treated as "no tags".</param>
    public DocumentMeta(string inTitle, IEnumerable<string> inTags)
    {
        Title = inTitle;
        // List<T>(IEnumerable<T>) throws ArgumentNullException on null, so an
        // untagged document is represented by an empty list instead.
        Tags = inTags == null ? new List<string>() : new List<string>(inTags);
    }

    /// <summary>
    /// Replaces the current tags with the given sequence.
    /// </summary>
    /// <param name="newTags">The new tags. A null sequence clears all tags.</param>
    public void ReplaceTags(IEnumerable<string> newTags)
    {
        // Copy the incoming tags BEFORE clearing: newTags may be Tags itself
        // (or a lazy query over it), in which case clearing first would leave
        // nothing to copy.
        List<string> replacement = newTags == null ? new List<string>() : new List<string>(newTags);
        Tags.Clear();
        Tags.AddRange(replacement);
    }
}
}

View File

@ -3,16 +3,16 @@ using System.Collections.Generic;
using System.Text; using System.Text;
using Stackoverflow.Utilities; using Stackoverflow.Utilities;
namespace SearchBox namespace LibSearchBox
{ {
public class IdNotFoundException : Exception { public IdNotFoundException(string message) : base(message) { } } public class IdNotFoundException : Exception { public IdNotFoundException(string message) : base(message) { } }
public class IdMapper public class IdMap
{ {
private int nextId = 0; private int nextId = 0;
public BiDictionary<int, string> map = new BiDictionary<int, string>(); public BiDictionary<int, string> map = new BiDictionary<int, string>();
public IdMapper() public IdMap()
{ {
} }
@ -43,9 +43,11 @@ namespace SearchBox
map.Add(id, newPageName); map.Add(id, newPageName);
} }
public void DeletePageName(string pageName) public int DeletePageName(string pageName)
{ {
int id = GetId(pageName);
map.RemoveBySecond(pageName); map.RemoveBySecond(pageName);
return id;
} }
public override string ToString() public override string ToString()

View File

@ -5,7 +5,7 @@ using System.IO;
using System.Text; using System.Text;
using SBRL.Utilities; using SBRL.Utilities;
namespace SearchBox namespace LibSearchBox
{ {
[Flags] [Flags]
public enum IndexOptions public enum IndexOptions

View File

@ -2,7 +2,7 @@
using System.Collections.Concurrent; using System.Collections.Concurrent;
using System.Collections.Generic; using System.Collections.Generic;
namespace SearchBox namespace LibSearchBox
{ {
public class InvertedIndex public class InvertedIndex
{ {
@ -26,9 +26,9 @@ namespace SearchBox
return true; return true;
} }
public bool RemoveIndex(int pageId, Index newIndex) public bool RemoveIndex(int pageId, Index oldIndex)
{ {
foreach (string token in newIndex.Tokens()) foreach (string token in oldIndex.Tokens())
{ {
if (!invertedIndex.ContainsKey(token) || !invertedIndex[token].ContainsKey(pageId)) continue; if (!invertedIndex.ContainsKey(token) || !invertedIndex[token].ContainsKey(pageId)) continue;
@ -37,5 +37,22 @@ namespace SearchBox
} }
return false; return false;
} }
/// <summary>
/// Swaps a page's old index for a new one by removing the old index and then
/// adding the new one.
/// </summary>
/// <param name="pageId">The id of the page whose index is being replaced.</param>
/// <param name="oldIndex">The index to hand to RemoveIndex.</param>
/// <param name="newIndex">The index to hand to AddIndex.</param>
/// <returns>
/// True only if both the removal and the addition report success. The
/// addition is not attempted when the removal reports failure.
/// </returns>
public bool ReplaceIndex(int pageId, Index oldIndex, Index newIndex)
{
    // && short-circuits, so AddIndex only runs after a successful RemoveIndex.
    return RemoveIndex(pageId, oldIndex) && AddIndex(pageId, newIndex);
}
/// <summary>
/// Removes every entry recorded for the given page id, across all tokens in
/// the inverted index.
/// </summary>
/// <param name="pageId">The id of the page to purge.</param>
/// <returns>
/// False as soon as an entry that was present could not be removed;
/// true once every token has been visited.
/// </returns>
public bool RemoveById(int pageId)
{
    foreach (KeyValuePair<string, ConcurrentDictionary<int, List<int>>> tokenEntry in invertedIndex)
    {
        // Per-token table keyed by page id.
        ConcurrentDictionary<int, List<int>> pagesForToken = tokenEntry.Value;

        // Tokens that never mention this page are left alone; a failed
        // removal of an entry that was present aborts the whole operation.
        if (pagesForToken.ContainsKey(pageId) && !pagesForToken.TryRemove(pageId, out List<int> discarded))
            return false;
    }

    return true;
}
} }
} }

View File

@ -1,10 +1,48 @@
using System; using System;
namespace SearchBox using System.Collections.Concurrent;
using System.Collections.Generic;
namespace LibSearchBox
{ {
public class SearchBoxException : Exception { public SearchBoxException(string message) : base(message) { } }
public class SearchBox public class SearchBox
{ {
private IdMap idMap = new IdMap();
private InvertedIndex index = new InvertedIndex();
private ConcurrentDictionary<int, DocumentMeta> metaTable = new ConcurrentDictionary<int, DocumentMeta>();
public SearchBox() public SearchBox()
{ {
}
/// <summary>
/// Adds a document to the search box: records its metadata and adds an index
/// of its content to the inverted index.
/// </summary>
/// <param name="title">The title of the document; used to obtain its id from the id map.</param>
/// <param name="tags">The tags to associate with the document.</param>
/// <param name="content">The text content of the document to index.</param>
public void AddDocument(string title, IEnumerable<string> tags, string content)
{
    DocumentMeta info = new DocumentMeta(title, tags);
    // NOTE(review): presumably GetId allocates a fresh id for an unseen title - confirm in IdMap.
    int id = idMap.GetId(info.Title);

    // Store the metadata under the document's id. UpdateDocument looks it up
    // in metaTable and RemoveDocument removes it from there, so without this
    // write the metadata would be discarded as soon as this method returns.
    metaTable[id] = info;

    Index upsideIndex = new Index(content);
    // NOTE(review): AddIndex returns a bool that is not checked here - confirm
    // whether a false result should be reported to the caller.
    index.AddIndex(id, upsideIndex);
}
/// <summary>
/// Updates an existing document: replaces its index in the inverted index
/// and then replaces its tags.
/// </summary>
/// <param name="title">The title of the document to update.</param>
/// <param name="newTags">The tags that replace the document's current tags.</param>
/// <param name="oldContent">The previous content, indexed so the old entries can be removed.</param>
/// <param name="newContent">The new content to index.</param>
/// <exception cref="SearchBoxException">
/// Thrown when no metadata is recorded for the document, or when the inverted
/// index reports that the replacement failed.
/// </exception>
public void UpdateDocument(string title, IEnumerable<string> newTags, string oldContent, string newContent)
{
    int id = idMap.GetId(title);

    // Fail with a descriptive, library-specific error rather than letting the
    // dictionary indexer throw a bare KeyNotFoundException.
    if (!metaTable.TryGetValue(id, out DocumentMeta info))
        throw new SearchBoxException($"Error: No metadata is registered for document with title {title}.");

    Index oldIndex = new Index(oldContent), newIndex = new Index(newContent);
    if (!index.ReplaceIndex(id, oldIndex, newIndex))
        throw new SearchBoxException($"Error: Failed to replace index for document with title {title}.");

    // Tags are swapped last so that a failed index replacement leaves the
    // document's metadata untouched.
    info.ReplaceTags(newTags);
}
public void RemoveDocument(string title)
{
int id = idMap.DeletePageName(title);
metaTable.TryRemove(id, out DocumentMeta noop);
if (!index.RemoveById(id))
throw new SearchBoxException($"Failed to remove page with title '{title}' from inverted index.");
} }
} }
} }

View File

@ -48,6 +48,7 @@
<Compile Include="InvertedIndex.cs" /> <Compile Include="InvertedIndex.cs" />
<Compile Include="IdMap.cs" /> <Compile Include="IdMap.cs" />
<Compile Include="Utilities\BiDictionary.cs" /> <Compile Include="Utilities\BiDictionary.cs" />
<Compile Include="DocumentMeta.cs" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<Folder Include="EmbeddedFiles\" /> <Folder Include="EmbeddedFiles\" />

View File

@ -1,7 +1,7 @@
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
namespace SearchBox namespace LibSearchBox
{ {
public class StopwordTester public class StopwordTester
{ {

View File

@ -3,10 +3,10 @@ using System.Collections;
using System.Collections.Generic; using System.Collections.Generic;
using System.Net; using System.Net;
using System.Text.RegularExpressions; using System.Text.RegularExpressions;
using SearchBox.Utilities; using LibSearchBox.Utilities;
using UnidecodeSharpFork; using UnidecodeSharpFork;
namespace SearchBox namespace LibSearchBox
{ {
[Flags] [Flags]
public enum TokenizerOptions public enum TokenizerOptions

View File

@ -1,6 +1,6 @@
using System; using System;
namespace SearchBox.Utilities namespace LibSearchBox.Utilities
{ {
public static class StringPlus public static class StringPlus
{ {