Hook up empty SearchBox class

This commit is contained in:
Starbeamrainbowlabs 2018-09-11 14:27:25 +01:00
parent 39d1d6f90d
commit 959aa219ce
Signed by: sbrl
GPG key ID: 1BE5172E637709C2
10 changed files with 180 additions and 71 deletions

View file

@ -4,8 +4,8 @@ using System.IO;
using System.Linq;
using Newtonsoft.Json;
using SearchBox;
using LibSearchBox;
using System.Text.RegularExpressions;
namespace SearchBoxCLI
{
@ -13,9 +13,9 @@ namespace SearchBoxCLI
{
Query,
Index,
InvertedIndexAdd,
InvertedIndexRemove,
InvertedIndexUpdate
Add,
Remove,
Update
}
enum OutputModes
@ -27,9 +27,10 @@ namespace SearchBoxCLI
class MainClass {
private static OperatingModes Mode = OperatingModes.Query;
private static string Name = string.Empty;
private static string InvIndexFilepath = string.Empty;
private static string IdMapFilepath = string.Empty;
private static IEnumerable<string> Tags;
private static string SearchIndexFilepath = string.Empty;
private static TextReader Source = Console.In;
private static TextReader SourceOld = null, SourceNew = null;
private static OutputModes OutputMode = OutputModes.Json;
public static int Main(string[] args)
@ -43,7 +44,20 @@ namespace SearchBoxCLI
case "source":
string sourceFilename = args[++i];
Source = new StreamReader(sourceFilename);
Name = sourceFilename;
Name = Name.Length > 0 ? Name : sourceFilename;
break;
case "old-source":
SourceOld = new StreamReader(args[++i]);
break;
case "new-source":
string newSourceFilename = args[++i];
SourceNew = new StreamReader(newSourceFilename);
Name = Name.Length > 0 ? Name : newSourceFilename;
break;
case "tags":
Tags = Regex.Split(args[++i], @",\s+");
break;
case "n":
@ -51,18 +65,36 @@ namespace SearchBoxCLI
Name = args[++i];
break;
case "invindex":
InvIndexFilepath = args[++i];
break;
case "idmap":
case "index":
SearchIndexFilepath = args[++i];
break;
case "help":
return HandleHelp();
default:
Console.Error.WriteLine($"Error: Unknown property {args[i]}.");
return 1;
}
}
if (extras.Count < 1) return HandleHelp();
string modeText = extras.First(); extras.RemoveAt(0);
Mode = (OperatingModes)Enum.Parse(typeof(OperatingModes), modeText);
switch (Mode) {
case OperatingModes.Index: return HandleIndex();
case OperatingModes.Add: return HandleAdd();
case OperatingModes.Remove: return HandleRemove();
}
return 0;
}
private static int HandleHelp()
{
Console.WriteLine("SearchBox");
Console.WriteLine("---------");
Console.WriteLine("A standalone full-text search engine.");
Console.WriteLine("A standalone full-text search engine. Operates on plain text sources.");
Console.WriteLine();
Console.WriteLine("Usage:");
Console.WriteLine(" ./SearchBox.exe {mode} [options]");
@ -70,72 +102,68 @@ namespace SearchBoxCLI
Console.WriteLine("Modes:");
Console.WriteLine(" query Query a pre-existing inverted search index");
Console.WriteLine(" index Generate a raw index of the source document.");
Console.WriteLine(" add Add a named document to an inverted search index.");
Console.WriteLine(" remove Remove a named document from an inverted search index.");
Console.WriteLine(" add Add a named document to a search index.");
Console.WriteLine(" remove Remove a named document from a search index.");
Console.WriteLine(" update Update a named document in a search index.");
Console.WriteLine();
Console.WriteLine("Options:");
Console.WriteLine(" --source, -s Specifies the source document {index, add}");
Console.WriteLine(" --source, -s Specifies the path to the source document {index, add}");
Console.WriteLine(" --old-source Specifies the path to the old version of the source document to update {update}");
Console.WriteLine(" --new-source Specifies the path to the new version of the source document to update {update}");
Console.WriteLine(" --name, -n Sets the name of the source document {add, remove}");
Console.WriteLine(" --idmap, -i Specifies the location of the id map, which is used to map document names onto their nuemical ids {add, remove}");
Console.WriteLine(" --invindex Specifies the location of the inverted search index to use {add, remove}");
Console.WriteLine(" ");
break;
default:
Console.Error.WriteLine($"Error: Unknown property {args[i]}.");
Console.WriteLine(" --index Specifies the location of the search index to use {add, remove, update}");
Console.WriteLine(" --tags Sets the tags to associate with the document. {add, update}");
Console.WriteLine();
Console.WriteLine("Examples:");
Console.WriteLine(" cat books/complex_knots.txt | ./SearchBox.exe add --name \"Complex Knots: How to do and undo them\"");
Console.WriteLine();
return 1;
}
}
string modeText = extras.First(); extras.RemoveAt(0);
Mode = (OperatingModes)Enum.Parse(typeof(OperatingModes), modeText);
switch (Mode) {
case OperatingModes.Index: return HandleIndex();
case OperatingModes.InvertedIndexAdd: return HandleInvIndexAdd();
}
return 0;
}
private static int HandleInvIndexAdd()
/// <summary>
/// Handles the "add" mode: reads the whole of <c>Source</c>, adds it to the
/// search index stored at <c>SearchIndexFilepath</c> under <c>Name</c> (with
/// <c>Tags</c>), and writes the index back to the same file.
/// A fresh index is started when the file does not exist yet.
/// </summary>
/// <returns>0 on success, 1 when a required option is missing.</returns>
private static int HandleAdd()
{
    if (Name == string.Empty) {
        Console.Error.WriteLine("Error: The document name must be specified when reading from stdin!");
        return 1;
    }
    // Without this guard File.WriteAllText below fails on an empty path.
    if (SearchIndexFilepath == string.Empty) {
        Console.Error.WriteLine("Error: No search index filepath specified.");
        return 1;
    }

    // --------------------------------------

    SearchBox searchBox = null;
    if (File.Exists(SearchIndexFilepath))
        searchBox = JsonConvert.DeserializeObject<SearchBox>(File.ReadAllText(SearchIndexFilepath));
    // The deserializer can hand back null (e.g. for an empty file); start
    // from an empty index in that case, as for a file that does not exist.
    if (searchBox == null)
        searchBox = new SearchBox();

    // Tags stays null when --tags was not given; treat that as "no tags".
    searchBox.AddDocument(Name, Tags ?? Enumerable.Empty<string>(), Source.ReadToEnd());

    File.WriteAllText(SearchIndexFilepath, JsonConvert.SerializeObject(searchBox));

    return 0;
}
if (IdMapFilepath == string.Empty) {
Console.Error.WriteLine("Error: No id map filepath specified.");
/// <summary>
/// Handles the "remove" mode: loads the search index stored at
/// <c>SearchIndexFilepath</c>, removes the document called <c>Name</c> from
/// it, and writes the index back to the same file.
/// </summary>
/// <returns>
/// 0 on success, 1 when a required option is missing or the index file
/// cannot be used.
/// </returns>
private static int HandleRemove()
{
    if (Name == string.Empty) {
        Console.Error.WriteLine("Error: The document name must be specified when removing a document!");
        return 1;
    }
    if (SearchIndexFilepath == string.Empty) {
        Console.Error.WriteLine("Error: No search index filepath specified.");
        return 1;
    }
    // There is nothing to remove from an index that does not exist; report
    // it instead of letting File.ReadAllText throw.
    if (!File.Exists(SearchIndexFilepath)) {
        Console.Error.WriteLine($"Error: The search index at '{SearchIndexFilepath}' does not exist.");
        return 1;
    }

    SearchBox searchBox = JsonConvert.DeserializeObject<SearchBox>(
        File.ReadAllText(SearchIndexFilepath)
    );
    // The deserializer can hand back null (e.g. for an empty file).
    if (searchBox == null) {
        Console.Error.WriteLine($"Error: The search index at '{SearchIndexFilepath}' could not be read.");
        return 1;
    }

    searchBox.RemoveDocument(Name);

    File.WriteAllText(SearchIndexFilepath, JsonConvert.SerializeObject(searchBox));

    return 0;
}
public static int HandleIndex()
private static int HandleIndex()
{
Index index = new Index(Source.ReadToEnd());
switch (OutputMode)

23
SearchBox/DocumentMeta.cs Normal file
View file

@ -0,0 +1,23 @@
using System;
using System.Collections.Generic;
namespace LibSearchBox
{
/// <summary>
/// Metadata kept for a single document: its title and its list of tags.
/// </summary>
public class DocumentMeta
{
    /// <summary>The title of the document.</summary>
    public string Title { get; set; }

    /// <summary>
    /// The tags associated with the document. Never null; the same list
    /// instance is kept for the lifetime of this object.
    /// </summary>
    public List<string> Tags { get; private set; }

    /// <summary>
    /// Creates the metadata for a document.
    /// </summary>
    /// <param name="inTitle">The title of the document.</param>
    /// <param name="inTags">
    /// The initial tags, copied into a new list. Null is treated as "no tags".
    /// </param>
    public DocumentMeta(string inTitle, IEnumerable<string> inTags)
    {
        Title = inTitle;
        Tags = inTags == null ? new List<string>() : new List<string>(inTags);
    }

    /// <summary>
    /// Replaces all current tags with the given ones.
    /// </summary>
    /// <param name="newTags">
    /// The replacement tags. Null is treated as "no tags".
    /// </param>
    public void ReplaceTags(IEnumerable<string> newTags)
    {
        // Snapshot the input before clearing: newTags may be (or be derived
        // from) Tags itself, in which case clearing first would lose it.
        List<string> replacement = newTags == null
            ? new List<string>()
            : new List<string>(newTags);

        Tags.Clear();
        Tags.AddRange(replacement);
    }
}
}

View file

@ -3,16 +3,16 @@ using System.Collections.Generic;
using System.Text;
using Stackoverflow.Utilities;
namespace SearchBox
namespace LibSearchBox
{
public class IdNotFoundException : Exception { public IdNotFoundException(string message) : base(message) { } }
public class IdMapper
public class IdMap
{
private int nextId = 0;
public BiDictionary<int, string> map = new BiDictionary<int, string>();
public IdMapper()
public IdMap()
{
}
@ -43,9 +43,11 @@ namespace SearchBox
map.Add(id, newPageName);
}
public void DeletePageName(string pageName)
/// <summary>
/// Removes the given page name from the map.
/// </summary>
/// <param name="pageName">The page name to remove.</param>
/// <returns>
/// The id that <c>GetId</c> reported for the page name just before the
/// entry was removed.
/// </returns>
public int DeletePageName(string pageName)
{
    // Look the id up first - once the entry is gone it can no longer be resolved.
    int removedId = GetId(pageName);

    map.RemoveBySecond(pageName);

    return removedId;
}
public override string ToString()

View file

@ -5,7 +5,7 @@ using System.IO;
using System.Text;
using SBRL.Utilities;
namespace SearchBox
namespace LibSearchBox
{
[Flags]
public enum IndexOptions

View file

@ -2,7 +2,7 @@
using System.Collections.Concurrent;
using System.Collections.Generic;
namespace SearchBox
namespace LibSearchBox
{
public class InvertedIndex
{
@ -26,9 +26,9 @@ namespace SearchBox
return true;
}
public bool RemoveIndex(int pageId, Index newIndex)
public bool RemoveIndex(int pageId, Index oldIndex)
{
foreach (string token in newIndex.Tokens())
foreach (string token in oldIndex.Tokens())
{
if (!invertedIndex.ContainsKey(token) || !invertedIndex[token].ContainsKey(pageId)) continue;
@ -37,5 +37,22 @@ namespace SearchBox
}
return false;
}
/// <summary>
/// Swaps the index of a page: removes <paramref name="oldIndex"/> and then
/// adds <paramref name="newIndex"/> under the same page id.
/// </summary>
/// <param name="pageId">The id of the page whose index is replaced.</param>
/// <param name="oldIndex">The index to remove.</param>
/// <param name="newIndex">The index to add.</param>
/// <returns>
/// True when both the removal and the addition reported success; false as
/// soon as one of them reports failure.
/// </returns>
public bool ReplaceIndex(int pageId, Index oldIndex, Index newIndex)
{
    // Short-circuits: the new index is only added once the removal succeeded.
    return RemoveIndex(pageId, oldIndex) && AddIndex(pageId, newIndex);
}
/// <summary>
/// Removes the entry for the given page id from every token in the
/// inverted index that has one.
/// </summary>
/// <param name="pageId">The id of the page to remove.</param>
/// <returns>
/// False as soon as an entry that was present could not be removed
/// (later tokens are then left untouched); true otherwise.
/// </returns>
public bool RemoveById(int pageId)
{
    foreach (KeyValuePair<string, ConcurrentDictionary<int, List<int>>> tokenEntry in invertedIndex)
    {
        ConcurrentDictionary<int, List<int>> pages = tokenEntry.Value;

        // Tokens that never mention the page are skipped; a failed removal
        // of an entry that was present aborts the whole operation.
        if (pages.ContainsKey(pageId) && !pages.TryRemove(pageId, out List<int> discarded))
            return false;
    }

    return true;
}
}
}

View file

@ -1,10 +1,48 @@
using System;
namespace SearchBox
using System.Collections.Concurrent;
using System.Collections.Generic;
namespace LibSearchBox
{
public class SearchBoxException : Exception { public SearchBoxException(string message) : base(message) { } }
/// <summary>
/// Ties together the id map, the inverted index, and the per-document
/// metadata table, so that documents can be added, updated, and removed
/// by title.
/// </summary>
public class SearchBox
{
    private IdMap idMap = new IdMap();
    private InvertedIndex index = new InvertedIndex();
    // Document metadata, keyed by the id that idMap assigns to the title.
    private ConcurrentDictionary<int, DocumentMeta> metaTable = new ConcurrentDictionary<int, DocumentMeta>();

    public SearchBox()
    {
    }

    /// <summary>
    /// Adds a document: indexes its content and records its metadata.
    /// </summary>
    /// <param name="title">The title of the document.</param>
    /// <param name="tags">The tags to associate with the document.</param>
    /// <param name="content">The text content to index.</param>
    public void AddDocument(string title, IEnumerable<string> tags, string content)
    {
        DocumentMeta info = new DocumentMeta(title, tags);
        int id = idMap.GetId(info.Title);
        Index upsideIndex = new Index(content);
        // NOTE(review): the bool result of AddIndex is not checked here - confirm
        // whether a false return should be reported to the caller.
        index.AddIndex(id, upsideIndex);
        // Record the metadata so that UpdateDocument can find it again.
        metaTable[id] = info;
    }

    /// <summary>
    /// Updates a document: replaces its index entries and then its tags.
    /// </summary>
    /// <param name="title">The title of the document to update.</param>
    /// <param name="newTags">The tags that replace the current ones.</param>
    /// <param name="oldContent">The content the document was indexed with.</param>
    /// <param name="newContent">The new content to index.</param>
    /// <exception cref="KeyNotFoundException">
    /// No metadata is recorded for the id of <paramref name="title"/>.
    /// </exception>
    /// <exception cref="SearchBoxException">
    /// The inverted index reported that the replacement failed.
    /// </exception>
    public void UpdateDocument(string title, IEnumerable<string> newTags, string oldContent, string newContent)
    {
        int id = idMap.GetId(title);
        DocumentMeta info = metaTable[id];

        Index oldIndex = new Index(oldContent), newIndex = new Index(newContent);
        if (!index.ReplaceIndex(id, oldIndex, newIndex))
            throw new SearchBoxException($"Error: Failed to replace index for document with title {title}.");

        // Only touch the tags once the index replacement has gone through, so
        // a failure above does not leave the metadata half-updated.
        info.ReplaceTags(newTags);
    }

    /// <summary>
    /// Removes a document: drops its title from the id map, its metadata,
    /// and its entries in the inverted index.
    /// </summary>
    /// <param name="title">The title of the document to remove.</param>
    /// <exception cref="SearchBoxException">
    /// The inverted index reported that the removal failed.
    /// </exception>
    public void RemoveDocument(string title)
    {
        int id = idMap.DeletePageName(title);
        metaTable.TryRemove(id, out DocumentMeta noop);
        if (!index.RemoveById(id))
            throw new SearchBoxException($"Failed to remove page with title '{title}' from inverted index.");
    }
}
}

View file

@ -48,6 +48,7 @@
<Compile Include="InvertedIndex.cs" />
<Compile Include="IdMap.cs" />
<Compile Include="Utilities\BiDictionary.cs" />
<Compile Include="DocumentMeta.cs" />
</ItemGroup>
<ItemGroup>
<Folder Include="EmbeddedFiles\" />

View file

@ -1,7 +1,7 @@
using System;
using System.Collections.Generic;
namespace SearchBox
namespace LibSearchBox
{
public class StopwordTester
{

View file

@ -3,10 +3,10 @@ using System.Collections;
using System.Collections.Generic;
using System.Net;
using System.Text.RegularExpressions;
using SearchBox.Utilities;
using LibSearchBox.Utilities;
using UnidecodeSharpFork;
namespace SearchBox
namespace LibSearchBox
{
[Flags]
public enum TokenizerOptions

View file

@ -1,6 +1,6 @@
using System;
namespace SearchBox.Utilities
namespace LibSearchBox.Utilities
{
public static class StringPlus
{