Hook up empty SearchBox class
This commit is contained in:
parent
39d1d6f90d
commit
959aa219ce
10 changed files with 180 additions and 71 deletions
|
@ -4,8 +4,8 @@ using System.IO;
|
|||
using System.Linq;
|
||||
using Newtonsoft.Json;
|
||||
|
||||
using SearchBox;
|
||||
|
||||
using LibSearchBox;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace SearchBoxCLI
|
||||
{
|
||||
|
@ -13,9 +13,9 @@ namespace SearchBoxCLI
|
|||
{
|
||||
Query,
|
||||
Index,
|
||||
InvertedIndexAdd,
|
||||
InvertedIndexRemove,
|
||||
InvertedIndexUpdate
|
||||
Add,
|
||||
Remove,
|
||||
Update
|
||||
}
|
||||
|
||||
enum OutputModes
|
||||
|
@ -27,9 +27,10 @@ namespace SearchBoxCLI
|
|||
class MainClass {
|
||||
private static OperatingModes Mode = OperatingModes.Query;
|
||||
private static string Name = string.Empty;
|
||||
private static string InvIndexFilepath = string.Empty;
|
||||
private static string IdMapFilepath = string.Empty;
|
||||
private static IEnumerable<string> Tags;
|
||||
private static string SearchIndexFilepath = string.Empty;
|
||||
private static TextReader Source = Console.In;
|
||||
private static TextReader SourceOld = null, SourceNew = null;
|
||||
private static OutputModes OutputMode = OutputModes.Json;
|
||||
|
||||
public static int Main(string[] args)
|
||||
|
@ -43,7 +44,20 @@ namespace SearchBoxCLI
|
|||
case "source":
|
||||
string sourceFilename = args[++i];
|
||||
Source = new StreamReader(sourceFilename);
|
||||
Name = sourceFilename;
|
||||
Name = Name.Length > 0 ? Name : sourceFilename;
|
||||
break;
|
||||
|
||||
case "old-source":
|
||||
SourceOld = new StreamReader(args[++i]);
|
||||
break;
|
||||
case "new-source":
|
||||
string newSourceFilename = args[++i];
|
||||
SourceNew = new StreamReader(newSourceFilename);
|
||||
Name = Name.Length > 0 ? Name : newSourceFilename;
|
||||
break;
|
||||
|
||||
case "tags":
|
||||
Tags = Regex.Split(args[++i], @",\s+");
|
||||
break;
|
||||
|
||||
case "n":
|
||||
|
@ -51,18 +65,36 @@ namespace SearchBoxCLI
|
|||
Name = args[++i];
|
||||
break;
|
||||
|
||||
case "invindex":
|
||||
InvIndexFilepath = args[++i];
|
||||
break;
|
||||
|
||||
case "idmap":
|
||||
|
||||
case "index":
|
||||
SearchIndexFilepath = args[++i];
|
||||
break;
|
||||
|
||||
case "help":
|
||||
return HandleHelp();
|
||||
|
||||
default:
|
||||
Console.Error.WriteLine($"Error: Unknown property {args[i]}.");
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
if (extras.Count < 1) return HandleHelp();
|
||||
string modeText = extras.First(); extras.RemoveAt(0);
|
||||
Mode = (OperatingModes)Enum.Parse(typeof(OperatingModes), modeText);
|
||||
|
||||
switch (Mode) {
|
||||
case OperatingModes.Index: return HandleIndex();
|
||||
case OperatingModes.Add: return HandleAdd();
|
||||
case OperatingModes.Remove: return HandleRemove();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
private static int HandleHelp()
|
||||
{
|
||||
Console.WriteLine("SearchBox");
|
||||
Console.WriteLine("---------");
|
||||
Console.WriteLine("A standalone full-text search engine.");
|
||||
Console.WriteLine("A standalone full-text search engine. Operates on plain text sources.");
|
||||
Console.WriteLine();
|
||||
Console.WriteLine("Usage:");
|
||||
Console.WriteLine(" ./SearchBox.exe {mode} [options]");
|
||||
|
@ -70,72 +102,68 @@ namespace SearchBoxCLI
|
|||
Console.WriteLine("Modes:");
|
||||
Console.WriteLine(" query Query a pre-existing inverted search index");
|
||||
Console.WriteLine(" index Generate a raw index of the source document.");
|
||||
Console.WriteLine(" add Add a named document to an inverted search index.");
|
||||
Console.WriteLine(" remove Remove a named document from an inverted search index.");
|
||||
Console.WriteLine(" add Add a named document to a search index.");
|
||||
Console.WriteLine(" remove Remove a named document from a search index.");
|
||||
Console.WriteLine(" update Update a named document in a search index.");
|
||||
Console.WriteLine();
|
||||
Console.WriteLine("Options:");
|
||||
Console.WriteLine(" --source, -s Specifies the source document {index, add}");
|
||||
Console.WriteLine(" --source, -s Specifies the path to the source document {index, add}");
|
||||
Console.WriteLine(" --old-source Specifies the path to the old version of the source document to update {update}");
|
||||
Console.WriteLine(" --new-source Specifies the path to the new version of the source document to update {update}");
|
||||
Console.WriteLine(" --name, -n Sets the name of the source document {add, remove}");
|
||||
Console.WriteLine(" --idmap, -i Specifies the location of the id map, which is used to map document names onto their nuemical ids {add, remove}");
|
||||
Console.WriteLine(" --invindex Specifies the location of the inverted search index to use {add, remove}");
|
||||
Console.WriteLine(" ");
|
||||
break;
|
||||
|
||||
default:
|
||||
Console.Error.WriteLine($"Error: Unknown property {args[i]}.");
|
||||
Console.WriteLine(" --index Specifies the location of the search index to use {add, remove, update}");
|
||||
Console.WriteLine(" --tags Sets the tags to associate with the document. {add, update}");
|
||||
Console.WriteLine();
|
||||
Console.WriteLine("Examples:");
|
||||
Console.WriteLine(" cat books/complex_knots.txt | ./SearchBox.exe add --name \"Complex Knots: How to do and undo them\"");
|
||||
Console.WriteLine();
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
string modeText = extras.First(); extras.RemoveAt(0);
|
||||
Mode = (OperatingModes)Enum.Parse(typeof(OperatingModes), modeText);
|
||||
|
||||
switch (Mode) {
|
||||
case OperatingModes.Index: return HandleIndex();
|
||||
case OperatingModes.InvertedIndexAdd: return HandleInvIndexAdd();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
private static int HandleInvIndexAdd()
|
||||
/// <summary>
/// Handles the "add" mode: loads the search index from SearchIndexFilepath
/// (or starts a new one if that file does not exist yet), adds the document
/// read from Source under the configured Name and Tags, then writes the
/// index back to SearchIndexFilepath.
/// </summary>
/// <returns>0 on success; 1 if the document name or the index path is missing.</returns>
private static int HandleAdd()
{
	if (Name == string.Empty) {
		Console.Error.WriteLine("Error: The document name must be specified when reading from stdin!");
		return 1;
	}
	// Without this check an empty path reaches File.WriteAllText and throws.
	if (SearchIndexFilepath == string.Empty) {
		Console.Error.WriteLine("Error: No search index filepath specified.");
		return 1;
	}

	// --------------------------------------

	SearchBox searchBox = File.Exists(SearchIndexFilepath)
		? JsonConvert.DeserializeObject<SearchBox>(File.ReadAllText(SearchIndexFilepath))
		: new SearchBox();

	// Tags is only assigned when --tags is passed; hand over an empty
	// sequence rather than null in that case.
	searchBox.AddDocument(Name, Tags ?? Enumerable.Empty<string>(), Source.ReadToEnd());

	File.WriteAllText(SearchIndexFilepath, JsonConvert.SerializeObject(searchBox));

	return 0;
}
|
||||
if (IdMapFilepath == string.Empty) {
|
||||
Console.Error.WriteLine("Error: No id map filepath specified.");
|
||||
|
||||
/// <summary>
/// Handles the "remove" mode: loads the search index from SearchIndexFilepath,
/// removes the document called Name from it, then writes the index back.
/// </summary>
/// <returns>
/// 0 on success; 1 if the document name is missing, the index path is missing,
/// or the index file does not exist.
/// </returns>
private static int HandleRemove()
{
	if (Name == string.Empty) {
		Console.Error.WriteLine("Error: The document name must be specified when removing a document!");
		return 1;
	}
	if (SearchIndexFilepath == string.Empty) {
		Console.Error.WriteLine("Error: No search index filepath specified.");
		return 1;
	}
	// There is nothing to remove from an index that was never created;
	// report it instead of letting File.ReadAllText throw.
	if (!File.Exists(SearchIndexFilepath)) {
		Console.Error.WriteLine($"Error: The search index file '{SearchIndexFilepath}' does not exist.");
		return 1;
	}

	// --------------------------------------

	SearchBox searchBox = JsonConvert.DeserializeObject<SearchBox>(
		File.ReadAllText(SearchIndexFilepath)
	);

	searchBox.RemoveDocument(Name);

	File.WriteAllText(SearchIndexFilepath, JsonConvert.SerializeObject(searchBox));

	return 0;
}
|
||||
|
||||
public static int HandleIndex()
|
||||
private static int HandleIndex()
|
||||
{
|
||||
Index index = new Index(Source.ReadToEnd());
|
||||
switch (OutputMode)
|
||||
|
|
23
SearchBox/DocumentMeta.cs
Normal file
23
SearchBox/DocumentMeta.cs
Normal file
|
@ -0,0 +1,23 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace LibSearchBox
|
||||
{
|
||||
/// <summary>
/// Metadata kept for a single document: its title and its list of tags.
/// </summary>
public class DocumentMeta
{
	/// <summary>The document's title.</summary>
	public string Title { get; set; }

	/// <summary>The tags associated with the document. Never null.</summary>
	public List<string> Tags { get; private set; }

	/// <summary>
	/// Creates a metadata record for a document.
	/// </summary>
	/// <param name="inTitle">The title of the document.</param>
	/// <param name="inTags">
	/// The initial tags. They are copied into a new list; null is treated as "no tags".
	/// </param>
	public DocumentMeta(string inTitle, IEnumerable<string> inTags)
	{
		Title = inTitle;
		Tags = CopyTags(inTags);
	}

	/// <summary>
	/// Replaces all of the document's tags with the given ones.
	/// </summary>
	/// <param name="newTags">
	/// The replacement tags; null is treated as "no tags". Passing the
	/// current <see cref="Tags"/> list itself is safe.
	/// </param>
	public void ReplaceTags(IEnumerable<string> newTags)
	{
		// Snapshot before clearing: newTags may be null, or may be (or be
		// lazily derived from) the very list that is about to be emptied.
		List<string> replacement = CopyTags(newTags);
		Tags.Clear();
		Tags.AddRange(replacement);
	}

	/// <summary>Copies a possibly-null tag sequence into a fresh list.</summary>
	private static List<string> CopyTags(IEnumerable<string> tags)
	{
		return tags == null ? new List<string>() : new List<string>(tags);
	}
}
|
||||
}
|
|
@ -3,16 +3,16 @@ using System.Collections.Generic;
|
|||
using System.Text;
|
||||
using Stackoverflow.Utilities;
|
||||
|
||||
namespace SearchBox
|
||||
namespace LibSearchBox
|
||||
{
|
||||
public class IdNotFoundException : Exception { public IdNotFoundException(string message) : base(message) { } }
|
||||
|
||||
public class IdMapper
|
||||
public class IdMap
|
||||
{
|
||||
private int nextId = 0;
|
||||
public BiDictionary<int, string> map = new BiDictionary<int, string>();
|
||||
|
||||
public IdMapper()
|
||||
public IdMap()
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -43,9 +43,11 @@ namespace SearchBox
|
|||
map.Add(id, newPageName);
|
||||
}
|
||||
|
||||
public void DeletePageName(string pageName)
|
||||
/// <summary>
/// Removes the given page name from the map.
/// </summary>
/// <param name="pageName">The page name to remove.</param>
/// <returns>The id that GetId reported for the page name before it was removed.</returns>
public int DeletePageName(string pageName)
{
	// The id has to be looked up before the entry disappears from the map.
	int removedId = GetId(pageName);
	map.RemoveBySecond(pageName);
	return removedId;
}
|
||||
|
||||
public override string ToString()
|
||||
|
|
|
@ -5,7 +5,7 @@ using System.IO;
|
|||
using System.Text;
|
||||
using SBRL.Utilities;
|
||||
|
||||
namespace SearchBox
|
||||
namespace LibSearchBox
|
||||
{
|
||||
[Flags]
|
||||
public enum IndexOptions
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
using System.Collections.Concurrent;
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace SearchBox
|
||||
namespace LibSearchBox
|
||||
{
|
||||
public class InvertedIndex
|
||||
{
|
||||
|
@ -26,9 +26,9 @@ namespace SearchBox
|
|||
return true;
|
||||
}
|
||||
|
||||
public bool RemoveIndex(int pageId, Index newIndex)
|
||||
public bool RemoveIndex(int pageId, Index oldIndex)
|
||||
{
|
||||
foreach (string token in newIndex.Tokens())
|
||||
foreach (string token in oldIndex.Tokens())
|
||||
{
|
||||
if (!invertedIndex.ContainsKey(token) || !invertedIndex[token].ContainsKey(pageId)) continue;
|
||||
|
||||
|
@ -37,5 +37,22 @@ namespace SearchBox
|
|||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/// <summary>
/// Swaps one page's entries in the inverted index: removes those described
/// by oldIndex, then adds those described by newIndex.
/// </summary>
/// <param name="pageId">The id of the page whose entries are replaced.</param>
/// <param name="oldIndex">The index of the page's previous content.</param>
/// <param name="newIndex">The index of the page's new content.</param>
/// <returns>
/// true only if both RemoveIndex and AddIndex returned true; AddIndex is
/// not attempted when RemoveIndex returns false.
/// </returns>
public bool ReplaceIndex(int pageId, Index oldIndex, Index newIndex)
{
	// && short-circuits, so the add only runs after a successful remove.
	return RemoveIndex(pageId, oldIndex) && AddIndex(pageId, newIndex);
}
|
||||
|
||||
/// <summary>
/// Removes the given page id from every token's entry in the inverted index.
/// </summary>
/// <param name="pageId">The id of the page to remove.</param>
/// <returns>
/// false as soon as an entry that contained the page id could not be
/// removed; true otherwise.
/// </returns>
public bool RemoveById(int pageId)
{
	foreach (var tokenEntry in invertedIndex)
	{
		var pages = tokenEntry.Value;

		// Tokens that never referenced this page are skipped; a failed
		// removal from one that did aborts the whole operation.
		if (pages.ContainsKey(pageId) && !pages.TryRemove(pageId, out _))
		{
			return false;
		}
	}

	return true;
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,10 +1,48 @@
|
|||
using System;
|
||||
namespace SearchBox
|
||||
using System.Collections.Concurrent;
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace LibSearchBox
|
||||
{
|
||||
public class SearchBoxException : Exception { public SearchBoxException(string message) : base(message) { } }
|
||||
|
||||
/// <summary>
/// Ties together the id map, the inverted index and the per-document
/// metadata table behind a small add / update / remove API keyed by
/// document title.
/// </summary>
// NOTE(review): the CLI persists instances of this class with
// JsonConvert.SerializeObject / DeserializeObject, but all state here lives
// in private fields - confirm the serializer is configured to include them.
public class SearchBox
{
	private IdMap idMap = new IdMap();
	private InvertedIndex index = new InvertedIndex();
	private ConcurrentDictionary<int, DocumentMeta> metaTable = new ConcurrentDictionary<int, DocumentMeta>();

	/// <summary>Creates an empty search box.</summary>
	public SearchBox()
	{
	}

	/// <summary>
	/// Adds a document to the search box.
	/// </summary>
	/// <param name="title">The title of the document.</param>
	/// <param name="tags">The tags to associate with the document; null is treated as "no tags".</param>
	/// <param name="content">The document's text content, which is indexed.</param>
	public void AddDocument(string title, IEnumerable<string> tags, string content)
	{
		DocumentMeta info = new DocumentMeta(title, tags ?? new List<string>());
		int id = idMap.GetId(info.Title);

		// Store the metadata under the document's id. UpdateDocument reads
		// it back through metaTable[id], which would throw
		// KeyNotFoundException if nothing had been stored here.
		metaTable[id] = info;

		Index upsideIndex = new Index(content);
		index.AddIndex(id, upsideIndex);
	}

	/// <summary>
	/// Replaces a document's tags and re-indexes its content.
	/// </summary>
	/// <param name="title">The title of the document to update.</param>
	/// <param name="newTags">The tags that replace the document's current tags.</param>
	/// <param name="oldContent">The content the document was previously indexed with.</param>
	/// <param name="newContent">The document's new content.</param>
	/// <exception cref="SearchBoxException">The inverted index could not be updated.</exception>
	public void UpdateDocument(string title, IEnumerable<string> newTags, string oldContent, string newContent)
	{
		int id = idMap.GetId(title);
		DocumentMeta info = metaTable[id];
		info.ReplaceTags(newTags);

		Index oldIndex = new Index(oldContent), newIndex = new Index(newContent);
		if (!index.ReplaceIndex(id, oldIndex, newIndex))
		{
			// Same exception type RemoveDocument uses; it derives from
			// Exception, so existing catch blocks still match.
			throw new SearchBoxException($"Error: Failed to replace index for document with title {title}.");
		}
	}

	/// <summary>
	/// Removes a document from the id map, the metadata table and the inverted index.
	/// </summary>
	/// <param name="title">The title of the document to remove.</param>
	/// <exception cref="SearchBoxException">The document could not be removed from the inverted index.</exception>
	public void RemoveDocument(string title)
	{
		int id = idMap.DeletePageName(title);
		metaTable.TryRemove(id, out DocumentMeta noop);
		if (!index.RemoveById(id))
		{
			throw new SearchBoxException($"Failed to remove page with title '{title}' from inverted index.");
		}
	}
}
|
||||
}
|
||||
|
|
|
@ -48,6 +48,7 @@
|
|||
<Compile Include="InvertedIndex.cs" />
|
||||
<Compile Include="IdMap.cs" />
|
||||
<Compile Include="Utilities\BiDictionary.cs" />
|
||||
<Compile Include="DocumentMeta.cs" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Folder Include="EmbeddedFiles\" />
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace SearchBox
|
||||
namespace LibSearchBox
|
||||
{
|
||||
public class StopwordTester
|
||||
{
|
||||
|
|
|
@ -3,10 +3,10 @@ using System.Collections;
|
|||
using System.Collections.Generic;
|
||||
using System.Net;
|
||||
using System.Text.RegularExpressions;
|
||||
using SearchBox.Utilities;
|
||||
using LibSearchBox.Utilities;
|
||||
using UnidecodeSharpFork;
|
||||
|
||||
namespace SearchBox
|
||||
namespace LibSearchBox
|
||||
{
|
||||
[Flags]
|
||||
public enum TokenizerOptions
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
using System;
|
||||
|
||||
namespace SearchBox.Utilities
|
||||
namespace LibSearchBox.Utilities
|
||||
{
|
||||
public static class StringPlus
|
||||
{
|
||||
|
|
Loading…
Reference in a new issue