Hook up empty SearchBox class
This commit is contained in:
parent
39d1d6f90d
commit
959aa219ce
10 changed files with 180 additions and 71 deletions
|
@ -4,8 +4,8 @@ using System.IO;
|
||||||
using System.Linq;
|
using System.Linq;
|
||||||
using Newtonsoft.Json;
|
using Newtonsoft.Json;
|
||||||
|
|
||||||
using SearchBox;
|
using LibSearchBox;
|
||||||
|
using System.Text.RegularExpressions;
|
||||||
|
|
||||||
namespace SearchBoxCLI
|
namespace SearchBoxCLI
|
||||||
{
|
{
|
||||||
|
@ -13,9 +13,9 @@ namespace SearchBoxCLI
|
||||||
{
|
{
|
||||||
Query,
|
Query,
|
||||||
Index,
|
Index,
|
||||||
InvertedIndexAdd,
|
Add,
|
||||||
InvertedIndexRemove,
|
Remove,
|
||||||
InvertedIndexUpdate
|
Update
|
||||||
}
|
}
|
||||||
|
|
||||||
enum OutputModes
|
enum OutputModes
|
||||||
|
@ -27,9 +27,10 @@ namespace SearchBoxCLI
|
||||||
class MainClass {
|
class MainClass {
|
||||||
private static OperatingModes Mode = OperatingModes.Query;
|
private static OperatingModes Mode = OperatingModes.Query;
|
||||||
private static string Name = string.Empty;
|
private static string Name = string.Empty;
|
||||||
private static string InvIndexFilepath = string.Empty;
|
private static IEnumerable<string> Tags;
|
||||||
private static string IdMapFilepath = string.Empty;
|
private static string SearchIndexFilepath = string.Empty;
|
||||||
private static TextReader Source = Console.In;
|
private static TextReader Source = Console.In;
|
||||||
|
private static TextReader SourceOld = null, SourceNew = null;
|
||||||
private static OutputModes OutputMode = OutputModes.Json;
|
private static OutputModes OutputMode = OutputModes.Json;
|
||||||
|
|
||||||
public static int Main(string[] args)
|
public static int Main(string[] args)
|
||||||
|
@ -43,7 +44,20 @@ namespace SearchBoxCLI
|
||||||
case "source":
|
case "source":
|
||||||
string sourceFilename = args[++i];
|
string sourceFilename = args[++i];
|
||||||
Source = new StreamReader(sourceFilename);
|
Source = new StreamReader(sourceFilename);
|
||||||
Name = sourceFilename;
|
Name = Name.Length > 0 ? Name : sourceFilename;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case "old-source":
|
||||||
|
SourceOld = new StreamReader(args[++i]);
|
||||||
|
break;
|
||||||
|
case "new-source":
|
||||||
|
string newSourceFilename = args[++i];
|
||||||
|
SourceNew = new StreamReader(newSourceFilename);
|
||||||
|
Name = Name.Length > 0 ? Name : newSourceFilename;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case "tags":
|
||||||
|
Tags = Regex.Split(args[++i], @",\s+");
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case "n":
|
case "n":
|
||||||
|
@ -51,18 +65,36 @@ namespace SearchBoxCLI
|
||||||
Name = args[++i];
|
Name = args[++i];
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case "invindex":
|
case "index":
|
||||||
InvIndexFilepath = args[++i];
|
SearchIndexFilepath = args[++i];
|
||||||
break;
|
|
||||||
|
|
||||||
case "idmap":
|
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case "help":
|
case "help":
|
||||||
|
return HandleHelp();
|
||||||
|
|
||||||
|
default:
|
||||||
|
Console.Error.WriteLine($"Error: Unknown property {args[i]}.");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (extras.Count < 1) return HandleHelp();
|
||||||
|
string modeText = extras.First(); extras.RemoveAt(0);
|
||||||
|
Mode = (OperatingModes)Enum.Parse(typeof(OperatingModes), modeText);
|
||||||
|
|
||||||
|
switch (Mode) {
|
||||||
|
case OperatingModes.Index: return HandleIndex();
|
||||||
|
case OperatingModes.Add: return HandleAdd();
|
||||||
|
case OperatingModes.Remove: return HandleRemove();
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static int HandleHelp()
|
||||||
|
{
|
||||||
Console.WriteLine("SearchBox");
|
Console.WriteLine("SearchBox");
|
||||||
Console.WriteLine("---------");
|
Console.WriteLine("---------");
|
||||||
Console.WriteLine("A standalone full-text search engine.");
|
Console.WriteLine("A standalone full-text search engine. Operates on plain text sources.");
|
||||||
Console.WriteLine();
|
Console.WriteLine();
|
||||||
Console.WriteLine("Usage:");
|
Console.WriteLine("Usage:");
|
||||||
Console.WriteLine(" ./SearchBox.exe {mode} [options]");
|
Console.WriteLine(" ./SearchBox.exe {mode} [options]");
|
||||||
|
@ -70,72 +102,68 @@ namespace SearchBoxCLI
|
||||||
Console.WriteLine("Modes:");
|
Console.WriteLine("Modes:");
|
||||||
Console.WriteLine(" query Query a pre-existing inverted search index");
|
Console.WriteLine(" query Query a pre-existing inverted search index");
|
||||||
Console.WriteLine(" index Generate a raw index of the source document.");
|
Console.WriteLine(" index Generate a raw index of the source document.");
|
||||||
Console.WriteLine(" add Add a named document to an inverted search index.");
|
Console.WriteLine(" add Add a named document to a search index.");
|
||||||
Console.WriteLine(" remove Remove a named document from an inverted search index.");
|
Console.WriteLine(" remove Remove a named document from a search index.");
|
||||||
|
Console.WriteLine(" update Update a named document in a search index.");
|
||||||
Console.WriteLine();
|
Console.WriteLine();
|
||||||
Console.WriteLine("Options:");
|
Console.WriteLine("Options:");
|
||||||
Console.WriteLine(" --source, -s Specifies the source document {index, add}");
|
Console.WriteLine(" --source, -s Specifies the path to the source document {index, add}");
|
||||||
|
Console.WriteLine(" --old-source Specifies the path to the old version of the source document to update {update}");
|
||||||
|
Console.WriteLine(" --new-source Specifies the path to the new version of the source document to update {update}");
|
||||||
Console.WriteLine(" --name, -n Sets the name of the source document {add, remove}");
|
Console.WriteLine(" --name, -n Sets the name of the source document {add, remove}");
|
||||||
Console.WriteLine(" --idmap, -i Specifies the location of the id map, which is used to map document names onto their nuemical ids {add, remove}");
|
Console.WriteLine(" --index Specifies the location of the search index to use {add, remove, update}");
|
||||||
Console.WriteLine(" --invindex Specifies the location of the inverted search index to use {add, remove}");
|
Console.WriteLine(" --tags Sets the tags to associate with the document. {add, update}");
|
||||||
Console.WriteLine(" ");
|
Console.WriteLine();
|
||||||
break;
|
Console.WriteLine("Examples:");
|
||||||
|
Console.WriteLine(" cat books/complex_knots.txt | ./SearchBox.exe add --name \"Complex Knots: How to do and undo them\"");
|
||||||
default:
|
Console.WriteLine();
|
||||||
Console.Error.WriteLine($"Error: Unknown property {args[i]}.");
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
string modeText = extras.First(); extras.RemoveAt(0);
|
/// <summary>
/// Handles the "add" mode: reads the whole source document, adds it under
/// <c>Name</c> to the search index stored at <c>SearchIndexFilepath</c>, and
/// writes the index back to that same path. A fresh index is created when
/// the index file does not exist yet.
/// </summary>
/// <returns>0 on success; 1 when a required option is missing.</returns>
private static int HandleAdd()
{
    if (Name == string.Empty) {
        Console.Error.WriteLine("Error: The document name must be specified when reading from stdin!");
        return 1;
    }
    // Without a path File.WriteAllText below would throw an ArgumentException
    // after the whole source had already been read and indexed.
    if (SearchIndexFilepath == string.Empty) {
        Console.Error.WriteLine("Error: No search index filepath specified.");
        return 1;
    }

    // --------------------------------------

    SearchBox searchBox = null;
    if (File.Exists(SearchIndexFilepath))
        searchBox = JsonConvert.DeserializeObject<SearchBox>(File.ReadAllText(SearchIndexFilepath));
    // Covers both "no index file yet" and a deserializer result of null
    // (e.g. an empty index file).
    if (searchBox == null)
        searchBox = new SearchBox();

    // Tags is only assigned when --tags is given; hand over an empty
    // sequence instead of null in that case.
    searchBox.AddDocument(Name, Tags ?? Enumerable.Empty<string>(), Source.ReadToEnd());

    File.WriteAllText(SearchIndexFilepath, JsonConvert.SerializeObject(searchBox));

    return 0;
}
|
||||||
if (IdMapFilepath == string.Empty) {
|
|
||||||
Console.Error.WriteLine("Error: No id map filepath specified.");
|
/// <summary>
/// Handles the "remove" mode: loads the search index stored at
/// <c>SearchIndexFilepath</c>, removes the document called <c>Name</c> from
/// it, and writes the index back to the same path.
/// </summary>
/// <returns>0 on success; 1 when a required option is missing or the index cannot be loaded.</returns>
private static int HandleRemove()
{
    if (Name == string.Empty) {
        Console.Error.WriteLine("Error: The document name must be specified when removing a document!");
        return 1;
    }
    if (SearchIndexFilepath == string.Empty) {
        Console.Error.WriteLine("Error: No search index filepath specified.");
        return 1;
    }
    // Unlike "add", there is nothing sensible to create here: a missing
    // index cannot contain the document, so report it instead of letting
    // File.ReadAllText throw.
    if (!File.Exists(SearchIndexFilepath)) {
        Console.Error.WriteLine($"Error: The search index at '{SearchIndexFilepath}' does not exist.");
        return 1;
    }

    // --------------------------------------

    SearchBox searchBox = JsonConvert.DeserializeObject<SearchBox>(
        File.ReadAllText(SearchIndexFilepath)
    );
    // The deserializer can hand back null (e.g. for an empty file).
    if (searchBox == null) {
        Console.Error.WriteLine($"Error: The search index at '{SearchIndexFilepath}' is empty or invalid.");
        return 1;
    }

    searchBox.RemoveDocument(Name);

    File.WriteAllText(SearchIndexFilepath, JsonConvert.SerializeObject(searchBox));

    return 0;
}
|
||||||
|
|
||||||
public static int HandleIndex()
|
private static int HandleIndex()
|
||||||
{
|
{
|
||||||
Index index = new Index(Source.ReadToEnd());
|
Index index = new Index(Source.ReadToEnd());
|
||||||
switch (OutputMode)
|
switch (OutputMode)
|
||||||
|
|
23
SearchBox/DocumentMeta.cs
Normal file
23
SearchBox/DocumentMeta.cs
Normal file
|
@ -0,0 +1,23 @@
|
||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
|
||||||
|
namespace LibSearchBox
|
||||||
|
{
|
||||||
|
/// <summary>
/// Metadata kept for a single document: its title and its list of tags.
/// </summary>
public class DocumentMeta
{
    /// <summary>The document's title.</summary>
    public string Title { get; set; }

    /// <summary>The tags attached to the document. Never null after construction.</summary>
    public List<string> Tags { get; private set; }

    /// <summary>
    /// Creates the metadata for a document.
    /// </summary>
    /// <param name="inTitle">The document's title.</param>
    /// <param name="inTags">The initial tags; they are copied. Null is treated as "no tags".</param>
    public DocumentMeta(string inTitle, IEnumerable<string> inTags)
    {
        Title = inTitle;
        Tags = inTags == null ? new List<string>() : new List<string>(inTags);
    }

    /// <summary>
    /// Replaces the current tags with the given ones.
    /// </summary>
    /// <param name="newTags">The replacement tags. Null is treated as "no tags".</param>
    public void ReplaceTags(IEnumerable<string> newTags)
    {
        // Copy before clearing: newTags may be this very list (or a lazy
        // query over it), in which case clearing first would lose every tag.
        List<string> replacement = newTags == null ? new List<string>() : new List<string>(newTags);
        Tags.Clear();
        Tags.AddRange(replacement);
    }
}
|
||||||
|
}
|
|
@ -3,16 +3,16 @@ using System.Collections.Generic;
|
||||||
using System.Text;
|
using System.Text;
|
||||||
using Stackoverflow.Utilities;
|
using Stackoverflow.Utilities;
|
||||||
|
|
||||||
namespace SearchBox
|
namespace LibSearchBox
|
||||||
{
|
{
|
||||||
public class IdNotFoundException : Exception { public IdNotFoundException(string message) : base(message) { } }
|
public class IdNotFoundException : Exception { public IdNotFoundException(string message) : base(message) { } }
|
||||||
|
|
||||||
public class IdMapper
|
public class IdMap
|
||||||
{
|
{
|
||||||
private int nextId = 0;
|
private int nextId = 0;
|
||||||
public BiDictionary<int, string> map = new BiDictionary<int, string>();
|
public BiDictionary<int, string> map = new BiDictionary<int, string>();
|
||||||
|
|
||||||
public IdMapper()
|
public IdMap()
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -43,9 +43,11 @@ namespace SearchBox
|
||||||
map.Add(id, newPageName);
|
map.Add(id, newPageName);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
/// Removes a page name from the map and reports the id obtained for it.
/// </summary>
/// <param name="pageName">The page name to remove.</param>
/// <returns>The value GetId returned for <paramref name="pageName"/> before the removal.</returns>
public int DeletePageName(string pageName)
{
    // The id is fetched before the removal so it can still be returned.
    // NOTE(review): what GetId does for a name that is not in the map is
    // not visible from here - confirm before relying on it.
    int removedId = GetId(pageName);

    map.RemoveBySecond(pageName);

    return removedId;
}
|
||||||
|
|
||||||
public override string ToString()
|
public override string ToString()
|
||||||
|
|
|
@ -5,7 +5,7 @@ using System.IO;
|
||||||
using System.Text;
|
using System.Text;
|
||||||
using SBRL.Utilities;
|
using SBRL.Utilities;
|
||||||
|
|
||||||
namespace SearchBox
|
namespace LibSearchBox
|
||||||
{
|
{
|
||||||
[Flags]
|
[Flags]
|
||||||
public enum IndexOptions
|
public enum IndexOptions
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
using System.Collections.Concurrent;
|
using System.Collections.Concurrent;
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
|
|
||||||
namespace SearchBox
|
namespace LibSearchBox
|
||||||
{
|
{
|
||||||
public class InvertedIndex
|
public class InvertedIndex
|
||||||
{
|
{
|
||||||
|
@ -26,9 +26,9 @@ namespace SearchBox
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
public bool RemoveIndex(int pageId, Index newIndex)
|
public bool RemoveIndex(int pageId, Index oldIndex)
|
||||||
{
|
{
|
||||||
foreach (string token in newIndex.Tokens())
|
foreach (string token in oldIndex.Tokens())
|
||||||
{
|
{
|
||||||
if (!invertedIndex.ContainsKey(token) || !invertedIndex[token].ContainsKey(pageId)) continue;
|
if (!invertedIndex.ContainsKey(token) || !invertedIndex[token].ContainsKey(pageId)) continue;
|
||||||
|
|
||||||
|
@ -37,5 +37,22 @@ namespace SearchBox
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Swaps the index stored for a page by removing the old index and then
/// adding the new one.
/// </summary>
/// <param name="pageId">The id of the page whose index is replaced.</param>
/// <param name="oldIndex">The index to hand to RemoveIndex.</param>
/// <param name="newIndex">The index to hand to AddIndex.</param>
/// <returns>
/// true only when both RemoveIndex and AddIndex report success. AddIndex is
/// not attempted when RemoveIndex reports failure.
/// </returns>
public bool ReplaceIndex(int pageId, Index oldIndex, Index newIndex)
{
    // && short-circuits, so the add only runs after a successful remove.
    return RemoveIndex(pageId, oldIndex) && AddIndex(pageId, newIndex);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Removes the given page id from every token entry of the inverted index
/// that contains it.
/// </summary>
/// <param name="pageId">The id of the page to remove.</param>
/// <returns>
/// false as soon as a TryRemove on an entry that contained the id fails;
/// true otherwise.
/// </returns>
public bool RemoveById(int pageId)
{
    foreach (KeyValuePair<string, ConcurrentDictionary<int, List<int>>> entry in invertedIndex) {
        ConcurrentDictionary<int, List<int>> pagesForToken = entry.Value;

        // Tokens that never referenced this page are left untouched.
        bool referencesPage = pagesForToken.ContainsKey(pageId);
        if (referencesPage && !pagesForToken.TryRemove(pageId, out List<int> removed))
            return false;
    }

    return true;
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,10 +1,48 @@
|
||||||
using System;
|
using System;
|
||||||
namespace SearchBox
|
using System.Collections.Concurrent;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
|
||||||
|
namespace LibSearchBox
|
||||||
{
|
{
|
||||||
|
/// <summary>
/// Exception type thrown by <see cref="SearchBox"/>; carries only a message.
/// </summary>
public class SearchBoxException : Exception { public SearchBoxException(string message) : base(message) { } }
|
||||||
|
|
||||||
/// <summary>
/// Ties together the id map, the inverted index and the per-document
/// metadata table, and exposes add / update / remove operations on documents
/// identified by their title.
/// </summary>
public class SearchBox
{
    // NOTE(review): these fields are private and the CLI persists this class
    // with JsonConvert.SerializeObject - confirm that the serializer settings
    // in use actually include private fields, otherwise the state is not saved.
    private IdMap idMap = new IdMap();
    private InvertedIndex index = new InvertedIndex();
    private ConcurrentDictionary<int, DocumentMeta> metaTable = new ConcurrentDictionary<int, DocumentMeta>();

    /// <summary>Creates an empty search box.</summary>
    public SearchBox()
    {

    }

    /// <summary>
    /// Adds a document: obtains an id for its title, indexes its content and
    /// records its metadata.
    /// </summary>
    /// <param name="title">The document's title; used as the key in the id map.</param>
    /// <param name="tags">The document's tags. Null is treated as "no tags".</param>
    /// <param name="content">The document's text content.</param>
    public void AddDocument(string title, IEnumerable<string> tags, string content)
    {
        DocumentMeta info = new DocumentMeta(title, tags ?? new List<string>());
        int id = idMap.GetId(info.Title);
        Index upsideIndex = new Index(content);
        index.AddIndex(id, upsideIndex);
        // Record the metadata so UpdateDocument can find it again later.
        metaTable[id] = info;
    }

    /// <summary>
    /// Updates a document's tags and replaces its indexed content.
    /// </summary>
    /// <param name="title">The title of the document to update.</param>
    /// <param name="newTags">The tags that replace the current ones. Null is treated as "no tags".</param>
    /// <param name="oldContent">The previously indexed content.</param>
    /// <param name="newContent">The content to index instead.</param>
    /// <exception cref="SearchBoxException">The inverted index reported that the replacement failed.</exception>
    public void UpdateDocument(string title, IEnumerable<string> newTags, string oldContent, string newContent)
    {
        int id = idMap.GetId(title);
        IEnumerable<string> tags = newTags ?? new List<string>();

        // A document may have no metadata entry yet; create one in that case
        // instead of failing with a KeyNotFoundException.
        DocumentMeta info;
        if (metaTable.TryGetValue(id, out info))
            info.ReplaceTags(tags);
        else
            metaTable[id] = new DocumentMeta(title, tags);

        Index oldIndex = new Index(oldContent), newIndex = new Index(newContent);
        if (!index.ReplaceIndex(id, oldIndex, newIndex))
            throw new SearchBoxException($"Error: Failed to replace index for document with title {title}.");
    }

    /// <summary>
    /// Removes a document from the id map, the metadata table and the
    /// inverted index.
    /// </summary>
    /// <param name="title">The title of the document to remove.</param>
    /// <exception cref="SearchBoxException">The inverted index reported that the removal failed.</exception>
    public void RemoveDocument(string title)
    {
        int id = idMap.DeletePageName(title);
        // A missing metadata entry is fine here, so the result is ignored.
        metaTable.TryRemove(id, out DocumentMeta noop);
        if (!index.RemoveById(id))
            throw new SearchBoxException($"Failed to remove page with title '{title}' from inverted index.");
    }
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -48,6 +48,7 @@
|
||||||
<Compile Include="InvertedIndex.cs" />
|
<Compile Include="InvertedIndex.cs" />
|
||||||
<Compile Include="IdMap.cs" />
|
<Compile Include="IdMap.cs" />
|
||||||
<Compile Include="Utilities\BiDictionary.cs" />
|
<Compile Include="Utilities\BiDictionary.cs" />
|
||||||
|
<Compile Include="DocumentMeta.cs" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<Folder Include="EmbeddedFiles\" />
|
<Folder Include="EmbeddedFiles\" />
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
using System;
|
using System;
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
|
|
||||||
namespace SearchBox
|
namespace LibSearchBox
|
||||||
{
|
{
|
||||||
public class StopwordTester
|
public class StopwordTester
|
||||||
{
|
{
|
||||||
|
|
|
@ -3,10 +3,10 @@ using System.Collections;
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
using System.Net;
|
using System.Net;
|
||||||
using System.Text.RegularExpressions;
|
using System.Text.RegularExpressions;
|
||||||
using SearchBox.Utilities;
|
using LibSearchBox.Utilities;
|
||||||
using UnidecodeSharpFork;
|
using UnidecodeSharpFork;
|
||||||
|
|
||||||
namespace SearchBox
|
namespace LibSearchBox
|
||||||
{
|
{
|
||||||
[Flags]
|
[Flags]
|
||||||
public enum TokenizerOptions
|
public enum TokenizerOptions
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
using System;
|
using System;
|
||||||
|
|
||||||
namespace SearchBox.Utilities
|
namespace LibSearchBox.Utilities
|
||||||
{
|
{
|
||||||
public static class StringPlus
|
public static class StringPlus
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in a new issue