Parallelise the batch addition mode.
This required updating IdMap (and, by extension, BiDictionary) to support concurrency.
This commit is contained in:
parent
c663882726
commit
0dd0de0b52
5 changed files with 80 additions and 31 deletions
|
@ -7,6 +7,8 @@ using Newtonsoft.Json;
|
||||||
using LibSearchBox;
|
using LibSearchBox;
|
||||||
using System.Text.RegularExpressions;
|
using System.Text.RegularExpressions;
|
||||||
using Newtonsoft.Json.Serialization;
|
using Newtonsoft.Json.Serialization;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
using SBRL.Utilities;
|
||||||
|
|
||||||
namespace SearchBoxCLI
|
namespace SearchBoxCLI
|
||||||
{
|
{
|
||||||
|
@ -156,24 +158,25 @@ namespace SearchBoxCLI
|
||||||
if (!Batch)
|
if (!Batch)
|
||||||
searchBox.AddDocument(Name, Tags, Source.ReadToEnd());
|
searchBox.AddDocument(Name, Tags, Source.ReadToEnd());
|
||||||
else {
|
else {
|
||||||
string nextLine = "";
|
try
|
||||||
while ((nextLine = Source.ReadLine()) != null) {
|
{
|
||||||
|
Parallel.ForEach(LineIterator.GetLines(Source), (string nextLine) => {
|
||||||
string[] parts = nextLine.Split('|');
|
string[] parts = nextLine.Split('|');
|
||||||
if (parts[0].Trim().Length == 0)
|
if (parts[0].Trim().Length == 0)
|
||||||
continue;
|
return;
|
||||||
try {
|
|
||||||
searchBox.AddDocument(
|
searchBox.AddDocument(
|
||||||
parts[1].Trim(),
|
parts[1].Trim(),
|
||||||
Regex.Split(parts[2], @",\s*"),
|
Regex.Split(parts[2], @",\s*"),
|
||||||
File.ReadAllText(parts[0].Trim())
|
File.ReadAllText(parts[0].Trim())
|
||||||
);
|
);
|
||||||
Console.Error.WriteLine($"[Searchbox] [add] {parts[0].Trim()}");
|
Console.Error.WriteLine($"[Searchbox] [add] {parts[0].Trim()}");
|
||||||
} catch (FileNotFoundException) {
|
});
|
||||||
Console.Error.WriteLine($"Error: Can't find file {parts[2].Trim()}.");
|
} catch (FileNotFoundException error) {
|
||||||
|
Console.Error.WriteLine(error.Message);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
File.WriteAllText(SearchIndexFilepath, JsonConvert.SerializeObject(searchBox));
|
File.WriteAllText(SearchIndexFilepath, JsonConvert.SerializeObject(searchBox));
|
||||||
|
|
||||||
|
|
|
@ -40,6 +40,7 @@
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<Compile Include="Program.cs" />
|
<Compile Include="Program.cs" />
|
||||||
<Compile Include="Properties\AssemblyInfo.cs" />
|
<Compile Include="Properties\AssemblyInfo.cs" />
|
||||||
|
<Compile Include="Utilities\LineIterator.cs" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<ProjectReference Include="..\SearchBox\SearchBox.csproj">
|
<ProjectReference Include="..\SearchBox\SearchBox.csproj">
|
||||||
|
@ -50,5 +51,8 @@
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<None Include="packages.config" />
|
<None Include="packages.config" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<Folder Include="Utilities\" />
|
||||||
|
</ItemGroup>
|
||||||
<Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
|
<Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
|
||||||
</Project>
|
</Project>
|
34
SearchBox-CLI/Utilities/LineIterator.cs
Normal file
34
SearchBox-CLI/Utilities/LineIterator.cs
Normal file
|
@ -0,0 +1,34 @@
|
||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.IO;
|
||||||
|
|
||||||
|
namespace SBRL.Utilities
|
||||||
|
{
|
||||||
|
/// <summary>
/// Helpers that expose line-oriented text as lazily evaluated sequences of strings.
/// </summary>
static class LineIterator {

    /// <summary>
    /// Lazily yields every line that can be read from <paramref name="source"/>.
    /// </summary>
    /// <remarks>
    /// Each element is produced by a single <c>ReadLine()</c> call, made only when the
    /// consumer asks for the next item. The reader is neither closed nor disposed here.
    /// </remarks>
    /// <param name="source">The reader to pull lines from.</param>
    /// <returns>A sequence of lines that ends when <c>ReadLine()</c> returns null.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    public static IEnumerable<string> GetLines(TextReader source)
    {
        // Validate eagerly: an iterator body would not run (and so would not throw)
        // until the first MoveNext() call, far away from the faulty call site.
        if (source == null)
            throw new ArgumentNullException(nameof(source));

        return ReadLines(source);
    }

    /// <summary>
    /// Lazily yields every line contained in the string <paramref name="source"/>.
    /// </summary>
    /// <remarks>
    /// "\r\n" sequences are normalised to "\n" before splitting; a lone "\r" is left
    /// untouched. Text after the last "\n" is yielded as a final line, and a trailing
    /// "\n" does not produce an extra empty line, which matches what
    /// <see cref="GetLines(TextReader)"/> yields for the same text.
    /// </remarks>
    /// <param name="source">The text to split into lines.</param>
    /// <returns>A sequence of lines without their line terminators.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    public static IEnumerable<string> GetLines(string source)
    {
        if (source == null)
            throw new ArgumentNullException(nameof(source));

        return SplitLines(source);
    }

    // Iterator half of GetLines(TextReader): one ReadLine() call per yielded element.
    private static IEnumerable<string> ReadLines(TextReader reader)
    {
        string nextLine;
        while ((nextLine = reader.ReadLine()) != null) {
            yield return nextLine;
        }
    }

    // Iterator half of GetLines(string): splits on '\n' after normalising "\r\n".
    private static IEnumerable<string> SplitLines(string text)
    {
        text = text.Replace("\r\n", "\n");
        int curPosition = 0;
        while (true)
        {
            // The char overload performs an ordinal search; the string overload
            // is culture-sensitive and is not needed for a plain separator.
            int nextIndex = text.IndexOf('\n', curPosition);
            if (nextIndex == -1)
                break;

            yield return text.Substring(curPosition, nextIndex - curPosition);

            curPosition = nextIndex + 1;
        }

        // Do not lose the final line when the text does not end with a newline.
        if (curPosition < text.Length)
            yield return text.Substring(curPosition);
    }

}
|
||||||
|
}
|
|
@ -2,6 +2,7 @@
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
using System.Linq;
|
using System.Linq;
|
||||||
using System.Text;
|
using System.Text;
|
||||||
|
using System.Threading;
|
||||||
using Newtonsoft.Json;
|
using Newtonsoft.Json;
|
||||||
using Stackoverflow.Utilities;
|
using Stackoverflow.Utilities;
|
||||||
|
|
||||||
|
@ -41,6 +42,12 @@ namespace LibSearchBox
|
||||||
nextId = map.Max((pair) => pair.First) + 1;
|
nextId = map.Max((pair) => pair.First) + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Reserves the next unused id.
/// </summary>
/// <remarks>
/// <c>Interlocked.Increment</c> bumps <c>nextId</c> atomically and hands back the
/// incremented value; subtracting one afterwards recovers the value the counter
/// held before this call, which is the id being handed out.
/// </remarks>
/// <returns>The value of <c>nextId</c> prior to the increment.</returns>
private int generateNewId()
{
    // The subtraction works on a local copy, so only the increment needs to be atomic.
    return Interlocked.Increment(ref nextId) - 1;
}
|
||||||
|
|
||||||
|
|
||||||
public int GetId(string pageName)
|
public int GetId(string pageName)
|
||||||
{
|
{
|
||||||
|
@ -49,7 +56,7 @@ namespace LibSearchBox
|
||||||
|
|
||||||
int result;
|
int result;
|
||||||
if (!map.TryGetBySecond(pageName, out result)) {
|
if (!map.TryGetBySecond(pageName, out result)) {
|
||||||
map.Add(result = nextId++, pageName);
|
map.Add(result = generateNewId(), pageName);
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
using System;
|
using System;
|
||||||
using System.Collections;
|
using System.Collections;
|
||||||
|
using System.Collections.Concurrent;
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
|
|
||||||
namespace Stackoverflow.Utilities
|
namespace Stackoverflow.Utilities
|
||||||
|
@ -26,9 +27,9 @@ namespace Stackoverflow.Utilities
|
||||||
public struct Enumerator : IEnumerator<Pair>, IEnumerator
|
public struct Enumerator : IEnumerator<Pair>, IEnumerator
|
||||||
{
|
{
|
||||||
|
|
||||||
public Enumerator(Dictionary<TFirst, TSecond>.Enumerator dictEnumerator)
|
public Enumerator(IEnumerable<KeyValuePair<TFirst, TSecond>> dictEnumerator)
|
||||||
{
|
{
|
||||||
_dictEnumerator = dictEnumerator;
|
_dictEnumerator = dictEnumerator.GetEnumerator();
|
||||||
}
|
}
|
||||||
|
|
||||||
public Pair Current {
|
public Pair Current {
|
||||||
|
@ -61,7 +62,7 @@ namespace Stackoverflow.Utilities
|
||||||
throw new NotSupportedException();
|
throw new NotSupportedException();
|
||||||
}
|
}
|
||||||
|
|
||||||
private Dictionary<TFirst, TSecond>.Enumerator _dictEnumerator;
|
private IEnumerator<KeyValuePair<TFirst, TSecond>> _dictEnumerator;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -78,8 +79,8 @@ namespace Stackoverflow.Utilities
|
||||||
if (_firstToSecond.ContainsKey(first) || _secondToFirst.ContainsKey(second))
|
if (_firstToSecond.ContainsKey(first) || _secondToFirst.ContainsKey(second))
|
||||||
throw new ArgumentException("Duplicate first or second");
|
throw new ArgumentException("Duplicate first or second");
|
||||||
|
|
||||||
_firstToSecond.Add(first, second);
|
_firstToSecond.TryAdd(first, second);
|
||||||
_secondToFirst.Add(second, first);
|
_secondToFirst.TryAdd(second, first);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
|
@ -124,8 +125,8 @@ namespace Stackoverflow.Utilities
|
||||||
if (!_firstToSecond.TryGetValue(first, out second))
|
if (!_firstToSecond.TryGetValue(first, out second))
|
||||||
throw new ArgumentException("first");
|
throw new ArgumentException("first");
|
||||||
|
|
||||||
_firstToSecond.Remove(first);
|
_firstToSecond.TryRemove(first, out TSecond noop);
|
||||||
_secondToFirst.Remove(second);
|
_secondToFirst.TryRemove(second, out TFirst noopAgain);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
|
@ -139,8 +140,8 @@ namespace Stackoverflow.Utilities
|
||||||
if (!_secondToFirst.TryGetValue(second, out first))
|
if (!_secondToFirst.TryGetValue(second, out first))
|
||||||
throw new ArgumentException("second");
|
throw new ArgumentException("second");
|
||||||
|
|
||||||
_secondToFirst.Remove(second);
|
_secondToFirst.TryRemove(second, out TFirst noop);
|
||||||
_firstToSecond.Remove(first);
|
_firstToSecond.TryRemove(first, out TSecond noopAgain);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endregion
|
#endregion
|
||||||
|
@ -159,9 +160,9 @@ namespace Stackoverflow.Utilities
|
||||||
if (_firstToSecond.ContainsKey(first) || _secondToFirst.ContainsKey(second))
|
if (_firstToSecond.ContainsKey(first) || _secondToFirst.ContainsKey(second))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
_firstToSecond.Add(first, second);
|
bool result = _firstToSecond.TryAdd(first, second);
|
||||||
_secondToFirst.Add(second, first);
|
result = _secondToFirst.TryAdd(second, first) || result;
|
||||||
return true;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -200,9 +201,9 @@ namespace Stackoverflow.Utilities
|
||||||
if (!_firstToSecond.TryGetValue(first, out second))
|
if (!_firstToSecond.TryGetValue(first, out second))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
_firstToSecond.Remove(first);
|
bool result = _firstToSecond.TryRemove(first, out TSecond noop);
|
||||||
_secondToFirst.Remove(second);
|
result = _secondToFirst.TryRemove(second, out TFirst noopAgain) || result;
|
||||||
return true;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
|
@ -216,8 +217,8 @@ namespace Stackoverflow.Utilities
|
||||||
if (!_secondToFirst.TryGetValue(second, out first))
|
if (!_secondToFirst.TryGetValue(second, out first))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
_secondToFirst.Remove(second);
|
_secondToFirst.TryRemove(second, out TFirst noop);
|
||||||
_firstToSecond.Remove(first);
|
_firstToSecond.TryRemove(first, out TSecond noopAgain);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -243,7 +244,7 @@ namespace Stackoverflow.Utilities
|
||||||
public Enumerator GetEnumerator()
|
public Enumerator GetEnumerator()
|
||||||
{
|
{
|
||||||
//enumerator.Reset(firstToSecond.GetEnumerator());
|
//enumerator.Reset(firstToSecond.GetEnumerator());
|
||||||
return new Enumerator(_firstToSecond.GetEnumerator());
|
return new Enumerator(_firstToSecond);
|
||||||
}
|
}
|
||||||
|
|
||||||
IEnumerator<Pair> IEnumerable<Pair>.GetEnumerator()
|
IEnumerator<Pair> IEnumerable<Pair>.GetEnumerator()
|
||||||
|
@ -258,8 +259,8 @@ namespace Stackoverflow.Utilities
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
private Dictionary<TFirst, TSecond> _firstToSecond = new Dictionary<TFirst, TSecond>();
|
private ConcurrentDictionary<TFirst, TSecond> _firstToSecond = new ConcurrentDictionary<TFirst, TSecond>();
|
||||||
private Dictionary<TSecond, TFirst> _secondToFirst = new Dictionary<TSecond, TFirst>();
|
private ConcurrentDictionary<TSecond, TFirst> _secondToFirst = new ConcurrentDictionary<TSecond, TFirst>();
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
Loading…
Reference in a new issue