Parallelise the batch addition mode. This required updating IdMap (and by extension BiDictionary) to support concurrency.

This commit is contained in:
Starbeamrainbowlabs 2018-09-11 23:02:30 +01:00
parent c663882726
commit 0dd0de0b52
Signed by: sbrl
GPG Key ID: 1BE5172E637709C2
5 changed files with 80 additions and 31 deletions

View File

@ -7,6 +7,8 @@ using Newtonsoft.Json;
using LibSearchBox; using LibSearchBox;
using System.Text.RegularExpressions; using System.Text.RegularExpressions;
using Newtonsoft.Json.Serialization; using Newtonsoft.Json.Serialization;
using System.Threading.Tasks;
using SBRL.Utilities;
namespace SearchBoxCLI namespace SearchBoxCLI
{ {
@ -156,22 +158,23 @@ namespace SearchBoxCLI
if (!Batch) if (!Batch)
searchBox.AddDocument(Name, Tags, Source.ReadToEnd()); searchBox.AddDocument(Name, Tags, Source.ReadToEnd());
else { else {
string nextLine = ""; try
while ((nextLine = Source.ReadLine()) != null) { {
string[] parts = nextLine.Split('|'); Parallel.ForEach(LineIterator.GetLines(Source), (string nextLine) => {
if (parts[0].Trim().Length == 0) string[] parts = nextLine.Split('|');
continue; if (parts[0].Trim().Length == 0)
try { return;
searchBox.AddDocument( searchBox.AddDocument(
parts[1].Trim(), parts[1].Trim(),
Regex.Split(parts[2], @",\s*"), Regex.Split(parts[2], @",\s*"),
File.ReadAllText(parts[0].Trim()) File.ReadAllText(parts[0].Trim())
); );
Console.Error.WriteLine($"[Searchbox] [add] {parts[0].Trim()}"); Console.Error.WriteLine($"[Searchbox] [add] {parts[0].Trim()}");
} catch (FileNotFoundException) { });
Console.Error.WriteLine($"Error: Can't find file {parts[2].Trim()}."); } catch (FileNotFoundException error) {
return 1; Console.Error.WriteLine(error.Message);
} return 1;
} }
} }

View File

@ -40,6 +40,7 @@
<ItemGroup> <ItemGroup>
<Compile Include="Program.cs" /> <Compile Include="Program.cs" />
<Compile Include="Properties\AssemblyInfo.cs" /> <Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="Utilities\LineIterator.cs" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ProjectReference Include="..\SearchBox\SearchBox.csproj"> <ProjectReference Include="..\SearchBox\SearchBox.csproj">
@ -50,5 +51,8 @@
<ItemGroup> <ItemGroup>
<None Include="packages.config" /> <None Include="packages.config" />
</ItemGroup> </ItemGroup>
<ItemGroup>
<Folder Include="Utilities\" />
</ItemGroup>
<Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" /> <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
</Project> </Project>

View File

@ -0,0 +1,34 @@
using System;
using System.Collections.Generic;
using System.IO;
namespace SBRL.Utilities
{
    /// <summary>
    /// Helpers that expose line-oriented text as lazily evaluated sequences of lines.
    /// </summary>
    static class LineIterator
    {
        /// <summary>
        /// Lazily yields each line read from <paramref name="source"/> until
        /// <see cref="TextReader.ReadLine"/> returns null.
        /// </summary>
        /// <param name="source">The reader to pull lines from.</param>
        /// <returns>A sequence of lines, without their line terminators.</returns>
        public static IEnumerable<string> GetLines(TextReader source)
        {
            string nextLine;
            while ((nextLine = source.ReadLine()) != null)
            {
                yield return nextLine;
            }
        }

        /// <summary>
        /// Lazily yields each line of <paramref name="source"/>. Both "\r\n" and "\n"
        /// are treated as line terminators.
        /// </summary>
        /// <remarks>
        /// A final line that is not followed by a terminator is still returned, matching
        /// the <see cref="TextReader"/> overload. A trailing terminator does not produce
        /// an extra empty line.
        /// </remarks>
        /// <param name="source">The text to split into lines.</param>
        /// <returns>A sequence of lines, without their line terminators.</returns>
        public static IEnumerable<string> GetLines(string source)
        {
            // Normalise Windows line endings so only '\n' has to be searched for.
            source = source.Replace("\r\n", "\n");

            int curPosition = 0;
            while (curPosition < source.Length)
            {
                // The char overload performs an ordinal search, independent of culture.
                int nextIndex = source.IndexOf('\n', curPosition);
                if (nextIndex == -1)
                {
                    // No terminator left: the remainder is the final, unterminated line.
                    yield return source.Substring(curPosition);
                    yield break;
                }

                yield return source.Substring(curPosition, nextIndex - curPosition);
                curPosition = nextIndex + 1;
            }
        }
    }
}

View File

@ -2,6 +2,7 @@
using System.Collections.Generic; using System.Collections.Generic;
using System.Linq; using System.Linq;
using System.Text; using System.Text;
using System.Threading;
using Newtonsoft.Json; using Newtonsoft.Json;
using Stackoverflow.Utilities; using Stackoverflow.Utilities;
@ -41,6 +42,12 @@ namespace LibSearchBox
nextId = map.Max((pair) => pair.First) + 1; nextId = map.Max((pair) => pair.First) + 1;
} }
/// <summary>
/// Atomically advances <c>nextId</c> by one and hands back the value it held
/// before the increment.
/// </summary>
/// <returns>The id value prior to the increment.</returns>
private int generateNewId()
{
    // Interlocked.Increment returns the post-increment value, so step back by one.
    return Interlocked.Increment(ref nextId) - 1;
}
public int GetId(string pageName) public int GetId(string pageName)
{ {
@ -49,7 +56,7 @@ namespace LibSearchBox
int result; int result;
if (!map.TryGetBySecond(pageName, out result)) { if (!map.TryGetBySecond(pageName, out result)) {
map.Add(result = nextId++, pageName); map.Add(result = generateNewId(), pageName);
} }
return result; return result;
} }

View File

@ -1,5 +1,6 @@
using System; using System;
using System.Collections; using System.Collections;
using System.Collections.Concurrent;
using System.Collections.Generic; using System.Collections.Generic;
namespace Stackoverflow.Utilities namespace Stackoverflow.Utilities
@ -26,9 +27,9 @@ namespace Stackoverflow.Utilities
public struct Enumerator : IEnumerator<Pair>, IEnumerator public struct Enumerator : IEnumerator<Pair>, IEnumerator
{ {
public Enumerator(Dictionary<TFirst, TSecond>.Enumerator dictEnumerator) public Enumerator(IEnumerable<KeyValuePair<TFirst, TSecond>> dictEnumerator)
{ {
_dictEnumerator = dictEnumerator; _dictEnumerator = dictEnumerator.GetEnumerator();
} }
public Pair Current { public Pair Current {
@ -61,7 +62,7 @@ namespace Stackoverflow.Utilities
throw new NotSupportedException(); throw new NotSupportedException();
} }
private Dictionary<TFirst, TSecond>.Enumerator _dictEnumerator; private IEnumerator<KeyValuePair<TFirst, TSecond>> _dictEnumerator;
} }
@ -78,8 +79,8 @@ namespace Stackoverflow.Utilities
if (_firstToSecond.ContainsKey(first) || _secondToFirst.ContainsKey(second)) if (_firstToSecond.ContainsKey(first) || _secondToFirst.ContainsKey(second))
throw new ArgumentException("Duplicate first or second"); throw new ArgumentException("Duplicate first or second");
_firstToSecond.Add(first, second); _firstToSecond.TryAdd(first, second);
_secondToFirst.Add(second, first); _secondToFirst.TryAdd(second, first);
} }
/// <summary> /// <summary>
@ -124,8 +125,8 @@ namespace Stackoverflow.Utilities
if (!_firstToSecond.TryGetValue(first, out second)) if (!_firstToSecond.TryGetValue(first, out second))
throw new ArgumentException("first"); throw new ArgumentException("first");
_firstToSecond.Remove(first); _firstToSecond.TryRemove(first, out TSecond noop);
_secondToFirst.Remove(second); _secondToFirst.TryRemove(second, out TFirst noopAgain);
} }
/// <summary> /// <summary>
@ -139,8 +140,8 @@ namespace Stackoverflow.Utilities
if (!_secondToFirst.TryGetValue(second, out first)) if (!_secondToFirst.TryGetValue(second, out first))
throw new ArgumentException("second"); throw new ArgumentException("second");
_secondToFirst.Remove(second); _secondToFirst.TryRemove(second, out TFirst noop);
_firstToSecond.Remove(first); _firstToSecond.TryRemove(first, out TSecond noopAgain);
} }
#endregion #endregion
@ -159,9 +160,9 @@ namespace Stackoverflow.Utilities
if (_firstToSecond.ContainsKey(first) || _secondToFirst.ContainsKey(second)) if (_firstToSecond.ContainsKey(first) || _secondToFirst.ContainsKey(second))
return false; return false;
_firstToSecond.Add(first, second); bool result = _firstToSecond.TryAdd(first, second);
_secondToFirst.Add(second, first); result = _secondToFirst.TryAdd(second, first) || result;
return true; return result;
} }
@ -200,9 +201,9 @@ namespace Stackoverflow.Utilities
if (!_firstToSecond.TryGetValue(first, out second)) if (!_firstToSecond.TryGetValue(first, out second))
return false; return false;
_firstToSecond.Remove(first); bool result = _firstToSecond.TryRemove(first, out TSecond noop);
_secondToFirst.Remove(second); result = _secondToFirst.TryRemove(second, out TFirst noopAgain) || result;
return true; return result;
} }
/// <summary> /// <summary>
@ -216,8 +217,8 @@ namespace Stackoverflow.Utilities
if (!_secondToFirst.TryGetValue(second, out first)) if (!_secondToFirst.TryGetValue(second, out first))
return false; return false;
_secondToFirst.Remove(second); _secondToFirst.TryRemove(second, out TFirst noop);
_firstToSecond.Remove(first); _firstToSecond.TryRemove(first, out TSecond noopAgain);
return true; return true;
} }
@ -243,7 +244,7 @@ namespace Stackoverflow.Utilities
public Enumerator GetEnumerator() public Enumerator GetEnumerator()
{ {
//enumerator.Reset(firstToSecond.GetEnumerator()); //enumerator.Reset(firstToSecond.GetEnumerator());
return new Enumerator(_firstToSecond.GetEnumerator()); return new Enumerator(_firstToSecond);
} }
IEnumerator<Pair> IEnumerable<Pair>.GetEnumerator() IEnumerator<Pair> IEnumerable<Pair>.GetEnumerator()
@ -258,8 +259,8 @@ namespace Stackoverflow.Utilities
private Dictionary<TFirst, TSecond> _firstToSecond = new Dictionary<TFirst, TSecond>(); private ConcurrentDictionary<TFirst, TSecond> _firstToSecond = new ConcurrentDictionary<TFirst, TSecond>();
private Dictionary<TSecond, TFirst> _secondToFirst = new Dictionary<TSecond, TFirst>(); private ConcurrentDictionary<TSecond, TFirst> _secondToFirst = new ConcurrentDictionary<TSecond, TFirst>();
} }
} }