Parallelise the batch addition mode. This required updating IdMap (and by extension BiDictionary) to support concurrency.

This commit is contained in:
Starbeamrainbowlabs 2018-09-11 23:02:30 +01:00
parent c663882726
commit 0dd0de0b52
Signed by: sbrl
GPG key ID: 1BE5172E637709C2
5 changed files with 80 additions and 31 deletions

View file

@ -7,6 +7,8 @@ using Newtonsoft.Json;
using LibSearchBox;
using System.Text.RegularExpressions;
using Newtonsoft.Json.Serialization;
using System.Threading.Tasks;
using SBRL.Utilities;
namespace SearchBoxCLI
{
@ -156,22 +158,23 @@ namespace SearchBoxCLI
if (!Batch)
searchBox.AddDocument(Name, Tags, Source.ReadToEnd());
else {
string nextLine = "";
while ((nextLine = Source.ReadLine()) != null) {
string[] parts = nextLine.Split('|');
if (parts[0].Trim().Length == 0)
continue;
try {
try
{
Parallel.ForEach(LineIterator.GetLines(Source), (string nextLine) => {
string[] parts = nextLine.Split('|');
if (parts[0].Trim().Length == 0)
return;
searchBox.AddDocument(
parts[1].Trim(),
Regex.Split(parts[2], @",\s*"),
File.ReadAllText(parts[0].Trim())
);
Console.Error.WriteLine($"[Searchbox] [add] {parts[0].Trim()}");
} catch (FileNotFoundException) {
Console.Error.WriteLine($"Error: Can't find file {parts[2].Trim()}.");
return 1;
}
});
} catch (FileNotFoundException error) {
Console.Error.WriteLine(error.Message);
return 1;
}
}

View file

@ -40,6 +40,7 @@
<ItemGroup>
<Compile Include="Program.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="Utilities\LineIterator.cs" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\SearchBox\SearchBox.csproj">
@ -50,5 +51,8 @@
<ItemGroup>
<None Include="packages.config" />
</ItemGroup>
<ItemGroup>
<Folder Include="Utilities\" />
</ItemGroup>
<Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
</Project>

View file

@ -0,0 +1,34 @@
using System;
using System.Collections.Generic;
using System.IO;
namespace SBRL.Utilities
{
/// <summary>
/// Helpers that expose line-oriented text as lazily evaluated sequences of lines.
/// </summary>
static class LineIterator {
	/// <summary>
	/// Lazily yields each line read from a <see cref="TextReader"/> until
	/// <see cref="TextReader.ReadLine"/> returns null.
	/// </summary>
	/// <param name="source">The reader to pull lines from.</param>
	/// <returns>The lines of the reader, in order, without line terminators.</returns>
	public static IEnumerable<string> GetLines(TextReader source)
	{
		string nextLine;
		while ((nextLine = source.ReadLine()) != null) {
			yield return nextLine;
		}
	}

	/// <summary>
	/// Lazily yields each line of a string. Both "\r\n" and "\n" are treated as
	/// line terminators.
	/// </summary>
	/// <param name="source">The text to split into lines.</param>
	/// <returns>
	/// The lines of the text, in order, without line terminators. A final line that
	/// is not terminated by a newline is still returned; a trailing newline does not
	/// produce an extra empty line.
	/// </returns>
	public static IEnumerable<string> GetLines(string source)
	{
		// Normalise Windows line endings so only '\n' has to be searched for.
		source = source.Replace("\r\n", "\n");
		int curPosition = 0, nextIndex;
		// The char overload of IndexOf performs an ordinal search.
		while ((nextIndex = source.IndexOf('\n', curPosition)) != -1)
		{
			yield return source.Substring(curPosition, nextIndex - curPosition);
			curPosition = nextIndex + 1;
		}
		// Whatever follows the last newline is the final, unterminated line.
		// Without this, "a\nb" would yield only "a".
		if (curPosition < source.Length)
			yield return source.Substring(curPosition);
	}
}
}

View file

@ -2,6 +2,7 @@
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading;
using Newtonsoft.Json;
using Stackoverflow.Utilities;
@ -41,6 +42,12 @@ namespace LibSearchBox
nextId = map.Max((pair) => pair.First) + 1;
}
/// <summary>
/// Reserves the next id by atomically advancing <c>nextId</c>.
/// </summary>
/// <returns>The value <c>nextId</c> held immediately before it was advanced.</returns>
private int generateNewId()
{
	// Interlocked.Increment hands back the post-increment value, so step back
	// by one to get the id that was just reserved.
	return Interlocked.Increment(ref nextId) - 1;
}
public int GetId(string pageName)
{
@ -49,7 +56,7 @@ namespace LibSearchBox
int result;
if (!map.TryGetBySecond(pageName, out result)) {
map.Add(result = nextId++, pageName);
map.Add(result = generateNewId(), pageName);
}
return result;
}

View file

@ -1,5 +1,6 @@
using System;
using System.Collections;
using System.Collections.Concurrent;
using System.Collections.Generic;
namespace Stackoverflow.Utilities
@ -26,9 +27,9 @@ namespace Stackoverflow.Utilities
public struct Enumerator : IEnumerator<Pair>, IEnumerator
{
public Enumerator(Dictionary<TFirst, TSecond>.Enumerator dictEnumerator)
public Enumerator(IEnumerable<KeyValuePair<TFirst, TSecond>> dictEnumerator)
{
_dictEnumerator = dictEnumerator;
_dictEnumerator = dictEnumerator.GetEnumerator();
}
public Pair Current {
@ -61,7 +62,7 @@ namespace Stackoverflow.Utilities
throw new NotSupportedException();
}
private Dictionary<TFirst, TSecond>.Enumerator _dictEnumerator;
private IEnumerator<KeyValuePair<TFirst, TSecond>> _dictEnumerator;
}
@ -78,8 +79,8 @@ namespace Stackoverflow.Utilities
if (_firstToSecond.ContainsKey(first) || _secondToFirst.ContainsKey(second))
throw new ArgumentException("Duplicate first or second");
_firstToSecond.Add(first, second);
_secondToFirst.Add(second, first);
_firstToSecond.TryAdd(first, second);
_secondToFirst.TryAdd(second, first);
}
/// <summary>
@ -124,8 +125,8 @@ namespace Stackoverflow.Utilities
if (!_firstToSecond.TryGetValue(first, out second))
throw new ArgumentException("first");
_firstToSecond.Remove(first);
_secondToFirst.Remove(second);
_firstToSecond.TryRemove(first, out TSecond noop);
_secondToFirst.TryRemove(second, out TFirst noopAgain);
}
/// <summary>
@ -139,8 +140,8 @@ namespace Stackoverflow.Utilities
if (!_secondToFirst.TryGetValue(second, out first))
throw new ArgumentException("second");
_secondToFirst.Remove(second);
_firstToSecond.Remove(first);
_secondToFirst.TryRemove(second, out TFirst noop);
_firstToSecond.TryRemove(first, out TSecond noopAgain);
}
#endregion
@ -159,9 +160,9 @@ namespace Stackoverflow.Utilities
if (_firstToSecond.ContainsKey(first) || _secondToFirst.ContainsKey(second))
return false;
_firstToSecond.Add(first, second);
_secondToFirst.Add(second, first);
return true;
bool result = _firstToSecond.TryAdd(first, second);
result = _secondToFirst.TryAdd(second, first) || result;
return result;
}
@ -200,9 +201,9 @@ namespace Stackoverflow.Utilities
if (!_firstToSecond.TryGetValue(first, out second))
return false;
_firstToSecond.Remove(first);
_secondToFirst.Remove(second);
return true;
bool result = _firstToSecond.TryRemove(first, out TSecond noop);
result = _secondToFirst.TryRemove(second, out TFirst noopAgain) || result;
return result;
}
/// <summary>
@ -216,8 +217,8 @@ namespace Stackoverflow.Utilities
if (!_secondToFirst.TryGetValue(second, out first))
return false;
_secondToFirst.Remove(second);
_firstToSecond.Remove(first);
_secondToFirst.TryRemove(second, out TFirst noop);
_firstToSecond.TryRemove(first, out TSecond noopAgain);
return true;
}
@ -243,7 +244,7 @@ namespace Stackoverflow.Utilities
public Enumerator GetEnumerator()
{
//enumerator.Reset(firstToSecond.GetEnumerator());
return new Enumerator(_firstToSecond.GetEnumerator());
return new Enumerator(_firstToSecond);
}
IEnumerator<Pair> IEnumerable<Pair>.GetEnumerator()
@ -258,8 +259,8 @@ namespace Stackoverflow.Utilities
private Dictionary<TFirst, TSecond> _firstToSecond = new Dictionary<TFirst, TSecond>();
private Dictionary<TSecond, TFirst> _secondToFirst = new Dictionary<TSecond, TFirst>();
private ConcurrentDictionary<TFirst, TSecond> _firstToSecond = new ConcurrentDictionary<TFirst, TSecond>();
private ConcurrentDictionary<TSecond, TFirst> _secondToFirst = new ConcurrentDictionary<TSecond, TFirst>();
}
}