Skip to content

Commit

Permalink
bugfix: mapping between long keyId and ulong keyHash broke
Browse files Browse the repository at this point in the history
  • Loading branch information
kreeben committed May 8, 2024
1 parent 3a8c517 commit 4a0a6bd
Show file tree
Hide file tree
Showing 12 changed files with 61 additions and 99 deletions.
2 changes: 1 addition & 1 deletion index.bat
Original file line number Diff line number Diff line change
@@ -1 +1 @@
sir.bat indexwikipedia --directory C:\projects\resin\src\Sir.HttpServer\AppData\database --collection wikipedia --skip 0 --take 200000 --pageSize 100000 --sampleSize 1000 %*
sir.bat indexwikipedia --directory C:\projects\resin\src\Sir.HttpServer\AppData\database --collection wikipedia --skip 0 --take 100000 --pageSize 10000 --sampleSize 1000 %*
2 changes: 1 addition & 1 deletion src/Sir.Document/DocmentMapReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
namespace Sir.Documents
{
/// <summary>
/// Read document maps (key_id/val_id) from the document map stream.
/// Read document maps (key_id/val_id, each pair 2*sizeof(long)) from the document map stream.
/// A document map is needed to re-contruct a complete document.
/// </summary>
public class DocmentMapReader : IDisposable
Expand Down
5 changes: 0 additions & 5 deletions src/Sir.Document/DocumentIndexWriter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,6 @@ public DocumentIndexWriter(Stream stream)
_stream = stream;
}

public void Flush()
{
_stream.Flush();
}

/// <summary>
/// Get the next auto-incrementing doc id
/// </summary>
Expand Down
2 changes: 1 addition & 1 deletion src/Sir.Document/DocumentMapWriter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
namespace Sir.Documents
{
/// <summary>
/// Writes document maps (key_id/val_id) to a bitmap.
/// Writes document maps (key_id/val_id, each pair 2*sizeof(long)) to a bitmap.
/// </summary>
public class DocumentMapWriter : IDisposable
{
Expand Down
2 changes: 1 addition & 1 deletion src/Sir.Document/DocumentReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ public static Document Read(long documentId, DocumentRegistryReader documentRead
var vInfo = documentReader.GetAddressOfValue(kvp.valId);
var val = documentReader.GetValue(vInfo.offset, vInfo.len, vInfo.dataType);

fields.Add(new Field(key, val, kvp.keyId));
fields.Add(new Field(key, val, kvp.keyId, documentId));
}
}

Expand Down
28 changes: 14 additions & 14 deletions src/Sir.Document/DocumentRegistryWriter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ namespace Sir.Documents
/// </summary>
public class DocumentRegistryWriter : IDisposable
{
private DocumentMapWriter _docs;
private DocumentIndexWriter _docIx;
private DocumentMapWriter _documentMapWriter;
private DocumentIndexWriter _documentIndexWriter;
private KeyValueWriter _kvWriter;
private readonly string _directory;
private readonly ulong _collectionId;
Expand All @@ -19,48 +19,48 @@ public class DocumentRegistryWriter : IDisposable

public DocumentRegistryWriter(string directory, ulong collectionId)
{
_docs = new DocumentMapWriter(KeyValueWriter.CreateAppendStream(directory, collectionId, "docs"));
_docIx = new DocumentIndexWriter(KeyValueWriter.CreateAppendStream(directory, collectionId, "dix"));
_documentMapWriter = new DocumentMapWriter(KeyValueWriter.CreateAppendStream(directory, collectionId, "docs"));
_documentIndexWriter = new DocumentIndexWriter(KeyValueWriter.CreateAppendStream(directory, collectionId, "dix"));
_kvWriter = new KeyValueWriter(directory, collectionId);
_directory = directory;
_collectionId = collectionId;
}

public long IncrementDocId()
{
return _docIx.IncrementDocId();
return _documentIndexWriter.IncrementDocId();
}

public (long offset, int length) PutDocumentMap(IList<(long keyId, long valId)> doc)
{
return _docs.Put(doc);
return _documentMapWriter.Put(doc);
}

public void UpdateDocumentMap(long offsetOfMap, int indexInMap, long keyId, long valId)
{
_docs.Overwrite(offsetOfMap, indexInMap, keyId, valId);
_documentMapWriter.Overwrite(offsetOfMap, indexInMap, keyId, valId);
}

public void PutDocumentAddress(long docId, long offset, int len)
{
_docIx.Put(docId, offset, len);
_documentIndexWriter.Put(docId, offset, len);
}

public void Commit()
{
_docs.Dispose();
_docIx.Dispose();
_documentMapWriter.Dispose();
_documentIndexWriter.Dispose();
_kvWriter.Dispose();

_docs = new DocumentMapWriter(KeyValueWriter.CreateAppendStream(_directory, _collectionId, "docs"));
_docIx = new DocumentIndexWriter(KeyValueWriter.CreateAppendStream(_directory, _collectionId, "dix"));
_documentMapWriter = new DocumentMapWriter(KeyValueWriter.CreateAppendStream(_directory, _collectionId, "docs"));
_documentIndexWriter = new DocumentIndexWriter(KeyValueWriter.CreateAppendStream(_directory, _collectionId, "dix"));
_kvWriter = new KeyValueWriter(_directory, _collectionId);
}

public void Dispose()
{
_docs.Dispose();
_docIx.Dispose();
_documentMapWriter.Dispose();
_documentIndexWriter.Dispose();
_kvWriter.Dispose();
}
}
Expand Down
14 changes: 4 additions & 10 deletions src/Sir.InformationRetreival/Session/DocumentDatabase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ public DocumentDatabase(string directory, ulong collectionId, IModel<T> model =
{
_directory = directory ?? throw new ArgumentNullException(nameof(directory));
_collectionId = collectionId;
_model = model ?? throw new ArgumentNullException(nameof(model));
_indexStrategy = indexStrategy ?? throw new ArgumentNullException(nameof(indexStrategy));
_model = model;
_indexStrategy = indexStrategy;
_writeSession = new WriteSession(new DocumentRegistryWriter(directory, collectionId));
_indexSession = new IndexSession<T>(directory, collectionId, model, indexStrategy, logger);
_searchSession = new SearchSession<T>(directory, _model, _indexStrategy, logger);
Expand Down Expand Up @@ -89,9 +89,6 @@ public void OptimizeAllIndices(int skipDocuments = 0, int takeDocuments = int.Ma
public void Truncate()
{
DisposeInternal();
_writeSession = null;
_indexSession = null;
_searchSession = null;

var count = 0;

Expand Down Expand Up @@ -153,9 +150,6 @@ public void TruncateIndexOnly()
public void Rename(ulong newCollectionId)
{
DisposeInternal();
_writeSession = null;
_indexSession = null;
_searchSession = null;

var count = 0;
var from = _collectionId.ToString();
Expand Down Expand Up @@ -186,7 +180,7 @@ private void LogInformation(string message)
_logger.LogInformation(message);
}

public void Commit()
public void CommitIndexAndClearSearchCache()
{
_writeSession.Commit();
_indexSession.Commit();
Expand All @@ -195,7 +189,7 @@ public void Commit()

public void Dispose()
{
Commit();
CommitIndexAndClearSearchCache();
DisposeInternal();
}

Expand Down
9 changes: 2 additions & 7 deletions src/Sir.InformationRetreival/Session/WriteSession.cs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ public void Put(Document document)

if (field.Value != null)
{
Write(field, docMap);
WriteField(field, docMap);
}
}

Expand All @@ -38,7 +38,7 @@ public void Put(Document document)
_documentWriter.PutDocumentAddress(document.Id, docMeta.offset, docMeta.length);
}

private void Write(Field field, IList<(long, long)> docMap)
private void WriteField(Field field, IList<(long, long)> docMap)
{
field.KeyId = EnsureKeyExists(field.Name);

Expand All @@ -59,11 +59,6 @@ public long EnsureKeyExists(string key)
return _documentWriter.KeyValueWriter.EnsureKeyExists(key);
}

public long EnsureKeyExistsSafely(string key)
{
return _documentWriter.KeyValueWriter.EnsureKeyExistsSafely(key);
}

public void Commit()
{
_documentWriter.Commit();
Expand Down
42 changes: 27 additions & 15 deletions src/Sir.KeyValue/KeyValueReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,24 @@ public bool TryGetKeyId(ulong keyHash, out long keyId)
if (!_keyCache.TryGetValue(key, out keys))
{
ReadKeysIntoCache();

if (!_keyCache.TryGetValue(key, out keys))
{
// there are no keys registered for this collection, even on disk.

keyId = -1;
return false;
}
}

if (keys != null || _keyCache.TryGetValue(key, out keys))
if (keys.TryGetValue(keyHash, out keyId))
{
return true;
}
else
{
ReadKeysIntoCache();

if (keys.TryGetValue(keyHash, out keyId))
{
return true;
Expand All @@ -64,31 +78,29 @@ public bool TryGetKeyId(ulong keyHash, out long keyId)

private void ReadKeysIntoCache()
{
_keyCache.Clear();

foreach (var keyFile in System.IO.Directory.GetFiles(_directory, "*.kmap"))
{
var collectionId = ulong.Parse(Path.GetFileNameWithoutExtension(keyFile));
var key = Path.Combine(_directory, collectionId.ToString()).ToHash();
var keys = new ConcurrentDictionary<ulong, long>();

var keys = _keyCache.GetOrAdd(key, (k) =>
using (var stream = new FileStream(keyFile, FileMode.OpenOrCreate, FileAccess.Read, FileShare.ReadWrite))
{
var ks = new ConcurrentDictionary<ulong, long>();
long i = 0;
var buf = new byte[sizeof(ulong)];
var read = stream.Read(buf, 0, buf.Length);

using (var stream = new FileStream(keyFile, FileMode.OpenOrCreate, FileAccess.Read, FileShare.ReadWrite))
while (read > 0)
{
long i = 0;
var buf = new byte[sizeof(ulong)];
var read = stream.Read(buf, 0, buf.Length);
while (read > 0)
{
ks.TryAdd(BitConverter.ToUInt64(buf, 0), i++);
keys.TryAdd(BitConverter.ToUInt64(buf, 0), i++);

read = stream.Read(buf, 0, buf.Length);
}
read = stream.Read(buf, 0, buf.Length);
}
}

return ks;
});
_keyCache.GetOrAdd(key, keys);
}
}

Expand Down
46 changes: 6 additions & 40 deletions src/Sir.KeyValue/KeyValueWriter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,8 @@ public class KeyValueWriter : IDisposable
private readonly ValueIndexWriter _keyIx;
private readonly ulong _collectionId;
private readonly string _directory;
private readonly object _keyLock = new object();
private ConcurrentDictionary<ulong, ConcurrentDictionary<ulong, long>> _keyCache;
private readonly KeyValueReader _kvReader;
private static object _keyLock = new object();

public KeyValueWriter(string directory, ulong collectionId)
: this(
Expand All @@ -29,7 +28,6 @@ public KeyValueWriter(string directory, ulong collectionId)
{
_collectionId = collectionId;
_directory = directory;
_keyCache = new ConcurrentDictionary<ulong, ConcurrentDictionary<ulong, long>>();
_kvReader = new KeyValueReader(directory, collectionId);
}

Expand Down Expand Up @@ -75,7 +73,7 @@ public static Stream CreateAppendStream(string directory, ulong collectionId, lo
return new FileStream(fileName, FileMode.Append, FileAccess.Write, FileShare.ReadWrite);
}

public long EnsureKeyExistsSafely(string keyStr)
public long EnsureKeyExists(string keyStr)
{
var keyHash = keyStr.ToHash();
long keyId;
Expand All @@ -94,35 +92,17 @@ public long EnsureKeyExistsSafely(string keyStr)
keyId = PutKeyInfo(keyInfo.offset, keyInfo.len, keyInfo.dataType);

// store key mapping
RegisterKeyMapping(_directory, _collectionId, keyHash, keyId);
using (var stream = CreateAppendStream(_directory, _collectionId, "kmap"))
{
stream.Write(BitConverter.GetBytes(keyHash), 0, sizeof(ulong));
}
}
}
}

return keyId;
}

public long EnsureKeyExists(string keyStr)
{
var keyHash = keyStr.ToHash();
long keyId;

if (!_kvReader.TryGetKeyId(keyHash, out keyId))
{
// We have a new key!

// store key
var keyInfo = PutKey(keyStr);

keyId = PutKeyInfo(keyInfo.offset, keyInfo.len, keyInfo.dataType);

// store key mapping
RegisterKeyMapping(_directory, _collectionId, keyHash, keyId);
}

return keyId;
}

public (long keyId, long valueId) PutValue(long keyId, object val, out byte dataType)
{
// store value
Expand Down Expand Up @@ -164,20 +144,6 @@ public void OverwriteFixedLengthValue(long offset, object value, Type type)
_vals.Put(value);
}

public void RegisterKeyMapping(string directory, ulong collectionId, ulong keyHash, long keyId)
{
var key = Path.Combine(directory, collectionId.ToString()).ToHash();
var keys = _keyCache.GetOrAdd(key, (key) => { return new ConcurrentDictionary<ulong, long>(); });
var keyMapping = keys.GetOrAdd(keyHash, (key) =>
{
using (var stream = CreateAppendStream(directory, collectionId, "kmap"))
{
stream.Write(BitConverter.GetBytes(keyHash), 0, sizeof(ulong));
}
return keyId;
});
}

public void Dispose()
{
_vals.Dispose();
Expand Down
6 changes: 3 additions & 3 deletions src/Sir.StringTests/BagOfCharsDatabaseTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ public void Can_stream()
database.Write(document, index: false);
}

database.Commit();
database.CommitIndexAndClearSearchCache();

var i = 0;

Expand Down Expand Up @@ -69,7 +69,7 @@ public void Can_read_and_write()
database.Write(document);
}

database.Commit();
database.CommitIndexAndClearSearchCache();

var queryParser = database.CreateQueryParser();

Expand Down Expand Up @@ -109,7 +109,7 @@ public void Can_optimize_index()
database.Write(document, store:true, index:false); // note: no indexing going on here
}

database.Commit();
database.CommitIndexAndClearSearchCache();

var queryParser = database.CreateQueryParser();

Expand Down
2 changes: 1 addition & 1 deletion write.bat
Original file line number Diff line number Diff line change
@@ -1 +1 @@
sir.bat writewikipedia --directory C:\projects\resin\src\Sir.HttpServer\AppData\database --file d:\enwiki-20211122-cirrussearch-content.json.gz --collection wikipedia --skip 100000 --take 100000 --sampleSize 10000 %*
sir.bat writewikipedia --directory C:\projects\resin\src\Sir.HttpServer\AppData\database --file d:\enwiki-20211122-cirrussearch-content.json.gz --collection wikipedia --skip 0 --take 10000 --sampleSize 1000 %*

0 comments on commit 4a0a6bd

Please sign in to comment.