From eec774b7df62645238a5e7a64069f175ee8fb53e Mon Sep 17 00:00:00 2001 From: Neethu Haneesha Bingi Date: Tue, 14 Dec 2021 14:21:56 -0800 Subject: [PATCH] KeyValueStoreRocksDB histograms to track latencies --- fdbserver/KeyValueStoreRocksDB.actor.cpp | 105 ++++++++++++++++++++++- 1 file changed, 101 insertions(+), 4 deletions(-) diff --git a/fdbserver/KeyValueStoreRocksDB.actor.cpp b/fdbserver/KeyValueStoreRocksDB.actor.cpp index cb17fb09743..301b30ca7f3 100644 --- a/fdbserver/KeyValueStoreRocksDB.actor.cpp +++ b/fdbserver/KeyValueStoreRocksDB.actor.cpp @@ -14,6 +14,7 @@ #include "flow/flow.h" #include "flow/IThreadPool.h" #include "flow/ThreadHelper.actor.h" +#include "flow/Histogram.h" #include #include @@ -35,6 +36,20 @@ static_assert((ROCKSDB_MAJOR == 6 && ROCKSDB_MINOR == 22) ? ROCKSDB_PATCH >= 1 : namespace { +const StringRef ROCKSDBSTORAGE_HISTOGRAM_GROUP = LiteralStringRef("RocksDBStorage"); +const StringRef ROCKSDB_COMMIT_ACTION_HISTOGRAM = LiteralStringRef("RocksDBCommitAction"); +const StringRef ROCKSDB_WRITE_HISTOGRAM = LiteralStringRef("RocksDBWrite"); +const StringRef ROCKSDB_DELETE_COMPACTRANGE_HISTOGRAM = LiteralStringRef("RocksDBDeleteCompactRange"); +const StringRef ROCKSDB_READRANGE_ACTION_HISTOGRAM = LiteralStringRef("RocksDBReadRangeAction"); +const StringRef ROCKSDB_READVALUE_ACTION_HISTOGRAM = LiteralStringRef("RocksDBReadValueAction"); +const StringRef ROCKSDB_READPREFIX_ACTION_HISTOGRAM = LiteralStringRef("RocksDBReadPrefixAction"); +const StringRef ROCKSDB_READRANGE_QUEUEWAIT_HISTOGRAM = LiteralStringRef("RocksDBReadRangeQueueWait"); +const StringRef ROCKSDB_READVALUE_QUEUEWAIT_HISTOGRAM = LiteralStringRef("RocksDBReadValueQueueWait"); +const StringRef ROCKSDB_READPREFIX_QUEUEWAIT_HISTOGRAM = LiteralStringRef("RocksDBReadPrefixQueueWait"); +const StringRef ROCKSDB_READRANGE_NEWITERATOR_HISTOGRAM = LiteralStringRef("RocksDBReadRangeNewIterator"); +const StringRef ROCKSDB_READVALUE_GET_HISTOGRAM = LiteralStringRef("RocksDBReadValueGet"); +const StringRef ROCKSDB_READPREFIX_GET_HISTOGRAM = LiteralStringRef("RocksDBReadPrefixGet"); + rocksdb::Slice toSlice(StringRef s) { return rocksdb::Slice(reinterpret_cast(s.begin()), s.size()); } @@ -231,8 +246,20 @@ struct RocksDBKeyValueStore : IKeyValueStore { struct Writer : IThreadPoolReceiver { DB& db; UID id; - - explicit Writer(DB& db, UID id) : db(db), id(id) {} + Reference commitActionHistogram; + Reference writeHistogram; + Reference deleteCompactRangeHistogram; + + explicit Writer(DB& db, UID id) + : db(db), id(id), commitActionHistogram(Histogram::getHistogram(ROCKSDBSTORAGE_HISTOGRAM_GROUP, + ROCKSDB_COMMIT_ACTION_HISTOGRAM, + Histogram::Unit::microseconds)), + writeHistogram(Histogram::getHistogram(ROCKSDBSTORAGE_HISTOGRAM_GROUP, + ROCKSDB_WRITE_HISTOGRAM, + Histogram::Unit::microseconds)), + deleteCompactRangeHistogram(Histogram::getHistogram(ROCKSDBSTORAGE_HISTOGRAM_GROUP, + ROCKSDB_DELETE_COMPACTRANGE_HISTOGRAM, + Histogram::Unit::microseconds)) {} ~Writer() override { if (db) { @@ -304,6 +331,7 @@ struct RocksDBKeyValueStore : IKeyValueStore { double getTimeEstimate() const override { return SERVER_KNOBS->COMMIT_TIME_ESTIMATE; } }; void action(CommitAction& a) { + double commitBeginTime = now(); Standalone> deletes; DeleteVisitor dv(deletes, deletes.arena()); ASSERT(a.batchToCommit->Iterate(&dv).ok()); @@ -311,18 +339,26 @@ struct RocksDBKeyValueStore : IKeyValueStore { ASSERT(!deletes.empty() || !a.batchToCommit->HasDeleteRange()); rocksdb::WriteOptions options; options.sync = !SERVER_KNOBS->ROCKSDB_UNSAFE_AUTO_FSYNC; + + double writeBeginTime = now(); auto s = db->Write(options, a.batchToCommit.get()); + writeHistogram->sampleSeconds(now() - writeBeginTime); + if (!s.ok()) { logRocksDBError(s, "Commit"); a.done.sendError(statusToError(s)); } else { a.done.send(Void()); + + double compactRangeBeginTime = now(); for (const auto& keyRange : deletes) { auto begin = toSlice(keyRange.begin); auto end = toSlice(keyRange.end); ASSERT(db->SuggestCompactRange(db->DefaultColumnFamily(), &begin, &end).ok()); } + deleteCompactRangeHistogram->sampleSeconds(now() - compactRangeBeginTime); } + commitActionHistogram->sampleSeconds(now() - commitBeginTime); } struct CloseAction : TypedAction { @@ -361,8 +397,44 @@ struct RocksDBKeyValueStore : IKeyValueStore { double readValueTimeout; double readValuePrefixTimeout; double readRangeTimeout; - - explicit Reader(DB& db) : db(db) { + Reference readRangeActionHistogram; + Reference readValueActionHistogram; + Reference readPrefixActionHistogram; + Reference readRangeQueueWaitHistogram; + Reference readValueQueueWaitHistogram; + Reference readPrefixQueueWaitHistogram; + Reference readRangeNewIteratorHistogram; + Reference readValueGetHistogram; + Reference readPrefixGetHistogram; + + explicit Reader(DB& db) + : db(db), readRangeActionHistogram(Histogram::getHistogram(ROCKSDBSTORAGE_HISTOGRAM_GROUP, + ROCKSDB_READRANGE_ACTION_HISTOGRAM, + Histogram::Unit::microseconds)), + readValueActionHistogram(Histogram::getHistogram(ROCKSDBSTORAGE_HISTOGRAM_GROUP, + ROCKSDB_READVALUE_ACTION_HISTOGRAM, + Histogram::Unit::microseconds)), + readPrefixActionHistogram(Histogram::getHistogram(ROCKSDBSTORAGE_HISTOGRAM_GROUP, + ROCKSDB_READPREFIX_ACTION_HISTOGRAM, + Histogram::Unit::microseconds)), + readRangeQueueWaitHistogram(Histogram::getHistogram(ROCKSDBSTORAGE_HISTOGRAM_GROUP, + ROCKSDB_READRANGE_QUEUEWAIT_HISTOGRAM, + Histogram::Unit::microseconds)), + readValueQueueWaitHistogram(Histogram::getHistogram(ROCKSDBSTORAGE_HISTOGRAM_GROUP, + ROCKSDB_READVALUE_QUEUEWAIT_HISTOGRAM, + Histogram::Unit::microseconds)), + readPrefixQueueWaitHistogram(Histogram::getHistogram(ROCKSDBSTORAGE_HISTOGRAM_GROUP, + ROCKSDB_READPREFIX_QUEUEWAIT_HISTOGRAM, + Histogram::Unit::microseconds)), + readRangeNewIteratorHistogram(Histogram::getHistogram(ROCKSDBSTORAGE_HISTOGRAM_GROUP, + ROCKSDB_READRANGE_NEWITERATOR_HISTOGRAM, + Histogram::Unit::microseconds)), + readValueGetHistogram(Histogram::getHistogram(ROCKSDBSTORAGE_HISTOGRAM_GROUP, + ROCKSDB_READVALUE_GET_HISTOGRAM, + Histogram::Unit::microseconds)), + readPrefixGetHistogram(Histogram::getHistogram(ROCKSDBSTORAGE_HISTOGRAM_GROUP, + ROCKSDB_READPREFIX_GET_HISTOGRAM, + Histogram::Unit::microseconds)) { if (g_network->isSimulated()) { // In simulation, increasing the read operation timeouts to 5 minutes, as some of the tests have // very high load and single read thread cannot process all the load within the timeouts. @@ -388,6 +460,7 @@ struct RocksDBKeyValueStore : IKeyValueStore { double getTimeEstimate() const override { return SERVER_KNOBS->READ_VALUE_TIME_ESTIMATE; } }; void action(ReadValueAction& a) { + readValueQueueWaitHistogram->sampleSeconds(now() - a.startTime); Optional traceBatch; if (a.debugID.present()) { traceBatch = { TraceBatch{} }; @@ -401,13 +474,18 @@ struct RocksDBKeyValueStore : IKeyValueStore { a.result.sendError(transaction_too_old()); return; } + double readBeginTime = now(); rocksdb::PinnableSlice value; auto options = getReadOptions(); uint64_t deadlineMircos = db->GetEnv()->NowMicros() + (readValueTimeout - (timer_monotonic() - a.startTime)) * 1000000; std::chrono::seconds deadlineSeconds(deadlineMircos / 1000000); options.deadline = std::chrono::duration_cast(deadlineSeconds); + + double dbGetBeginTime = now(); auto s = db->Get(options, db->DefaultColumnFamily(), toSlice(a.key), &value); + readValueGetHistogram->sampleSeconds(now() - dbGetBeginTime); + if (a.debugID.present()) { traceBatch.get().addEvent("GetValueDebug", a.debugID.get().first(), "Reader.After"); traceBatch.get().dump(); @@ -420,6 +498,7 @@ struct RocksDBKeyValueStore : IKeyValueStore { logRocksDBError(s, "ReadValue"); a.result.sendError(statusToError(s)); } + readValueActionHistogram->sampleSeconds(now() - readBeginTime); } struct ReadValuePrefixAction : TypedAction { @@ -433,6 +512,7 @@ struct RocksDBKeyValueStore : IKeyValueStore { double getTimeEstimate() const override { return SERVER_KNOBS->READ_VALUE_TIME_ESTIMATE; } }; void action(ReadValuePrefixAction& a) { + readPrefixQueueWaitHistogram->sampleSeconds(now() - a.startTime); Optional traceBatch; if (a.debugID.present()) { traceBatch = { TraceBatch{} }; @@ -448,13 +528,18 @@ struct RocksDBKeyValueStore : IKeyValueStore { a.result.sendError(transaction_too_old()); return; } + double readBeginTime = now(); rocksdb::PinnableSlice value; auto options = getReadOptions(); uint64_t deadlineMircos = db->GetEnv()->NowMicros() + (readValuePrefixTimeout - (timer_monotonic() - a.startTime)) * 1000000; std::chrono::seconds deadlineSeconds(deadlineMircos / 1000000); options.deadline = std::chrono::duration_cast(deadlineSeconds); + + double dbGetBeginTime = now(); auto s = db->Get(options, db->DefaultColumnFamily(), toSlice(a.key), &value); + readPrefixGetHistogram->sampleSeconds(now() - dbGetBeginTime); + if (a.debugID.present()) { traceBatch.get().addEvent("GetValuePrefixDebug", a.debugID.get().first(), @@ -470,6 +555,7 @@ struct RocksDBKeyValueStore : IKeyValueStore { logRocksDBError(s, "ReadValuePrefix"); a.result.sendError(statusToError(s)); } + readPrefixActionHistogram->sampleSeconds(now() - readBeginTime); } struct ReadRangeAction : TypedAction, FastAllocated { @@ -482,6 +568,7 @@ struct RocksDBKeyValueStore : IKeyValueStore { double getTimeEstimate() const override { return SERVER_KNOBS->READ_RANGE_TIME_ESTIMATE; } }; void action(ReadRangeAction& a) { + readRangeQueueWaitHistogram->sampleSeconds(now() - a.startTime); if (timer_monotonic() - a.startTime > readRangeTimeout) { TraceEvent(SevWarn, "RocksDBError") .detail("Error", "Read range request timedout") @@ -491,6 +578,7 @@ struct RocksDBKeyValueStore : IKeyValueStore { return; } + double readBeginTime = now(); RangeResult result; if (a.rowLimit == 0 || a.byteLimit == 0) { a.result.send(result); @@ -508,7 +596,11 @@ struct RocksDBKeyValueStore : IKeyValueStore { if (a.rowLimit >= 0) { auto endSlice = toSlice(a.keys.end); options.iterate_upper_bound = &endSlice; + + double iterCreationBeginTime = now(); auto cursor = std::unique_ptr(db->NewIterator(options)); + readRangeNewIteratorHistogram->sampleSeconds(now() - iterCreationBeginTime); + cursor->Seek(toSlice(a.keys.begin)); while (cursor->Valid() && toStringRef(cursor->key()) < a.keys.end) { KeyValueRef kv(toStringRef(cursor->key()), toStringRef(cursor->value())); @@ -532,7 +624,11 @@ struct RocksDBKeyValueStore : IKeyValueStore { } else { auto beginSlice = toSlice(a.keys.begin); options.iterate_lower_bound = &beginSlice; + + double iterCreationBeginTime = now(); auto cursor = std::unique_ptr(db->NewIterator(options)); + readRangeNewIteratorHistogram->sampleSeconds(now() - iterCreationBeginTime); + cursor->SeekForPrev(toSlice(a.keys.end)); if (cursor->Valid() && toStringRef(cursor->key()) == a.keys.end) { cursor->Prev(); @@ -569,6 +665,7 @@ struct RocksDBKeyValueStore : IKeyValueStore { result.readThrough = result[result.size() - 1].key; } a.result.send(result); + readRangeActionHistogram->sampleSeconds(now() - readBeginTime); } };