From 1b9387eddc1f52165d3243d2ace9be0c62495eea Mon Sep 17 00:00:00 2001 From: Nik Bougalis Date: Sat, 20 Nov 2021 00:02:09 -0800 Subject: [PATCH] Eliminate SHAMapInnerNode lock contention: The `SHAMapInnerNode` class had a global mutex to protect the array of node children. Profiling suggested that around 4% of all attempts to lock the global would block. This commit removes that global mutex, and replaces it with a new per-node 16-way spinlock (implemented so as not to affect the size of an inner node object), effectively eliminating the lock contention. --- src/ripple/shamap/SHAMapInnerNode.h | 7 +- src/ripple/shamap/impl/SHAMapInnerNode.cpp | 105 ++++++++++++++++++--- src/ripple/shamap/impl/SHAMapSync.cpp | 3 +- src/ripple/shamap/impl/TaggedPointer.ipp | 4 +- 4 files changed, 103 insertions(+), 16 deletions(-) diff --git a/src/ripple/shamap/SHAMapInnerNode.h b/src/ripple/shamap/SHAMapInnerNode.h index db7244e7019..5f0765e9c26 100644 --- a/src/ripple/shamap/SHAMapInnerNode.h +++ b/src/ripple/shamap/SHAMapInnerNode.h @@ -27,8 +27,10 @@ #include #include +#include #include #include +#include #include #include #include @@ -53,7 +55,8 @@ class SHAMapInnerNode final : public SHAMapTreeNode, std::uint32_t fullBelowGen_ = 0; std::uint16_t isBranch_ = 0; - static std::mutex childLock; + /** A bitlock for the children of this node, with one bit per child */ + mutable std::atomic lock_ = 0; /** Convert arrays stored in `hashesAndChildren_` so they can store the requested number of children. 
@@ -155,7 +158,7 @@ class SHAMapInnerNode final : public SHAMapTreeNode, std::shared_ptr getChild(int branch); - virtual std::shared_ptr + std::shared_ptr canonicalizeChild(int branch, std::shared_ptr node); // sync functions diff --git a/src/ripple/shamap/impl/SHAMapInnerNode.cpp b/src/ripple/shamap/impl/SHAMapInnerNode.cpp index 6a2a4504fea..c47ac3864ba 100644 --- a/src/ripple/shamap/impl/SHAMapInnerNode.cpp +++ b/src/ripple/shamap/impl/SHAMapInnerNode.cpp @@ -19,11 +19,9 @@ #include -#include #include #include #include -#include #include #include #include @@ -33,14 +31,88 @@ #include #include -#include #include -#include #include +// This is used for the _mm_pause instruction: +#include + namespace ripple { -std::mutex SHAMapInnerNode::childLock; +/** A specialized 16-way spinlock used to protect inner node branches. + + This class packs 16 separate spinlocks into a single 16-bit value. It makes + it possible to lock any one lock at once or, alternatively, all together. + + The implementation tries to use portable constructs but has to be low-level + for performance. + */ +class SpinBitlock +{ +private: + std::atomic& bits_; + std::uint16_t mask_; + +public: + SpinBitlock(std::atomic& lock) : bits_(lock), mask_(0xFFFF) + { + } + + SpinBitlock(std::atomic& lock, int index) + : bits_(lock), mask_(1 << index) + { + assert(index >= 0 && index < 16); + } + + [[nodiscard]] bool + try_lock() + { + // If we want to grab all the individual bitlocks at once we cannot + // use `fetch_or`! To see why, imagine that `lock_ == 0x0020` which + // means that the `fetch_or` would return `0x0020` but all the bits + // would already be (incorrectly!) set. Oops! 
+ std::uint16_t expected = 0; + + if (mask_ != 0xFFFF) + return (bits_.fetch_or(mask_, std::memory_order_acquire) & mask_) == + expected; + + return bits_.compare_exchange_weak( + expected, + mask_, + std::memory_order_acquire, + std::memory_order_relaxed); + } + + void + lock() + { + // Testing suggests that 99.9999% of the time this will succeed, so + // we try to optimize the fast path. + if (try_lock()) + return; + + do + { + // We try to spin for a few times: + for (int i = 0; i != 100; ++i) + { + if (try_lock()) + return; + + _mm_pause(); + } + + std::this_thread::yield(); + } while ((bits_.load(std::memory_order_relaxed) & mask_) == 0); + } + + void + unlock() + { + bits_.fetch_and(~mask_, std::memory_order_release); + } +}; SHAMapInnerNode::SHAMapInnerNode( std::uint32_t cowid, @@ -108,7 +180,10 @@ SHAMapInnerNode::clone(std::uint32_t cowid) const cloneHashes[branchNum] = thisHashes[indexNum]; }); } - std::lock_guard lock(childLock); + + SpinBitlock sl(lock_); + std::lock_guard lock(sl); + if (thisIsSparse) { int cloneChildIndex = 0; @@ -341,8 +416,11 @@ SHAMapInnerNode::getChildPointer(int branch) assert(branch >= 0 && branch < branchFactor); assert(!isEmptyBranch(branch)); - std::lock_guard lock(childLock); - return hashesAndChildren_.getChildren()[*getChildIndex(branch)].get(); + auto const index = *getChildIndex(branch); + + SpinBitlock sl(lock_, index); + std::lock_guard lock(sl); + return hashesAndChildren_.getChildren()[index].get(); } std::shared_ptr @@ -351,8 +429,11 @@ SHAMapInnerNode::getChild(int branch) assert(branch >= 0 && branch < branchFactor); assert(!isEmptyBranch(branch)); - std::lock_guard lock(childLock); - return hashesAndChildren_.getChildren()[*getChildIndex(branch)]; + auto const index = *getChildIndex(branch); + + SpinBitlock sl(lock_, index); + std::lock_guard lock(sl); + return hashesAndChildren_.getChildren()[index]; } SHAMapHash const& @@ -377,7 +458,9 @@ SHAMapInnerNode::canonicalizeChild( auto [_, hashes, children] = 
hashesAndChildren_.getHashesAndChildren(); assert(node->getHash() == hashes[childIndex]); - std::lock_guard lock(childLock); + SpinBitlock sl(lock_, childIndex); + std::lock_guard lock(sl); + if (children[childIndex]) { // There is already a node hooked up, return it diff --git a/src/ripple/shamap/impl/SHAMapSync.cpp b/src/ripple/shamap/impl/SHAMapSync.cpp index 8cd5bd704ef..1e233d55f23 100644 --- a/src/ripple/shamap/impl/SHAMapSync.cpp +++ b/src/ripple/shamap/impl/SHAMapSync.cpp @@ -583,7 +583,6 @@ SHAMap::addKnownNode( } auto const generation = f_.getFullBelowCache(ledgerSeq_)->getGeneration(); - auto newNode = SHAMapTreeNode::makeFromWire(rawNode); SHAMapNodeID iNodeID; auto iNode = root_.get(); @@ -612,6 +611,8 @@ SHAMap::addKnownNode( if (iNode == nullptr) { + auto newNode = SHAMapTreeNode::makeFromWire(rawNode); + if (!newNode || childHash != newNode->getHash()) { JLOG(journal_.warn()) << "Corrupt node received"; diff --git a/src/ripple/shamap/impl/TaggedPointer.ipp b/src/ripple/shamap/impl/TaggedPointer.ipp index d1110f49387..30bf68426b6 100644 --- a/src/ripple/shamap/impl/TaggedPointer.ipp +++ b/src/ripple/shamap/impl/TaggedPointer.ipp @@ -17,9 +17,9 @@ */ //============================================================================== -#include - +#include #include +#include #include