Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

stats: Add fake symbol table as an intermediate state to move to SymbolTable API without taking locks. #5414

Merged
merged 23 commits into from
Jan 30, 2019
Merged
Changes from 1 commit
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
b1fcd49
catch up with symtab-read-lock and to-string-on-symtab.
jmarantz Dec 18, 2018
121ec75
refactor toString
jmarantz Dec 22, 2018
a54a2bc
Merge branch 'master' into fake-symbol-table
jmarantz Dec 22, 2018
4b9570c
virtualize symbol-table.
jmarantz Dec 23, 2018
0e9303b
use virtual interface in tests.
jmarantz Dec 23, 2018
4fa1eb2
all tests working.
jmarantz Dec 24, 2018
8a6aec1
Fix asan failures, add comments, cleanup.
jmarantz Dec 25, 2018
2209115
clang-tidy fixes.
jmarantz Dec 25, 2018
adf956e
Merge branch 'master' into fake-symbol-table
jmarantz Jan 14, 2019
a92121d
Merge branch 'master' into fake-symbol-table
jmarantz Jan 17, 2019
c5e25e1
Merge branch 'master' into fake-symbol-table
jmarantz Jan 22, 2019
a5a112e
Sink Storage type nicknames into SymbolTable class.
jmarantz Jan 22, 2019
b37dc32
comment cleanup.
jmarantz Jan 23, 2019
9140665
Merge branch 'master' into fake-symbol-table
jmarantz Jan 26, 2019
11fd3c0
Privatize SymbolTable::free and incRefCount, friending helper classes…
jmarantz Jan 27, 2019
675b9d6
Improve comments, fix nits, typos, etc.
jmarantz Jan 28, 2019
35709b3
Remove 2-arg form of join().
jmarantz Jan 28, 2019
392a0be
Merge branch 'master' into fake-symbol-table
jmarantz Jan 29, 2019
e9f2b50
Review style nits and actually test for zero contentions in fake symb…
jmarantz Jan 29, 2019
71df963
Only start tracking the contentions right before doing the accesses.
jmarantz Jan 30, 2019
ecb1b88
Add missing include for vector.
jmarantz Jan 30, 2019
586103c
Merge branch 'master' into fake-symbol-table
jmarantz Jan 30, 2019
d804664
Merge branch 'master' into fake-symbol-table
jmarantz Jan 30, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
catch up with symtab-read-lock and to-string-on-symtab.
Signed-off-by: Joshua Marantz <jmarantz@google.com>
jmarantz committed Dec 18, 2018
commit b1fcd4924022f796523ea78eac7b1bf6b80f40fd
189 changes: 142 additions & 47 deletions source/common/stats/symbol_table_impl.cc
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "common/stats/symbol_table_impl.h"

#include <iostream>
#include <memory>
#include <unordered_map>
#include <vector>
@@ -12,6 +13,30 @@ namespace Stats {
static const uint32_t SpilloverMask = 0x80;
static const uint32_t Low7Bits = 0x7f;

StatName::StatName(const StatName& src, SymbolStorage memory) : size_and_data_(memory) {
memcpy(memory, src.size_and_data_, src.size());
}

#ifndef ENVOY_CONFIG_COVERAGE
void StatName::debugPrint() {
if (size_and_data_ == nullptr) {
std::cout << "Null StatName" << std::endl << std::flush;
} else {
uint64_t nbytes = dataSize();
std::cout << "dataSize=" << nbytes << ":";
for (uint64_t i = 0; i < nbytes; ++i) {
std::cout << " " << static_cast<uint64_t>(data()[i]);
}
SymbolVec encoding = SymbolEncoding::decodeSymbols(data(), dataSize());
std::cout << ", numSymbols=" << encoding.size() << ":";
for (Symbol symbol : encoding) {
std::cout << " " << symbol;
}
std::cout << std::endl << std::flush;
}
}
#endif

SymbolEncoding::~SymbolEncoding() { ASSERT(vec_.empty()); }

void SymbolEncoding::addSymbol(Symbol symbol) {
@@ -57,19 +82,20 @@ SymbolVec SymbolEncoding::decodeSymbols(const SymbolStorage array, uint64_t size
// There is no guarantee that bytes will be aligned, so we can't cast to a
// uint16_t* and assign, but must individually copy the bytes.
static inline uint8_t* saveLengthToBytesReturningNext(uint64_t length, uint8_t* bytes) {
ASSERT(length < 65536);
ASSERT(length < StatNameMaxSize);
*bytes++ = length & 0xff;
*bytes++ = length >> 8;
return bytes;
}

void SymbolEncoding::moveToStorage(SymbolStorage symbol_array) {
uint64_t SymbolEncoding::moveToStorage(SymbolStorage symbol_array) {
uint64_t sz = size();
symbol_array = saveLengthToBytesReturningNext(sz, symbol_array);
if (sz != 0) {
memcpy(symbol_array, vec_.data(), sz * sizeof(uint8_t));
}
vec_.clear(); // Logically transfer ownership, enabling empty assert on destruct.
return sz + StatNameSizeEncodingBytes;
}

SymbolTable::SymbolTable()
@@ -102,11 +128,8 @@ SymbolEncoding SymbolTable::encode(const absl::string_view name) {

// Now take the lock and populate the Symbol objects, which involves bumping
// ref-counts in this.
{
Thread::LockGuard lock(lock_);
for (absl::string_view token : tokens) {
symbols.push_back(toSymbol(token));
}
for (absl::string_view token : tokens) {
symbols.push_back(toSymbol(token));
}

// Now efficiently encode the array of 32-bit symbols into a uint8_t array.
@@ -116,77 +139,125 @@ SymbolEncoding SymbolTable::encode(const absl::string_view name) {
return encoding;
}

std::string SymbolTable::decode(const SymbolStorage symbol_array, uint64_t size) const {
return decode(SymbolEncoding::decodeSymbols(symbol_array, size));
std::string SymbolTable::toString(const StatName& stat_name) const {
return decodeSymbolVec(SymbolEncoding::decodeSymbols(stat_name.data(), stat_name.dataSize()));
}

std::string SymbolTable::decode(const SymbolVec& symbols) const {
std::string SymbolTable::decodeSymbolVec(const SymbolVec& symbols) const {
std::vector<absl::string_view> name_tokens;
name_tokens.reserve(symbols.size());
{
// Hold the lock only while decoding symbols.
Thread::LockGuard lock(lock_);
absl::ReaderMutexLock lock(&lock_);
for (Symbol symbol : symbols) {
name_tokens.push_back(fromSymbol(symbol));
}
}
return absl::StrJoin(name_tokens, ".");
}

void SymbolTable::free(StatName stat_name) {
void SymbolTable::incRefCount(const StatName& stat_name) {
// Before taking the lock, decode the array of symbols from the SymbolStorage.
SymbolVec symbols = SymbolEncoding::decodeSymbols(stat_name.data(), stat_name.numBytes());
SymbolVec symbols = SymbolEncoding::decodeSymbols(stat_name.data(), stat_name.dataSize());

Thread::LockGuard lock(lock_);
absl::ReaderMutexLock lock(&lock_);
for (Symbol symbol : symbols) {
auto decode_search = decode_map_.find(symbol);
ASSERT(decode_search != decode_map_.end());

auto encode_search = encode_map_.find(decode_search->second);
auto encode_search = encode_map_.find(decode_search->second.get());
ASSERT(encode_search != encode_map_.end());

encode_search->second.ref_count_--;
++encode_search->second->ref_count_;
}
}

void SymbolTable::free(const StatName& stat_name) {
// Before taking the lock, decode the array of symbols from the SymbolStorage.
SymbolVec symbols = SymbolEncoding::decodeSymbols(stat_name.data(), stat_name.dataSize());

absl::MutexLock lock(&lock_); // Takes write-lock as we may mutate decode_map_ and encode_map_.
for (Symbol symbol : symbols) {
auto decode_search = decode_map_.find(symbol);
ASSERT(decode_search != decode_map_.end());

auto encode_search = encode_map_.find(decode_search->second.get());
ASSERT(encode_search != encode_map_.end());

--encode_search->second->ref_count_;

// If that was the last remaining client usage of the symbol, erase the current
// mappings and add the now-unused symbol to the reuse pool.
if (encode_search->second.ref_count_ == 0) {
if (encode_search->second->ref_count_ == 0) {
decode_map_.erase(decode_search);
encode_map_.erase(encode_search);
pool_.push(symbol);
}
}
}

Symbol SymbolTable::toSymbol(absl::string_view sv) EXCLUSIVE_LOCKS_REQUIRED(lock_) {
Symbol result;
auto encode_find = encode_map_.find(sv);
// If the string segment doesn't already exist,
if (encode_find == encode_map_.end()) {
// We create the actual string, place it in the decode_map_, and then insert a string_view
// pointing to it in the encode_map_. This allows us to only store the string once.
std::string str = std::string(sv);
Symbol SymbolTable::toSymbol(absl::string_view sv) {
{
// First try to find the symbol with just a read-lock, so concurrent
// lookups for an already-allocated symbol do not contend.
absl::ReaderMutexLock lock(&lock_);
auto encode_find = encode_map_.find(sv);
if (encode_find != encode_map_.end()) {
// Increment the refcount of the already existing symbol. Note that the
// ref_count_ is atomic to allow incrementing it under read-lock.
SharedSymbol& shared_symbol = *encode_find->second;
++(shared_symbol.ref_count_);
return shared_symbol.symbol_;
}
}

// If the find() under read-lock failed, we need to release it and take a
// write-lock. Note that another thread may race to insert the symbol during
// this window of time with the lock released, so we need to check again
// under write-lock, which we do by proactively allocating the string and
// attempting to insert it into the encode_map. If that worked, we also
// write the decode-map, transferring the ownership of the string to the
// decode-map value.
absl::MutexLock lock(&lock_);

// If the string segment doesn't already exist, create the actual string as a
// nul-terminated char-array owned by decode_map_, and then insert a
// string_view pointing to it in the encode_map_. This allows us to only store
// the string once.
size_t size = sv.size() + 1;
std::unique_ptr<char[]> str = std::make_unique<char[]>(size);
StringUtil::strlcpy(str.get(), sv.data(), size);

auto encode_insert =
encode_map_.insert({str.get(), std::make_unique<SharedSymbol>(next_symbol_)});
SharedSymbol& shared_symbol = *encode_insert.first->second;

if (encode_insert.second) {
// The insertion took place.
auto decode_insert = decode_map_.insert({next_symbol_, std::move(str)});
ASSERT(decode_insert.second);

auto encode_insert = encode_map_.insert({decode_insert.first->second, {next_symbol_, 1}});
ASSERT(encode_insert.second);

result = next_symbol_;
newSymbol();
} else {
// If the insertion didn't take place, return the actual value at that location and up the
// refcount at that location
result = encode_find->second.symbol_;
++(encode_find->second.ref_count_);
// If the insertion didn't take place -- due to another thread racing to
// insert the same symbol after we drop the read-lock above -- we can
// return the shared symbol, but we must bump the refcount.
++(shared_symbol.ref_count_);

// Note: this condition is hard to hit in tests as it requires a tight race
// between multiple threads concurrently creating the same symbol.
// Uncommenting this line can help rapidly determine coverage during
// development. StatNameTest.RacingSymbolCreation hits this occasionally
// when testing with optimization, and frequently with fastbuild and debug.
//
// std::cerr << "Covered insertion race" << std::endl;
}
return result;
return shared_symbol.symbol_;
}

absl::string_view SymbolTable::fromSymbol(const Symbol symbol) const
EXCLUSIVE_LOCKS_REQUIRED(lock_) {
absl::string_view SymbolTable::fromSymbol(const Symbol symbol) const SHARED_LOCKS_REQUIRED(lock_) {
auto search = decode_map_.find(symbol);
ASSERT(search != decode_map_.end());
return search->second;
RELEASE_ASSERT(search != decode_map_.end(), "no such symbol");
return absl::string_view(search->second.get());
}

void SymbolTable::newSymbol() EXCLUSIVE_LOCKS_REQUIRED(lock_) {
@@ -205,23 +276,47 @@ bool SymbolTable::lessThan(const StatName& a, const StatName& b) const {
// If this becomes a performance bottleneck (e.g. during sorting), we could
// provide an iterator-like interface for incrementally decoding the symbols
// without allocating memory.
SymbolVec av = SymbolEncoding::decodeSymbols(a.data(), a.numBytes());
SymbolVec bv = SymbolEncoding::decodeSymbols(b.data(), b.numBytes());
SymbolVec av = SymbolEncoding::decodeSymbols(a.data(), a.dataSize());
SymbolVec bv = SymbolEncoding::decodeSymbols(b.data(), b.dataSize());
for (uint64_t i = 0, n = std::min(av.size(), bv.size()); i < n; ++i) {
if (av[i] != bv[i]) {
Thread::LockGuard lock(lock_);
absl::ReaderMutexLock lock(&lock_);
return fromSymbol(av[i]) < fromSymbol(bv[i]);
}
}
return av.size() < bv.size();
}

#ifndef ENVOY_CONFIG_COVERAGE
void SymbolTable::debugPrint() const {
absl::ReaderMutexLock lock(&lock_);
std::vector<Symbol> symbols;
for (const auto& p : decode_map_) {
symbols.push_back(p.first);
}
std::sort(symbols.begin(), symbols.end());
for (Symbol symbol : symbols) {
const char* token = decode_map_.find(symbol)->second.get();
const SharedSymbol& shared_symbol = *encode_map_.find(token)->second;
std::cout << symbol << ": '" << token << "' (" << shared_symbol.ref_count_ << ")" << std::endl;
}
std::cout << std::flush;
}
#endif

StatNameStorage::StatNameStorage(absl::string_view name, SymbolTable& table) {
SymbolEncoding encoding = table.encode(name);
bytes_ = std::make_unique<uint8_t[]>(encoding.bytesRequired());
encoding.moveToStorage(bytes_.get());
}

StatNameStorage::StatNameStorage(StatName src, SymbolTable& table) {
uint64_t size = src.size();
bytes_ = std::make_unique<uint8_t[]>(size);
src.copyToStorage(bytes_.get());
table.incRefCount(statName());
}

StatNameStorage::~StatNameStorage() {
// StatNameStorage is not fully RAII: you must call free(SymbolTable&) to
// decrement the reference counts held by the SymbolTable on behalf of
@@ -236,8 +331,8 @@ void StatNameStorage::free(SymbolTable& table) {
}

StatNameJoiner::StatNameJoiner(StatName a, StatName b) {
const uint64_t a_size = a.numBytes();
const uint64_t b_size = b.numBytes();
const uint64_t a_size = a.dataSize();
const uint64_t b_size = b.dataSize();
uint8_t* const p = alloc(a_size + b_size);
memcpy(p, a.data(), a_size);
memcpy(p + a_size, b.data(), b_size);
@@ -246,18 +341,18 @@ StatNameJoiner::StatNameJoiner(StatName a, StatName b) {
StatNameJoiner::StatNameJoiner(const std::vector<StatName>& stat_names) {
uint64_t num_bytes = 0;
for (StatName stat_name : stat_names) {
num_bytes += stat_name.numBytes();
num_bytes += stat_name.dataSize();
}
uint8_t* p = alloc(num_bytes);
for (StatName stat_name : stat_names) {
num_bytes = stat_name.numBytes();
num_bytes = stat_name.dataSize();
memcpy(p, stat_name.data(), num_bytes);
p += num_bytes;
}
}

uint8_t* StatNameJoiner::alloc(uint64_t num_bytes) {
bytes_ = std::make_unique<uint8_t[]>(num_bytes + 2); // +2 bytes for the length up to 64k.
bytes_ = std::make_unique<uint8_t[]>(num_bytes + StatNameSizeEncodingBytes);
return saveLengthToBytesReturningNext(num_bytes, bytes_.get());
}

Loading