From 5d40fc4cc406138c6b6b49655ede1d9e59beba67 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Sat, 26 Oct 2019 21:44:26 -0400 Subject: [PATCH 01/36] Avoid crashes in fuzz-tests due to input strings being too long. Signed-off-by: Joshua Marantz --- test/common/stats/symbol_table_fuzz_test.cc | 34 +++++++++++++++------ 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/test/common/stats/symbol_table_fuzz_test.cc b/test/common/stats/symbol_table_fuzz_test.cc index 426b63391b36..5b5750e8a8ef 100644 --- a/test/common/stats/symbol_table_fuzz_test.cc +++ b/test/common/stats/symbol_table_fuzz_test.cc @@ -5,10 +5,34 @@ #include "test/fuzz/fuzz_runner.h" #include "test/fuzz/utility.h" +#include "absl/strings/match.h" +#include "absl/strings/string_view.h" + namespace Envoy { namespace Stats { namespace Fuzz { +// Adds a stat-name to the symbol table, discarding it if it ends in ".", and +// splitting it in two if it's too long (64k bytes). +// +// The actual requirement for StatName allows up to 64k "."-separated segments +// of 64k, but it's simpler to just limit the whole string. I don't think +// there's that much value in using strings larger than 64k. +void addSymbol(StatNamePool& pool, SymbolTable& symbol_table, absl::string_view str) { + while (absl::EndsWith(str, ".")) { + str.remove_suffix(1); + } + + if (str.size() >= StatNameMaxSize) { + size_t halfway = str.size() / 2; + addSymbol(pool, symbol_table, str.substr(0, halfway)); + addSymbol(pool, symbol_table, str.substr(halfway)); + } else if (!str.empty()) { + StatName stat_name = pool.add(str); + FUZZ_ASSERT(str == symbol_table.toString(stat_name)); + } +} + // Fuzzer for symbol tables. DEFINE_FUZZER(const uint8_t* buf, size_t len) { FuzzedDataProvider provider(buf, len); @@ -16,15 +40,7 @@ DEFINE_FUZZER(const uint8_t* buf, size_t len) { StatNamePool pool(symbol_table); while (provider.remaining_bytes() != 0) { - std::string next_data = provider.ConsumeRandomLengthString(provider.remaining_bytes()); - - // ending with a "." is not considered legal, so just skip. - if (!next_data.empty() && next_data[next_data.size() - 1] == '.') { - continue; - } - - StatName stat_name = pool.add(next_data); - FUZZ_ASSERT(next_data == symbol_table.toString(stat_name)); + addSymbol(pool, symbol_table, provider.ConsumeRandomLengthString(provider.remaining_bytes())); } } From 8f9ab938581e5e2560265e856db694bbb3427746 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Mon, 28 Oct 2019 18:15:36 -0400 Subject: [PATCH 02/36] Eliminate 64k limit on # of segments in a real symbol-table stat-name, or on the length of a fake-symbol table stat-name. Signed-off-by: Joshua Marantz --- source/common/stats/fake_symbol_table_impl.h | 17 +++--- source/common/stats/symbol_table_impl.cc | 56 +++++++++++++++++-- source/common/stats/symbol_table_impl.h | 52 +++++++++-------- test/common/stats/symbol_table_fuzz_test.cc | 34 +++-------- test/common/stats/symbol_table_impl_test.cc | 52 +++++++++++++++++ test/common/stats/thread_local_store_test.cc | 14 ++--- test/extensions/filters/http/dynamo/BUILD | 1 + .../filters/http/dynamo/dynamo_stats_test.cc | 2 + test/integration/stats_integration_test.cc | 8 ++- 9 files changed, 161 insertions(+), 75 deletions(-) diff --git a/source/common/stats/fake_symbol_table_impl.h b/source/common/stats/fake_symbol_table_impl.h index b2a86d6cfdad..07dfb7518c90 100644 --- a/source/common/stats/fake_symbol_table_impl.h +++ b/source/common/stats/fake_symbol_table_impl.h @@ -64,13 +64,10 @@ class FakeSymbolTableImpl : public SymbolTable { // than 256 of them. RELEASE_ASSERT(num_names < 256, "Maximum number elements in a StatNameList exceeded"); - // First encode all the names. The '1' here represents the number of - // names. The num_names * StatNameSizeEncodingBytes reserves space for the - // lengths of each name. - size_t total_size_bytes = 1 + num_names * StatNameSizeEncodingBytes; - + size_t total_size_bytes = 1; /* one byte for holding the number of names */ for (uint32_t i = 0; i < num_names; ++i) { - total_size_bytes += names[i].size(); + size_t name_size = names[i].size(); + total_size_bytes += SymbolTableImpl::Encoding::encodingSizeBytes(name_size) + name_size; } // Now allocate the exact number of bytes required and move the encodings @@ -81,7 +78,7 @@ class FakeSymbolTableImpl : public SymbolTable { for (uint32_t i = 0; i < num_names; ++i) { auto& name = names[i]; size_t sz = name.size(); - p = SymbolTableImpl::writeLengthReturningNext(sz, p); + p = SymbolTableImpl::Encoding::encode(sz, p); if (!name.empty()) { memcpy(p, name.data(), sz * sizeof(uint8_t)); p += sz; @@ -143,8 +140,10 @@ class FakeSymbolTableImpl : public SymbolTable { StoragePtr encodeHelper(absl::string_view name) const { ASSERT(!absl::EndsWith(name, ".")); - auto bytes = std::make_unique(name.size() + StatNameSizeEncodingBytes); - uint8_t* buffer = SymbolTableImpl::writeLengthReturningNext(name.size(), bytes.get()); + uint64_t bytes_required = + SymbolTableImpl::Encoding::encodingSizeBytes(name.size()) + name.size(); + auto bytes = std::make_unique(bytes_required); + uint8_t* buffer = SymbolTableImpl::Encoding::encode(name.size(), bytes.get()); memcpy(buffer, name.data(), name.size()); return bytes; } diff --git a/source/common/stats/symbol_table_impl.cc b/source/common/stats/symbol_table_impl.cc index a0efa55f8206..0b049b0f1e60 100644 --- a/source/common/stats/symbol_table_impl.cc +++ b/source/common/stats/symbol_table_impl.cc @@ -16,6 +16,52 @@ namespace Stats { static const uint32_t SpilloverMask = 0x80; static const uint32_t Low7Bits = 0x7f; +uint64_t SymbolTableImpl::Encoding::bytesRequired() const { + uint64_t data_size = dataBytesRequired(); + return data_size + encodingSizeBytes(data_size); +} + +uint64_t SymbolTableImpl::Encoding::decode(const uint8_t* encoding) { + uint64_t size = 0; + for (uint32_t shift = 0; true; ++encoding, shift += 7) { + uint64_t uc = static_cast(*encoding); + size |= (uc & Low7Bits) << shift; + if ((uc & SpilloverMask) == 0) { + break; + } + } + return size; +} + +uint64_t StatName::dataSize() const { + if (size_and_data_ == nullptr) { + return 0; + } + return SymbolTableImpl::Encoding::decode(size_and_data_); +} + +uint64_t SymbolTableImpl::Encoding::encodingSizeBytes(uint64_t number) { + uint64_t num_bytes = 0; + do { + ++num_bytes; + number >>= 7; + } while (number != 0); + return num_bytes; +} + +// writeLengthReturningNext ? +uint8_t* SymbolTableImpl::Encoding::encode(uint64_t number, uint8_t* bytes) { + do { + if (number < (1 << 7)) { + *bytes++ = number; // number <= 127 get encoded in one byte. + } else { + *bytes++ = ((number & Low7Bits) | SpilloverMask); // number >= 128 need spillover bytes. + } + number >>= 7; + } while (number != 0); + return bytes; +} + #ifndef ENVOY_CONFIG_COVERAGE void StatName::debugPrint() { if (size_and_data_ == nullptr) { @@ -84,12 +130,13 @@ SymbolVec SymbolTableImpl::Encoding::decodeSymbols(const SymbolTable::Storage ar uint64_t SymbolTableImpl::Encoding::moveToStorage(SymbolTable::Storage symbol_array) { const uint64_t sz = dataBytesRequired(); - symbol_array = writeLengthReturningNext(sz, symbol_array); + symbol_array = encode(sz, symbol_array); if (sz != 0) { memcpy(symbol_array, vec_.data(), sz * sizeof(uint8_t)); } vec_.clear(); // Logically transfer ownership, enabling empty assert on destruct. - return sz + StatNameSizeEncodingBytes; + return sz + SymbolTableImpl::Encoding::encodingSizeBytes(sz); + ; } SymbolTableImpl::SymbolTableImpl() @@ -456,8 +503,9 @@ SymbolTable::StoragePtr SymbolTableImpl::join(const StatNameVec& stat_names) con for (StatName stat_name : stat_names) { num_bytes += stat_name.dataSize(); } - auto bytes = std::make_unique(num_bytes + StatNameSizeEncodingBytes); - uint8_t* p = writeLengthReturningNext(num_bytes, bytes.get()); + auto bytes = std::make_unique(SymbolTableImpl::Encoding::encodingSizeBytes(num_bytes) + + num_bytes); + uint8_t* p = SymbolTableImpl::Encoding::encode(num_bytes, bytes.get()); for (StatName stat_name : stat_names) { const uint64_t stat_name_bytes = stat_name.dataSize(); memcpy(p, stat_name.data(), stat_name_bytes); diff --git a/source/common/stats/symbol_table_impl.h b/source/common/stats/symbol_table_impl.h index 5f8078f0ebd4..1a594a444077 100644 --- a/source/common/stats/symbol_table_impl.h +++ b/source/common/stats/symbol_table_impl.h @@ -33,8 +33,8 @@ using Symbol = uint32_t; /** * We encode the byte-size of a StatName as its first two bytes. */ -constexpr uint64_t StatNameSizeEncodingBytes = 2; -constexpr uint64_t StatNameMaxSize = 1 << (8 * StatNameSizeEncodingBytes); // 65536 +// constexpr uint64_t StatNameSizeEncodingBytes = 2; +// constexpr uint64_t StatNameMaxSize = 1 << (8 * StatNameSizeEncodingBytes); // 65536 /** Transient representations of a vector of 32-bit symbols */ using SymbolVec = std::vector; @@ -104,7 +104,7 @@ class SymbolTableImpl : public SymbolTable { * Returns the number of bytes required to represent StatName as a uint8_t * array, including the encoded size. */ - uint64_t bytesRequired() const { return dataBytesRequired() + StatNameSizeEncodingBytes; } + uint64_t bytesRequired() const; /** * @return the number of uint8_t entries we collected while adding symbols. @@ -120,6 +120,20 @@ class SymbolTableImpl : public SymbolTable { */ uint64_t moveToStorage(SymbolTable::Storage array); + static uint64_t decode(const uint8_t* encoding); + static uint64_t encodingSizeBytes(uint64_t number); + + /** + * Saves the specified length into the byte array, returning the next byte. + * There is no guarantee that bytes will be aligned, so we can't cast to a + * uint16_t* and assign, but must individually copy the bytes. + * + * @param length the length in bytes to write. + * @param bytes the pointer into which to write the length. + * @return the pointer to the next byte for writing the data. + */ + static uint8_t* encode(uint64_t number, uint8_t* buffer); + private: std::vector vec_; }; @@ -144,22 +158,6 @@ class SymbolTableImpl : public SymbolTable { void debugPrint() const override; #endif - /** - * Saves the specified length into the byte array, returning the next byte. - * There is no guarantee that bytes will be aligned, so we can't cast to a - * uint16_t* and assign, but must individually copy the bytes. - * - * @param length the length in bytes to write. Must be < StatNameMaxSize. - * @param bytes the pointer into which to write the length. - * @return the pointer to the next byte for writing the data. - */ - static inline uint8_t* writeLengthReturningNext(uint64_t length, uint8_t* bytes) { - ASSERT(length < StatNameMaxSize); - *bytes++ = length & 0xff; - *bytes++ = length >> 8; - return bytes; - } - StatNameSetPtr makeSet(absl::string_view name) override; void forgetSet(StatNameSet& stat_name_set) override; uint64_t getRecentLookups(const RecentLookupsFn&) const override; @@ -362,18 +360,16 @@ class StatName { * @return uint64_t the number of bytes in the symbol array, excluding the two-byte * overhead for the size itself. */ - uint64_t dataSize() const { - if (size_and_data_ == nullptr) { - return 0; - } - return size_and_data_[0] | (static_cast(size_and_data_[1]) << 8); - } + uint64_t dataSize() const; /** * @return uint64_t the number of bytes in the symbol array, including the two-byte * overhead for the size itself. */ - uint64_t size() const { return dataSize() + StatNameSizeEncodingBytes; } + uint64_t size() const { + uint64_t sz = dataSize(); + return sz + SymbolTableImpl::Encoding::encodingSizeBytes(sz); + } void copyToStorage(SymbolTable::Storage storage) { memcpy(storage, size_and_data_, size()); } @@ -384,7 +380,9 @@ class StatName { /** * @return A pointer to the first byte of data (skipping over size bytes). */ - const uint8_t* data() const { return size_and_data_ + StatNameSizeEncodingBytes; } + const uint8_t* data() const { + return size_and_data_ + SymbolTableImpl::Encoding::encodingSizeBytes(dataSize()); + } /** * @return whether this is empty. diff --git a/test/common/stats/symbol_table_fuzz_test.cc b/test/common/stats/symbol_table_fuzz_test.cc index 5b5750e8a8ef..426b63391b36 100644 --- a/test/common/stats/symbol_table_fuzz_test.cc +++ b/test/common/stats/symbol_table_fuzz_test.cc @@ -5,34 +5,10 @@ #include "test/fuzz/fuzz_runner.h" #include "test/fuzz/utility.h" -#include "absl/strings/match.h" -#include "absl/strings/string_view.h" - namespace Envoy { namespace Stats { namespace Fuzz { -// Adds a stat-name to the symbol table, discarding it if it ends in ".", and -// splitting it in two if it's too long (64k bytes). -// -// The actual requirement for StatName allows up to 64k "."-separated segments -// of 64k, but it's simpler to just limit the whole string. I don't think -// there's that much value in using strings larger than 64k. -void addSymbol(StatNamePool& pool, SymbolTable& symbol_table, absl::string_view str) { - while (absl::EndsWith(str, ".")) { - str.remove_suffix(1); - } - - if (str.size() >= StatNameMaxSize) { - size_t halfway = str.size() / 2; - addSymbol(pool, symbol_table, str.substr(0, halfway)); - addSymbol(pool, symbol_table, str.substr(halfway)); - } else if (!str.empty()) { - StatName stat_name = pool.add(str); - FUZZ_ASSERT(str == symbol_table.toString(stat_name)); - } -} - // Fuzzer for symbol tables. DEFINE_FUZZER(const uint8_t* buf, size_t len) { FuzzedDataProvider provider(buf, len); @@ -40,7 +16,15 @@ DEFINE_FUZZER(const uint8_t* buf, size_t len) { StatNamePool pool(symbol_table); while (provider.remaining_bytes() != 0) { - addSymbol(pool, symbol_table, provider.ConsumeRandomLengthString(provider.remaining_bytes())); + std::string next_data = provider.ConsumeRandomLengthString(provider.remaining_bytes()); + + // ending with a "." is not considered legal, so just skip. + if (!next_data.empty() && next_data[next_data.size() - 1] == '.') { + continue; + } + + StatName stat_name = pool.add(next_data); + FUZZ_ASSERT(next_data == symbol_table.toString(stat_name)); } } diff --git a/test/common/stats/symbol_table_impl_test.cc b/test/common/stats/symbol_table_impl_test.cc index 2ddd52b56499..e272bd1f59b1 100644 --- a/test/common/stats/symbol_table_impl_test.cc +++ b/test/common/stats/symbol_table_impl_test.cc @@ -69,6 +69,14 @@ class StatNameTest : public testing::TestWithParam { StatName makeStat(absl::string_view name) { return pool_->add(name); } + std::vector serializeDeserialize(uint64_t number) { + uint64_t num_bytes = SymbolTableImpl::Encoding::encodingSizeBytes(number); + uint8_t buf[10]; + EXPECT_EQ(buf + num_bytes, SymbolTableImpl::Encoding::encode(number, buf)) << number; + EXPECT_EQ(number, SymbolTableImpl::Encoding::decode(buf)) << number; + return std::vector(buf, buf + num_bytes); + } + FakeSymbolTableImpl* fake_symbol_table_{nullptr}; SymbolTableImpl* real_symbol_table_{nullptr}; std::unique_ptr table_; @@ -78,6 +86,36 @@ class StatNameTest : public testing::TestWithParam { INSTANTIATE_TEST_SUITE_P(StatNameTest, StatNameTest, testing::ValuesIn({SymbolTableType::Real, SymbolTableType::Fake})); +TEST_P(StatNameTest, SerializeBytes) { + EXPECT_EQ(std::vector{1}, serializeDeserialize(1)); + EXPECT_EQ(std::vector{127}, serializeDeserialize(127)); + EXPECT_EQ((std::vector{128, 1}), serializeDeserialize(128)); + EXPECT_EQ((std::vector{129, 1}), serializeDeserialize(129)); + EXPECT_EQ((std::vector{255, 1}), serializeDeserialize(255)); + EXPECT_EQ((std::vector{255, 127}), serializeDeserialize(16383)); + EXPECT_EQ((std::vector{128, 128, 1}), serializeDeserialize(16384)); + EXPECT_EQ((std::vector{129, 128, 1}), serializeDeserialize(16385)); + + auto power2 = [](uint32_t exp) -> uint64_t { + uint64_t one = 1; + return one << exp; + }; + EXPECT_EQ((std::vector{255, 255, 127}), serializeDeserialize(power2(21) - 1)); + EXPECT_EQ((std::vector{128, 128, 128, 1}), serializeDeserialize(power2(21))); + EXPECT_EQ((std::vector{129, 128, 128, 1}), serializeDeserialize(power2(21) + 1)); + EXPECT_EQ((std::vector{255, 255, 255, 127}), serializeDeserialize(power2(28) - 1)); + EXPECT_EQ((std::vector{128, 128, 128, 128, 1}), serializeDeserialize(power2(28))); + EXPECT_EQ((std::vector{129, 128, 128, 128, 1}), serializeDeserialize(power2(28) + 1)); + EXPECT_EQ((std::vector{255, 255, 255, 255, 127}), serializeDeserialize(power2(35) - 1)); + EXPECT_EQ((std::vector{128, 128, 128, 128, 128, 1}), serializeDeserialize(power2(35))); + EXPECT_EQ((std::vector{129, 128, 128, 128, 128, 1}), + serializeDeserialize(power2(35) + 1)); + + for (uint32_t i = 0; i < 17000; ++i) { + serializeDeserialize(i); + } +} + TEST_P(StatNameTest, AllocFree) { encodeDecode("hello.world"); } TEST_P(StatNameTest, TestArbitrarySymbolRoundtrip) { @@ -100,6 +138,20 @@ TEST_P(StatNameTest, Test100KSymbolsRoundtrip) { } } +TEST_P(StatNameTest, TestLongSymbolName) { + std::string long_name(100000, 'a'); + EXPECT_EQ(long_name, encodeDecode(long_name)); +} + +TEST_P(StatNameTest, TestLongSequence) { + std::string long_name("a"); + for (int i = 0; i < 100000; ++i) { + absl::StrAppend(&long_name, ".a"); + } + + EXPECT_EQ(long_name, encodeDecode(long_name)); +} + TEST_P(StatNameTest, TestUnusualDelimitersRoundtrip) { const std::vector stat_names = {".x", "..x", "...x", "foo", "foo.x", ".foo", ".foo.x", ".foo..x", "..foo.x", "..foo..x"}; diff --git a/test/common/stats/thread_local_store_test.cc b/test/common/stats/thread_local_store_test.cc index 63cd6ed5577d..68b5e48efac6 100644 --- a/test/common/stats/thread_local_store_test.cc +++ b/test/common/stats/thread_local_store_test.cc @@ -911,8 +911,8 @@ TEST_F(StatsThreadLocalStoreTestNoFixture, MemoryWithoutTlsFakeSymbolTable) { TestUtil::MemoryTest memory_test; TestUtil::forEachSampleStat( 1000, [this](absl::string_view name) { store_->counter(std::string(name)); }); - EXPECT_MEMORY_EQ(memory_test.consumedBytes(), 15268336); // June 30, 2019 - EXPECT_MEMORY_LE(memory_test.consumedBytes(), 16 * million_); + EXPECT_MEMORY_EQ(memory_test.consumedBytes(), 14892880); // Oct 28, 2019 + EXPECT_MEMORY_LE(memory_test.consumedBytes(), 15 * million_); } TEST_F(StatsThreadLocalStoreTestNoFixture, MemoryWithTlsFakeSymbolTable) { @@ -921,7 +921,7 @@ TEST_F(StatsThreadLocalStoreTestNoFixture, MemoryWithTlsFakeSymbolTable) { TestUtil::MemoryTest memory_test; TestUtil::forEachSampleStat( 1000, [this](absl::string_view name) { store_->counter(std::string(name)); }); - EXPECT_MEMORY_EQ(memory_test.consumedBytes(), 17496848); // June 30, 2019 + EXPECT_MEMORY_EQ(memory_test.consumedBytes(), 17121392); // Oct 28, 2019 EXPECT_MEMORY_LE(memory_test.consumedBytes(), 18 * million_); } @@ -931,8 +931,8 @@ TEST_F(StatsThreadLocalStoreTestNoFixture, MemoryWithoutTlsRealSymbolTable) { TestUtil::MemoryTest memory_test; TestUtil::forEachSampleStat( 1000, [this](absl::string_view name) { store_->counter(std::string(name)); }); - EXPECT_MEMORY_EQ(memory_test.consumedBytes(), 9139120); // Aug 9, 2019 - EXPECT_MEMORY_LE(memory_test.consumedBytes(), 10 * million_); + EXPECT_MEMORY_EQ(memory_test.consumedBytes(), 8017968); // Oct 28, 2019 + EXPECT_MEMORY_LE(memory_test.consumedBytes(), 9 * million_); } TEST_F(StatsThreadLocalStoreTestNoFixture, MemoryWithTlsRealSymbolTable) { @@ -941,8 +941,8 @@ TEST_F(StatsThreadLocalStoreTestNoFixture, MemoryWithTlsRealSymbolTable) { TestUtil::MemoryTest memory_test; TestUtil::forEachSampleStat( 1000, [this](absl::string_view name) { store_->counter(std::string(name)); }); - EXPECT_MEMORY_EQ(memory_test.consumedBytes(), 11367632); // Aug 9, 2019 - EXPECT_MEMORY_LE(memory_test.consumedBytes(), 12 * million_); + EXPECT_MEMORY_EQ(memory_test.consumedBytes(), 10246480); // Oct 28, 2019 + EXPECT_MEMORY_LE(memory_test.consumedBytes(), 11 * million_); } TEST_F(StatsThreadLocalStoreTest, ShuttingDown) { diff --git a/test/extensions/filters/http/dynamo/BUILD b/test/extensions/filters/http/dynamo/BUILD index 9d87a0531b40..bdaa4869c276 100644 --- a/test/extensions/filters/http/dynamo/BUILD +++ b/test/extensions/filters/http/dynamo/BUILD @@ -46,6 +46,7 @@ envoy_extension_cc_test( deps = [ "//source/common/stats:stats_lib", "//source/extensions/filters/http/dynamo:dynamo_stats_lib", + "//test/common/stats:stat_test_utility_lib", "//test/mocks/stats:stats_mocks", ], ) diff --git a/test/extensions/filters/http/dynamo/dynamo_stats_test.cc b/test/extensions/filters/http/dynamo/dynamo_stats_test.cc index d479f4a1092f..232ca007f043 100644 --- a/test/extensions/filters/http/dynamo/dynamo_stats_test.cc +++ b/test/extensions/filters/http/dynamo/dynamo_stats_test.cc @@ -2,6 +2,7 @@ #include "extensions/filters/http/dynamo/dynamo_stats.h" +#include "test/common/stats/stat_test_utility.h" #include "test/mocks/stats/mocks.h" #include "gmock/gmock.h" @@ -14,6 +15,7 @@ namespace Dynamo { namespace { TEST(DynamoStats, PartitionIdStatString) { + Stats::TestUtil::SymbolTableCreatorTestPeer::setUseFakeSymbolTables(true); Stats::IsolatedStoreImpl store; auto build_partition_string = [&store](const std::string& stat_prefix, const std::string& table_name, diff --git a/test/integration/stats_integration_test.cc b/test/integration/stats_integration_test.cc index aeb41d47ab82..b0717cd6a8fb 100644 --- a/test/integration/stats_integration_test.cc +++ b/test/integration/stats_integration_test.cc @@ -257,6 +257,7 @@ TEST_P(ClusterMemoryTestRunner, MemoryLargeClusterSizeWithFakeSymbolTable) { // 2019/09/30 8354 43310 44000 Implement transport socket match. // 2019/10/17 8537 43308 44000 add new enum value HTTP3 // 2019/10/17 8484 43340 44000 stats: add unit support to histogram + // 2019/10/17 8779 42856 43000 use var-length coding for name length // Note: when adjusting this value: EXPECT_MEMORY_EQ is active only in CI // 'release' builds, where we control the platform and tool-chain. So you @@ -270,8 +271,8 @@ TEST_P(ClusterMemoryTestRunner, MemoryLargeClusterSizeWithFakeSymbolTable) { // If you encounter a failure here, please see // https://github.com/envoyproxy/envoy/blob/master/source/docs/stats.md#stats-memory-tests // for details on how to fix. - EXPECT_MEMORY_EQ(m_per_cluster, 43340); // 104 bytes higher than a debug build. - EXPECT_MEMORY_LE(m_per_cluster, 44000); + EXPECT_MEMORY_EQ(m_per_cluster, 42856); // 104 bytes higher than a debug build. + EXPECT_MEMORY_LE(m_per_cluster, 43000); } TEST_P(ClusterMemoryTestRunner, MemoryLargeClusterSizeWithRealSymbolTable) { @@ -301,6 +302,7 @@ TEST_P(ClusterMemoryTestRunner, MemoryLargeClusterSizeWithRealSymbolTable) { // 2019/09/30 8354 34969 35000 Implement transport socket match. // 2019/10/17 8537 34966 35000 add new enum value HTTP3 // 2019/10/17 8484 34998 35000 stats: add unit support to histogram + // 2019/10/17 8484 34990 35000 use var-length coding for name lengths // Note: when adjusting this value: EXPECT_MEMORY_EQ is active only in CI // 'release' builds, where we control the platform and tool-chain. So you @@ -314,7 +316,7 @@ TEST_P(ClusterMemoryTestRunner, MemoryLargeClusterSizeWithRealSymbolTable) { // If you encounter a failure here, please see // https://github.com/envoyproxy/envoy/blob/master/source/docs/stats.md#stats-memory-tests // for details on how to fix. - EXPECT_MEMORY_EQ(m_per_cluster, 34998); // 104 bytes higher than a debug build. + EXPECT_MEMORY_EQ(m_per_cluster, 34990); // 104 bytes higher than a debug build. EXPECT_MEMORY_LE(m_per_cluster, 36000); } From beb46844a306abda30fde899dc21e5a8038ba151 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Mon, 28 Oct 2019 18:26:36 -0400 Subject: [PATCH 03/36] some cleanup Signed-off-by: Joshua Marantz --- source/common/stats/fake_symbol_table_impl.h | 4 ++-- source/common/stats/symbol_table_impl.cc | 16 +++++++------- source/common/stats/symbol_table_impl.h | 22 ++++++++++++++++---- test/common/stats/symbol_table_impl_test.cc | 4 ++-- 4 files changed, 30 insertions(+), 16 deletions(-) diff --git a/source/common/stats/fake_symbol_table_impl.h b/source/common/stats/fake_symbol_table_impl.h index 07dfb7518c90..34103fd0133b 100644 --- a/source/common/stats/fake_symbol_table_impl.h +++ b/source/common/stats/fake_symbol_table_impl.h @@ -78,7 +78,7 @@ class FakeSymbolTableImpl : public SymbolTable { for (uint32_t i = 0; i < num_names; ++i) { auto& name = names[i]; size_t sz = name.size(); - p = SymbolTableImpl::Encoding::encode(sz, p); + p = SymbolTableImpl::Encoding::writeEncodingReturningNext(sz, p); if (!name.empty()) { memcpy(p, name.data(), sz * sizeof(uint8_t)); p += sz; @@ -143,7 +143,7 @@ class FakeSymbolTableImpl : public SymbolTable { uint64_t bytes_required = SymbolTableImpl::Encoding::encodingSizeBytes(name.size()) + name.size(); auto bytes = std::make_unique(bytes_required); - uint8_t* buffer = SymbolTableImpl::Encoding::encode(name.size(), bytes.get()); + uint8_t* buffer = SymbolTableImpl::Encoding::writeEncodingReturningNext(name.size(), bytes.get()); memcpy(buffer, name.data(), name.size()); return bytes; } diff --git a/source/common/stats/symbol_table_impl.cc b/source/common/stats/symbol_table_impl.cc index 0b049b0f1e60..83c6c3546647 100644 --- a/source/common/stats/symbol_table_impl.cc +++ b/source/common/stats/symbol_table_impl.cc @@ -21,23 +21,23 @@ uint64_t SymbolTableImpl::Encoding::bytesRequired() const { return data_size + encodingSizeBytes(data_size); } -uint64_t SymbolTableImpl::Encoding::decode(const uint8_t* encoding) { - uint64_t size = 0; +uint64_t SymbolTableImpl::Encoding::decodeNumber(const uint8_t* encoding) { + uint64_t number = 0; for (uint32_t shift = 0; true; ++encoding, shift += 7) { uint64_t uc = static_cast(*encoding); - size |= (uc & Low7Bits) << shift; + number |= (uc & Low7Bits) << shift; if ((uc & SpilloverMask) == 0) { break; } } - return size; + return number; } uint64_t StatName::dataSize() const { if (size_and_data_ == nullptr) { return 0; } - return SymbolTableImpl::Encoding::decode(size_and_data_); + return SymbolTableImpl::Encoding::decodeNumber(size_and_data_); } uint64_t SymbolTableImpl::Encoding::encodingSizeBytes(uint64_t number) { @@ -50,7 +50,7 @@ uint64_t SymbolTableImpl::Encoding::encodingSizeBytes(uint64_t number) { } // writeLengthReturningNext ? -uint8_t* SymbolTableImpl::Encoding::encode(uint64_t number, uint8_t* bytes) { +uint8_t* SymbolTableImpl::Encoding::writeEncodingReturningNext(uint64_t number, uint8_t* bytes) { do { if (number < (1 << 7)) { *bytes++ = number; // number <= 127 get encoded in one byte. @@ -130,7 +130,7 @@ SymbolVec SymbolTableImpl::Encoding::decodeSymbols(const SymbolTable::Storage ar uint64_t SymbolTableImpl::Encoding::moveToStorage(SymbolTable::Storage symbol_array) { const uint64_t sz = dataBytesRequired(); - symbol_array = encode(sz, symbol_array); + symbol_array = writeEncodingReturningNext(sz, symbol_array); if (sz != 0) { memcpy(symbol_array, vec_.data(), sz * sizeof(uint8_t)); } @@ -505,7 +505,7 @@ SymbolTable::StoragePtr SymbolTableImpl::join(const StatNameVec& stat_names) con } auto bytes = std::make_unique(SymbolTableImpl::Encoding::encodingSizeBytes(num_bytes) + num_bytes); - uint8_t* p = SymbolTableImpl::Encoding::encode(num_bytes, bytes.get()); + uint8_t* p = SymbolTableImpl::Encoding::writeEncodingReturningNext(num_bytes, bytes.get()); for (StatName stat_name : stat_names) { const uint64_t stat_name_bytes = stat_name.dataSize(); memcpy(p, stat_name.data(), stat_name_bytes); diff --git a/source/common/stats/symbol_table_impl.h b/source/common/stats/symbol_table_impl.h index 1a594a444077..95291fd037aa 100644 --- a/source/common/stats/symbol_table_impl.h +++ b/source/common/stats/symbol_table_impl.h @@ -120,19 +120,33 @@ class SymbolTableImpl : public SymbolTable { */ uint64_t moveToStorage(SymbolTable::Storage array); - static uint64_t decode(const uint8_t* encoding); + /** + * @param number A number to encode in a variable length byte-array. + * @return The number of bytes it would take to encode the number. + */ static uint64_t encodingSizeBytes(uint64_t number); /** - * Saves the specified length into the byte array, returning the next byte. + * Saves the specified number into the byte array, returning the next byte. * There is no guarantee that bytes will be aligned, so we can't cast to a * uint16_t* and assign, but must individually copy the bytes. * - * @param length the length in bytes to write. + * Requires that the buffer be sized to accomodate encodingSizeBytes(number). + * + * @param number the number to write. * @param bytes the pointer into which to write the length. * @return the pointer to the next byte for writing the data. */ - static uint8_t* encode(uint64_t number, uint8_t* buffer); + static uint8_t* writeEncodingReturningNext(uint64_t number, uint8_t* buffer); + + /** + * Decodes a byte-array containing a variable-length number. + * + * @param The encoded byte array, written previously by writeNumberReturningNext. + * @return The decoded number. + */ + static uint64_t decodeNumber(const uint8_t* encoding); + private: std::vector vec_; diff --git a/test/common/stats/symbol_table_impl_test.cc b/test/common/stats/symbol_table_impl_test.cc index e272bd1f59b1..966bcb73971f 100644 --- a/test/common/stats/symbol_table_impl_test.cc +++ b/test/common/stats/symbol_table_impl_test.cc @@ -72,8 +72,8 @@ class StatNameTest : public testing::TestWithParam { std::vector serializeDeserialize(uint64_t number) { uint64_t num_bytes = SymbolTableImpl::Encoding::encodingSizeBytes(number); uint8_t buf[10]; - EXPECT_EQ(buf + num_bytes, SymbolTableImpl::Encoding::encode(number, buf)) << number; - EXPECT_EQ(number, SymbolTableImpl::Encoding::decode(buf)) << number; + EXPECT_EQ(buf + num_bytes, SymbolTableImpl::Encoding::writeEncodingReturningNext(number, buf)) << number; + EXPECT_EQ(number, SymbolTableImpl::Encoding::decodeNumber(buf)) << number; return std::vector(buf, buf + num_bytes); } From d14336502df5fd5d655290aa6cfbb2d274138f1b Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Mon, 28 Oct 2019 22:30:50 -0400 Subject: [PATCH 04/36] Share helper methods for encoding/decoding symbols and sizes. Signed-off-by: Joshua Marantz --- source/common/stats/fake_symbol_table_impl.h | 3 +- source/common/stats/symbol_table_impl.cc | 114 ++++++++----------- source/common/stats/symbol_table_impl.h | 29 ++--- test/common/stats/symbol_table_impl_test.cc | 3 +- 4 files changed, 67 insertions(+), 82 deletions(-) diff --git a/source/common/stats/fake_symbol_table_impl.h b/source/common/stats/fake_symbol_table_impl.h index 34103fd0133b..2df6c98fe231 100644 --- a/source/common/stats/fake_symbol_table_impl.h +++ b/source/common/stats/fake_symbol_table_impl.h @@ -143,7 +143,8 @@ class FakeSymbolTableImpl : public SymbolTable { uint64_t bytes_required = SymbolTableImpl::Encoding::encodingSizeBytes(name.size()) + name.size(); auto bytes = std::make_unique(bytes_required); - uint8_t* buffer = SymbolTableImpl::Encoding::writeEncodingReturningNext(name.size(), bytes.get()); + uint8_t* buffer = + SymbolTableImpl::Encoding::writeEncodingReturningNext(name.size(), bytes.get()); memcpy(buffer, name.data(), name.size()); return bytes; } diff --git a/source/common/stats/symbol_table_impl.cc b/source/common/stats/symbol_table_impl.cc index 83c6c3546647..4e93ffe64d61 100644 --- a/source/common/stats/symbol_table_impl.cc +++ b/source/common/stats/symbol_table_impl.cc @@ -16,23 +16,6 @@ namespace Stats { static const uint32_t SpilloverMask = 0x80; static const uint32_t Low7Bits = 0x7f; -uint64_t SymbolTableImpl::Encoding::bytesRequired() const { - uint64_t data_size = dataBytesRequired(); - return data_size + encodingSizeBytes(data_size); -} - -uint64_t SymbolTableImpl::Encoding::decodeNumber(const uint8_t* encoding) { - uint64_t number = 0; - for (uint32_t shift = 0; true; ++encoding, shift += 7) { - uint64_t uc = static_cast(*encoding); - number |= (uc & Low7Bits) << shift; - if ((uc & SpilloverMask) == 0) { - break; - } - } - return number; -} - uint64_t StatName::dataSize() const { if (size_and_data_ == nullptr) { return 0; @@ -49,19 +32,6 @@ uint64_t SymbolTableImpl::Encoding::encodingSizeBytes(uint64_t number) { return num_bytes; } -// writeLengthReturningNext ? -uint8_t* SymbolTableImpl::Encoding::writeEncodingReturningNext(uint64_t number, uint8_t* bytes) { - do { - if (number < (1 << 7)) { - *bytes++ = number; // number <= 127 get encoded in one byte. - } else { - *bytes++ = ((number & Low7Bits) | SpilloverMask); // number >= 128 need spillover bytes. - } - number >>= 7; - } while (number != 0); - return bytes; -} - #ifndef ENVOY_CONFIG_COVERAGE void StatName::debugPrint() { if (size_and_data_ == nullptr) { @@ -85,10 +55,10 @@ void StatName::debugPrint() { SymbolTableImpl::Encoding::~Encoding() { // Verifies that moveToStorage() was called on this encoding. Failure // to call moveToStorage() will result in leaks symbols. - ASSERT(vec_.empty()); + ASSERT(storage_ == nullptr); } -void SymbolTableImpl::Encoding::addSymbol(Symbol symbol) { +uint8_t* SymbolTableImpl::Encoding::writeEncodingReturningNext(uint64_t number, uint8_t* bytes) { // UTF-8-like encoding where a value 127 or less gets written as a single // byte. For higher values we write the low-order 7 bits with a 1 in // the high-order bit. Then we right-shift 7 bits and keep adding more bytes @@ -97,46 +67,60 @@ void SymbolTableImpl::Encoding::addSymbol(Symbol symbol) { // When decoding, we stop consuming uint8_t when we see a uint8_t with // high-order bit 0. do { - if (symbol < (1 << 7)) { - vec_.push_back(symbol); // symbols <= 127 get encoded in one byte. + if (number < (1 << 7)) { + *bytes++ = number; // number <= 127 get encoded in one byte. } else { - vec_.push_back((symbol & Low7Bits) | SpilloverMask); // symbols >= 128 need spillover bytes. + *bytes++ = ((number & Low7Bits) | SpilloverMask); // number >= 128 need spillover bytes. + } + number >>= 7; + } while (number != 0); + return bytes; +} + +void SymbolTableImpl::Encoding::addSymbols(const std::vector& symbols) { + ASSERT(data_bytes_required_ == 0); + num_symbols_ = symbols.size(); + for (Symbol symbol : symbols) { + data_bytes_required_ += encodingSizeBytes(symbol); + } + storage_ = std::make_unique(data_bytes_required_); + uint8_t* bytes = storage_.get(); + for (Symbol symbol : symbols) { + bytes = writeEncodingReturningNext(symbol, bytes); + } + ASSERT(static_cast(bytes - storage_.get()) == data_bytes_required_); +} + +uint64_t SymbolTableImpl::Encoding::decodeNumber(const uint8_t* encoding) { + uint64_t number = 0; + for (uint32_t shift = 0; true; ++encoding, shift += 7) { + uint64_t uc = static_cast(*encoding); + number |= (uc & Low7Bits) << shift; + if ((uc & SpilloverMask) == 0) { + break; } - symbol >>= 7; - } while (symbol != 0); + } + return number; } SymbolVec SymbolTableImpl::Encoding::decodeSymbols(const SymbolTable::Storage array, uint64_t size) { SymbolVec symbol_vec; - Symbol symbol = 0; - for (uint32_t shift = 0; size > 0; --size, ++array) { - uint32_t uc = static_cast(*array); - - // Inverse addSymbol encoding, walking down the bytes, shifting them into - // symbol, until a byte with a zero high order bit indicates this symbol is - // complete and we can move to the next one. - symbol |= (uc & Low7Bits) << shift; - if ((uc & SpilloverMask) == 0) { - symbol_vec.push_back(symbol); - shift = 0; - symbol = 0; - } else { - shift += 7; - } + symbol_vec.reserve(size); + for (; size > 0; --size) { + Symbol symbol = decodeNumber(array); + symbol_vec.push_back(symbol); + array += encodingSizeBytes(symbol); // don't recompute this! } return symbol_vec; } -uint64_t SymbolTableImpl::Encoding::moveToStorage(SymbolTable::Storage symbol_array) { - const uint64_t sz = dataBytesRequired(); - symbol_array = writeEncodingReturningNext(sz, symbol_array); - if (sz != 0) { - memcpy(symbol_array, vec_.data(), sz * sizeof(uint8_t)); +void SymbolTableImpl::Encoding::moveToStorage(SymbolTable::Storage symbol_array) { + symbol_array = writeEncodingReturningNext(num_symbols_, symbol_array); + if (data_bytes_required_ != 0) { + memcpy(symbol_array, storage_.get(), data_bytes_required_); } - vec_.clear(); // Logically transfer ownership, enabling empty assert on destruct. - return sz + SymbolTableImpl::Encoding::encodingSizeBytes(sz); - ; + storage_.reset(); // Logically transfer ownership, enabling empty assert on destruct. } SymbolTableImpl::SymbolTableImpl() @@ -176,9 +160,7 @@ void SymbolTableImpl::addTokensToEncoding(const absl::string_view name, Encoding } // Now efficiently encode the array of 32-bit symbols into a uint8_t array. - for (Symbol symbol : symbols) { - encoding.addSymbol(symbol); - } + encoding.addSymbols(symbols); } uint64_t SymbolTableImpl::numSymbols() const { @@ -503,8 +485,7 @@ SymbolTable::StoragePtr SymbolTableImpl::join(const StatNameVec& stat_names) con for (StatName stat_name : stat_names) { num_bytes += stat_name.dataSize(); } - auto bytes = std::make_unique(SymbolTableImpl::Encoding::encodingSizeBytes(num_bytes) + - num_bytes); + auto bytes = std::make_unique(Encoding::encodingSizeBytes(num_bytes) + num_bytes); uint8_t* p = SymbolTableImpl::Encoding::writeEncodingReturningNext(num_bytes, bytes.get()); for (StatName stat_name : stat_names) { const uint64_t stat_name_bytes = stat_name.dataSize(); @@ -534,7 +515,8 @@ void SymbolTableImpl::populateList(const absl::string_view* names, uint32_t num_ uint8_t* p = &storage[0]; *p++ = num_names; for (auto& encoding : encodings) { - p += encoding.moveToStorage(p); + encoding.moveToStorage(p); + p += encoding.bytesRequired(); } // This assertion double-checks the arithmetic where we computed diff --git a/source/common/stats/symbol_table_impl.h b/source/common/stats/symbol_table_impl.h index 95291fd037aa..150f858ced8c 100644 --- a/source/common/stats/symbol_table_impl.h +++ b/source/common/stats/symbol_table_impl.h @@ -30,12 +30,6 @@ namespace Stats { /** A Symbol represents a string-token with a small index. */ using Symbol = uint32_t; -/** - * We encode the byte-size of a StatName as its first two bytes. - */ -// constexpr uint64_t StatNameSizeEncodingBytes = 2; -// constexpr uint64_t StatNameMaxSize = 1 << (8 * StatNameSizeEncodingBytes); // 65536 - /** Transient representations of a vector of 32-bit symbols */ using SymbolVec = std::vector; @@ -93,7 +87,7 @@ class SymbolTableImpl : public SymbolTable { * * @param symbol the symbol to encode. */ - void addSymbol(Symbol symbol); + void addSymbols(const std::vector& symbols); /** * Decodes a uint8_t array into a SymbolVec. @@ -104,21 +98,27 @@ class SymbolTableImpl : public SymbolTable { * Returns the number of bytes required to represent StatName as a uint8_t * array, including the encoded size. */ - uint64_t bytesRequired() const; + uint64_t bytesRequired() const { + return data_bytes_required_ + encodingSizeBytes(num_symbols_); + } /** * @return the number of uint8_t entries we collected while adding symbols. */ - uint64_t dataBytesRequired() const { return vec_.size(); } + uint64_t dataBytesRequired() const { return data_bytes_required_; } + + /** + * @return the number of of symbols added. + */ + uint64_t numSymbols() const { return num_symbols_; } /** * Moves the contents of the vector into an allocated array. The array * must have been allocated with bytesRequired() bytes. * * @param array destination memory to receive the encoded bytes. - * @return uint64_t the number of bytes transferred. */ - uint64_t moveToStorage(SymbolTable::Storage array); + void moveToStorage(SymbolTable::Storage array); /** * @param number A number to encode in a variable length byte-array. @@ -131,7 +131,7 @@ class SymbolTableImpl : public SymbolTable { * There is no guarantee that bytes will be aligned, so we can't cast to a * uint16_t* and assign, but must individually copy the bytes. * - * Requires that the buffer be sized to accomodate encodingSizeBytes(number). + * Requires that the buffer be sized to accommodate encodingSizeBytes(number). * * @param number the number to write. * @param bytes the pointer into which to write the length. @@ -147,9 +147,10 @@ class SymbolTableImpl : public SymbolTable { */ static uint64_t decodeNumber(const uint8_t* encoding); - private: - std::vector vec_; + uint64_t data_bytes_required_{0}; + uint64_t num_symbols_{0}; + StoragePtr storage_; }; SymbolTableImpl(); diff --git a/test/common/stats/symbol_table_impl_test.cc b/test/common/stats/symbol_table_impl_test.cc index 966bcb73971f..7c9e79ce0506 100644 --- a/test/common/stats/symbol_table_impl_test.cc +++ b/test/common/stats/symbol_table_impl_test.cc @@ -72,7 +72,8 @@ class StatNameTest : public testing::TestWithParam { std::vector serializeDeserialize(uint64_t number) { uint64_t num_bytes = SymbolTableImpl::Encoding::encodingSizeBytes(number); uint8_t buf[10]; - EXPECT_EQ(buf + num_bytes, SymbolTableImpl::Encoding::writeEncodingReturningNext(number, buf)) << number; + EXPECT_EQ(buf + num_bytes, SymbolTableImpl::Encoding::writeEncodingReturningNext(number, buf)) + << number; EXPECT_EQ(number, SymbolTableImpl::Encoding::decodeNumber(buf)) << number; return std::vector(buf, buf + num_bytes); } From 5ef1a4957957ba3eea465e2f111518fa756a91d6 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Mon, 28 Oct 2019 22:38:59 -0400 Subject: [PATCH 05/36] cleanup Signed-off-by: Joshua Marantz --- source/common/stats/symbol_table_impl.cc | 20 ++++++++++---------- source/common/stats/symbol_table_impl.h | 13 +++++++++---- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/source/common/stats/symbol_table_impl.cc b/source/common/stats/symbol_table_impl.cc index 4e93ffe64d61..62d924882f04 100644 --- a/source/common/stats/symbol_table_impl.cc +++ b/source/common/stats/symbol_table_impl.cc @@ -23,15 +23,6 @@ uint64_t StatName::dataSize() const { return SymbolTableImpl::Encoding::decodeNumber(size_and_data_); } -uint64_t SymbolTableImpl::Encoding::encodingSizeBytes(uint64_t number) { - uint64_t num_bytes = 0; - do { - ++num_bytes; - number >>= 7; - } while (number != 0); - return num_bytes; -} - #ifndef ENVOY_CONFIG_COVERAGE void StatName::debugPrint() { if (size_and_data_ == nullptr) { @@ -58,6 +49,15 @@ SymbolTableImpl::Encoding::~Encoding() { ASSERT(storage_ == nullptr); } +uint64_t SymbolTableImpl::Encoding::encodingSizeBytes(uint64_t number) { + uint64_t num_bytes = 0; + do { + ++num_bytes; + number >>= 7; + } while (number != 0); + return num_bytes; +} + uint8_t* SymbolTableImpl::Encoding::writeEncodingReturningNext(uint64_t number, uint8_t* bytes) { // UTF-8-like encoding where a value 127 or less gets written as a single // byte. For higher values we write the low-order 7 bits with a 1 in @@ -485,7 +485,7 @@ SymbolTable::StoragePtr SymbolTableImpl::join(const StatNameVec& stat_names) con for (StatName stat_name : stat_names) { num_bytes += stat_name.dataSize(); } - auto bytes = std::make_unique(Encoding::encodingSizeBytes(num_bytes) + num_bytes); + auto bytes = std::make_unique(Encoding::totalSizeBytes(num_bytes)); uint8_t* p = SymbolTableImpl::Encoding::writeEncodingReturningNext(num_bytes, bytes.get()); for (StatName stat_name : stat_names) { const uint64_t stat_name_bytes = stat_name.dataSize(); diff --git a/source/common/stats/symbol_table_impl.h b/source/common/stats/symbol_table_impl.h index 150f858ced8c..01f7cf181ba9 100644 --- a/source/common/stats/symbol_table_impl.h +++ b/source/common/stats/symbol_table_impl.h @@ -126,6 +126,14 @@ class SymbolTableImpl : public SymbolTable { */ static uint64_t encodingSizeBytes(uint64_t number); + /** + * @param num_data_bytes The number of bytes in a data-block. + * @return The total number of bytes required for the data-block and its encoded size. + */ + static uint64_t totalSizeBytes(uint64_t num_data_bytes) { + return encodingSizeBytes(num_bytes) + num_bytes; + } + /** * Saves the specified number into the byte array, returning the next byte. * There is no guarantee that bytes will be aligned, so we can't cast to a @@ -381,10 +389,7 @@ class StatName { * @return uint64_t the number of bytes in the symbol array, including the two-byte * overhead for the size itself. */ - uint64_t size() const { - uint64_t sz = dataSize(); - return sz + SymbolTableImpl::Encoding::encodingSizeBytes(sz); - } + uint64_t size() const { return SymbolTableImpl::Encoding::totalSizeBytes(dataSize()); } void copyToStorage(SymbolTable::Storage storage) { memcpy(storage, size_and_data_, size()); } From c425551783d8721aba18fa04520c000d7d09b760 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Mon, 28 Oct 2019 22:41:08 -0400 Subject: [PATCH 06/36] typo Signed-off-by: Joshua Marantz --- source/common/stats/symbol_table_impl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/common/stats/symbol_table_impl.h b/source/common/stats/symbol_table_impl.h index 01f7cf181ba9..477ca7970f88 100644 --- a/source/common/stats/symbol_table_impl.h +++ b/source/common/stats/symbol_table_impl.h @@ -131,7 +131,7 @@ class SymbolTableImpl : public SymbolTable { * @return The total number of bytes required for the data-block and its encoded size. */ static uint64_t totalSizeBytes(uint64_t num_data_bytes) { - return encodingSizeBytes(num_bytes) + num_bytes; + return encodingSizeBytes(num_data_bytes) + num_data_bytes; } /** From f680cccd24892c677ee3b04fa834bd0dfc5b253b Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Tue, 29 Oct 2019 09:58:49 -0400 Subject: [PATCH 07/36] fix confusing about whether the encoding-length was the number of bytes or symbols. it's bytes. Signed-off-by: Joshua Marantz --- source/common/stats/symbol_table_impl.cc | 8 +++++--- source/common/stats/symbol_table_impl.h | 2 +- test/common/stats/symbol_table_impl_test.cc | 8 ++++++++ 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/source/common/stats/symbol_table_impl.cc b/source/common/stats/symbol_table_impl.cc index 62d924882f04..868dea540b08 100644 --- a/source/common/stats/symbol_table_impl.cc +++ b/source/common/stats/symbol_table_impl.cc @@ -107,16 +107,18 @@ SymbolVec SymbolTableImpl::Encoding::decodeSymbols(const SymbolTable::Storage ar uint64_t size) { SymbolVec symbol_vec; symbol_vec.reserve(size); - for (; size > 0; --size) { + while (size > 0) { Symbol symbol = decodeNumber(array); symbol_vec.push_back(symbol); - array += encodingSizeBytes(symbol); // don't recompute this! + uint64_t symbol_size = encodingSizeBytes(symbol); // don't recompute this! + array += symbol_size; + size -= symbol_size; } return symbol_vec; } void SymbolTableImpl::Encoding::moveToStorage(SymbolTable::Storage symbol_array) { - symbol_array = writeEncodingReturningNext(num_symbols_, symbol_array); + symbol_array = writeEncodingReturningNext(data_bytes_required_, symbol_array); if (data_bytes_required_ != 0) { memcpy(symbol_array, storage_.get(), data_bytes_required_); } diff --git a/source/common/stats/symbol_table_impl.h b/source/common/stats/symbol_table_impl.h index 477ca7970f88..e562b727a089 100644 --- a/source/common/stats/symbol_table_impl.h +++ b/source/common/stats/symbol_table_impl.h @@ -99,7 +99,7 @@ class SymbolTableImpl : public SymbolTable { * array, including the encoded size. */ uint64_t bytesRequired() const { - return data_bytes_required_ + encodingSizeBytes(num_symbols_); + return data_bytes_required_ + encodingSizeBytes(data_bytes_required_); } /** diff --git a/test/common/stats/symbol_table_impl_test.cc b/test/common/stats/symbol_table_impl_test.cc index 7c9e79ce0506..76274c21174d 100644 --- a/test/common/stats/symbol_table_impl_test.cc +++ b/test/common/stats/symbol_table_impl_test.cc @@ -139,6 +139,14 @@ TEST_P(StatNameTest, Test100KSymbolsRoundtrip) { } } +TEST_P(StatNameTest, TwoHundredTwoLevel) { + for (int i = 0; i < 200; ++i) { + const std::string stat_name = absl::StrCat("symbol_", i); + EXPECT_EQ(stat_name, encodeDecode(stat_name)); + } + EXPECT_EQ("http.foo", encodeDecode("http.foo")); +} + TEST_P(StatNameTest, TestLongSymbolName) { std::string long_name(100000, 'a'); EXPECT_EQ(long_name, encodeDecode(long_name)); From 9fee9f5dd54d08c53f8723cda3b0931cda5596b3 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Tue, 29 Oct 2019 12:37:41 -0400 Subject: [PATCH 08/36] fix over-aggressive inequality for fake symbol tables Signed-off-by: Joshua Marantz --- test/integration/stats_integration_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/integration/stats_integration_test.cc b/test/integration/stats_integration_test.cc index b0717cd6a8fb..951cc1fc683e 100644 --- a/test/integration/stats_integration_test.cc +++ b/test/integration/stats_integration_test.cc @@ -272,7 +272,7 @@ TEST_P(ClusterMemoryTestRunner, MemoryLargeClusterSizeWithFakeSymbolTable) { // https://github.com/envoyproxy/envoy/blob/master/source/docs/stats.md#stats-memory-tests // for details on how to fix. EXPECT_MEMORY_EQ(m_per_cluster, 42856); // 104 bytes higher than a debug build. - EXPECT_MEMORY_LE(m_per_cluster, 43000); + EXPECT_MEMORY_LE(m_per_cluster, 44000); } TEST_P(ClusterMemoryTestRunner, MemoryLargeClusterSizeWithRealSymbolTable) { From 184fbed7a12a29c5e6d218e962acab8272460beb Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Tue, 29 Oct 2019 12:57:45 -0400 Subject: [PATCH 09/36] don't recompute the number of bytes consumed. Signed-off-by: Joshua Marantz --- source/common/stats/symbol_table_impl.cc | 25 +++++++++------------ source/common/stats/symbol_table_impl.h | 10 ++------- test/common/stats/symbol_table_impl_test.cc | 2 +- 3 files changed, 14 insertions(+), 23 deletions(-) diff --git a/source/common/stats/symbol_table_impl.cc b/source/common/stats/symbol_table_impl.cc index 868dea540b08..582a1e0731de 100644 --- a/source/common/stats/symbol_table_impl.cc +++ b/source/common/stats/symbol_table_impl.cc @@ -20,7 +20,7 @@ uint64_t StatName::dataSize() const { if (size_and_data_ == nullptr) { return 0; } - return SymbolTableImpl::Encoding::decodeNumber(size_and_data_); + return SymbolTableImpl::Encoding::decodeNumber(size_and_data_).first; } #ifndef ENVOY_CONFIG_COVERAGE @@ -79,7 +79,6 @@ uint8_t* SymbolTableImpl::Encoding::writeEncodingReturningNext(uint64_t number, void SymbolTableImpl::Encoding::addSymbols(const std::vector& symbols) { ASSERT(data_bytes_required_ == 0); - num_symbols_ = symbols.size(); for (Symbol symbol : symbols) { data_bytes_required_ += encodingSizeBytes(symbol); } @@ -91,16 +90,15 @@ void SymbolTableImpl::Encoding::addSymbols(const std::vector& symbols) { ASSERT(static_cast(bytes - storage_.get()) == data_bytes_required_); } -uint64_t SymbolTableImpl::Encoding::decodeNumber(const uint8_t* encoding) { +std::pair SymbolTableImpl::Encoding::decodeNumber(const uint8_t* encoding) { uint64_t number = 0; - for (uint32_t shift = 0; true; ++encoding, shift += 7) { - uint64_t uc = static_cast(*encoding); + uint64_t uc = SpilloverMask; + const uint8_t* start = encoding; + for (uint32_t shift = 0; (uc & SpilloverMask) != 0; ++encoding, shift += 7) { + uc = static_cast(*encoding); number |= (uc & Low7Bits) << shift; - if ((uc & SpilloverMask) == 0) { - break; - } } - return number; + return std::make_pair(number, encoding - start); } SymbolVec SymbolTableImpl::Encoding::decodeSymbols(const SymbolTable::Storage array, @@ -108,11 +106,10 @@ SymbolVec SymbolTableImpl::Encoding::decodeSymbols(const SymbolTable::Storage ar SymbolVec symbol_vec; symbol_vec.reserve(size); while (size > 0) { - Symbol symbol = decodeNumber(array); - symbol_vec.push_back(symbol); - uint64_t symbol_size = encodingSizeBytes(symbol); // don't recompute this! - array += symbol_size; - size -= symbol_size; + std::pair symbol_consumed = decodeNumber(array); + symbol_vec.push_back(symbol_consumed.first); + size -= symbol_consumed.second; + array += symbol_consumed.second; } return symbol_vec; } diff --git a/source/common/stats/symbol_table_impl.h b/source/common/stats/symbol_table_impl.h index e562b727a089..6323be92c6d5 100644 --- a/source/common/stats/symbol_table_impl.h +++ b/source/common/stats/symbol_table_impl.h @@ -107,11 +107,6 @@ class SymbolTableImpl : public SymbolTable { */ uint64_t dataBytesRequired() const { return data_bytes_required_; } - /** - * @return the number of of symbols added. - */ - uint64_t numSymbols() const { return num_symbols_; } - /** * Moves the contents of the vector into an allocated array. The array * must have been allocated with bytesRequired() bytes. @@ -151,13 +146,12 @@ class SymbolTableImpl : public SymbolTable { * Decodes a byte-array containing a variable-length number. * * @param The encoded byte array, written previously by writeNumberReturningNext. - * @return The decoded number. + * @return A pair containing the decoded number, and the number of bytes consumed from encoding. */ - static uint64_t decodeNumber(const uint8_t* encoding); + static std::pair decodeNumber(const uint8_t* encoding); private: uint64_t data_bytes_required_{0}; - uint64_t num_symbols_{0}; StoragePtr storage_; }; diff --git a/test/common/stats/symbol_table_impl_test.cc b/test/common/stats/symbol_table_impl_test.cc index 76274c21174d..e6eabadd453f 100644 --- a/test/common/stats/symbol_table_impl_test.cc +++ b/test/common/stats/symbol_table_impl_test.cc @@ -74,7 +74,7 @@ class StatNameTest : public testing::TestWithParam { uint8_t buf[10]; EXPECT_EQ(buf + num_bytes, SymbolTableImpl::Encoding::writeEncodingReturningNext(number, buf)) << number; - EXPECT_EQ(number, SymbolTableImpl::Encoding::decodeNumber(buf)) << number; + EXPECT_EQ(number, SymbolTableImpl::Encoding::decodeNumber(buf).first) << number; return std::vector(buf, buf + num_bytes); } From 9c62a9eeb356c46b06a8d0eb174ea6cf2c004ae7 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Tue, 29 Oct 2019 15:27:04 -0400 Subject: [PATCH 10/36] remove dynamo-stats test change which is not needed Signed-off-by: Joshua Marantz --- test/extensions/filters/http/dynamo/BUILD | 1 - test/extensions/filters/http/dynamo/dynamo_stats_test.cc | 2 -- 2 files changed, 3 deletions(-) diff --git a/test/extensions/filters/http/dynamo/BUILD b/test/extensions/filters/http/dynamo/BUILD index bdaa4869c276..9d87a0531b40 100644 --- a/test/extensions/filters/http/dynamo/BUILD +++ b/test/extensions/filters/http/dynamo/BUILD @@ -46,7 +46,6 @@ envoy_extension_cc_test( deps = [ "//source/common/stats:stats_lib", "//source/extensions/filters/http/dynamo:dynamo_stats_lib", - "//test/common/stats:stat_test_utility_lib", "//test/mocks/stats:stats_mocks", ], ) diff --git a/test/extensions/filters/http/dynamo/dynamo_stats_test.cc b/test/extensions/filters/http/dynamo/dynamo_stats_test.cc index 232ca007f043..d479f4a1092f 100644 --- a/test/extensions/filters/http/dynamo/dynamo_stats_test.cc +++ b/test/extensions/filters/http/dynamo/dynamo_stats_test.cc @@ -2,7 +2,6 @@ #include "extensions/filters/http/dynamo/dynamo_stats.h" -#include "test/common/stats/stat_test_utility.h" #include "test/mocks/stats/mocks.h" #include "gmock/gmock.h" @@ -15,7 +14,6 @@ namespace Dynamo { namespace { TEST(DynamoStats, PartitionIdStatString) { - Stats::TestUtil::SymbolTableCreatorTestPeer::setUseFakeSymbolTables(true); Stats::IsolatedStoreImpl store; auto build_partition_string = [&store](const std::string& stat_prefix, const std::string& table_name, From 4482ca7c075beabe183873eb567d569452ce73e8 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Tue, 29 Oct 2019 16:53:16 -0400 Subject: [PATCH 11/36] use totalSizeBytes to simplify some code. Signed-off-by: Joshua Marantz --- source/common/stats/fake_symbol_table_impl.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/source/common/stats/fake_symbol_table_impl.h b/source/common/stats/fake_symbol_table_impl.h index 2df6c98fe231..8fc0c8891b25 100644 --- a/source/common/stats/fake_symbol_table_impl.h +++ b/source/common/stats/fake_symbol_table_impl.h @@ -67,7 +67,7 @@ class FakeSymbolTableImpl : public SymbolTable { size_t total_size_bytes = 1; /* one byte for holding the number of names */ for (uint32_t i = 0; i < num_names; ++i) { size_t name_size = names[i].size(); - total_size_bytes += SymbolTableImpl::Encoding::encodingSizeBytes(name_size) + name_size; + total_size_bytes += SymbolTableImpl::Encoding::totalSizeBytes(name_size); } // Now allocate the exact number of bytes required and move the encodings @@ -140,9 +140,7 @@ class FakeSymbolTableImpl : public SymbolTable { StoragePtr encodeHelper(absl::string_view name) const { ASSERT(!absl::EndsWith(name, ".")); - uint64_t bytes_required = - SymbolTableImpl::Encoding::encodingSizeBytes(name.size()) + name.size(); - auto bytes = std::make_unique(bytes_required); + auto bytes = std::make_unique(SymbolTableImpl::Encoding::totalSizeBytes(name.size())); uint8_t* buffer = SymbolTableImpl::Encoding::writeEncodingReturningNext(name.size(), bytes.get()); memcpy(buffer, name.data(), name.size()); From 4d345d0f3ca72597d053805a41d5fa02fedd0ab1 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Fri, 1 Nov 2019 22:23:22 -0400 Subject: [PATCH 12/36] add MemBlock abstraction to isolate risky memory operations. Signed-off-by: Joshua Marantz --- source/common/common/BUILD | 6 +++ source/common/common/mem_block.h | 51 ++++++++++++++++++++ source/common/stats/BUILD | 1 + source/common/stats/symbol_table_impl.cc | 59 ++++++++++++++++-------- source/common/stats/symbol_table_impl.h | 12 +++-- 5 files changed, 105 insertions(+), 24 deletions(-) create mode 100644 source/common/common/mem_block.h diff --git a/source/common/common/BUILD b/source/common/common/BUILD index c8e4b2ae5b22..675d203ff40b 100644 --- a/source/common/common/BUILD +++ b/source/common/common/BUILD @@ -113,6 +113,12 @@ envoy_cc_library( deps = [":assert_lib"], ) +envoy_cc_library( + name = "mem_block_lib", + hdrs = ["mem_block.h"], + deps = [":assert_lib"], +) + # Contains macros and helpers for dumpState utilities envoy_cc_library( name = "dump_state_utils", diff --git a/source/common/common/mem_block.h b/source/common/common/mem_block.h new file mode 100644 index 000000000000..6fe15dd71b98 --- /dev/null +++ b/source/common/common/mem_block.h @@ -0,0 +1,51 @@ +#pragma once + +#include + +#include "common/common/assert.h" + +namespace Envoy { + +template class MemBlock { + public: + // Constructs a MemBlock wrapper to an existing memory block. The caller + // is responsible for ensuring that 'data' has size 'size'. + explicit MemBlock(uint64_t size) : data_(std::make_unique(size)), size_(size), + next_(data_.get()) {} + + T* data() { return data_.get(); } + + void copyFrom(const MemBlock& src, uint64_t size, + uint64_t src_offset = 0, uint64_t dst_offset = 0) { + ASSERT(src_offset + size <= src.size_); + ASSERT(dst_offset + size <= size_); + memcpy(data_.get() + dst_offset, src.data_ + src_offset, size); + next_ += size; + } + + void dangerousCopyFrom(const T* source, uint64_t size, uint64_t dst_offset = 0) { + ASSERT(dst_offset + size <= size_); + memcpy(data_.get() + dst_offset, source, size); + next_ += size; + } + + void push_back(T byte) { + ASSERT(static_cast((next_ - data_.get()) + 1) <= size_); + *next_++ = byte; + } + + void append(const T* byte, uint64_t size) { + ASSERT(static_cast((next_ - data_.get()) + size) <= size_); + memcpy(next_, byte, size); + next_ += size; + } + + std::unique_ptr release() { return std::move(data_); } + + private: + std::unique_ptr data_; + uint64_t size_; + uint8_t* next_; +}; + +} // namespace Envoy diff --git a/source/common/stats/BUILD b/source/common/stats/BUILD index 80bde943e334..95e35b137a29 100644 --- a/source/common/stats/BUILD +++ b/source/common/stats/BUILD @@ -160,6 +160,7 @@ envoy_cc_library( "//include/envoy/stats:symbol_table_interface", "//source/common/common:assert_lib", "//source/common/common:logger_lib", + "//source/common/common:mem_block_lib", "//source/common/common:stack_array", "//source/common/common:thread_lib", "//source/common/common:utility_lib", diff --git a/source/common/stats/symbol_table_impl.cc b/source/common/stats/symbol_table_impl.cc index 582a1e0731de..9dd220c4b6eb 100644 --- a/source/common/stats/symbol_table_impl.cc +++ b/source/common/stats/symbol_table_impl.cc @@ -77,6 +77,24 @@ uint8_t* SymbolTableImpl::Encoding::writeEncodingReturningNext(uint64_t number, return bytes; } +void SymbolTableImpl::Encoding::appendEncoding(uint64_t number, MemBlock& mem_block) { + // UTF-8-like encoding where a value 127 or less gets written as a single + // byte. For higher values we write the low-order 7 bits with a 1 in + // the high-order bit. Then we right-shift 7 bits and keep adding more bytes + // until we have consumed all the non-zero bits in symbol. + // + // When decoding, we stop consuming uint8_t when we see a uint8_t with + // high-order bit 0. + do { + if (number < (1 << 7)) { + mem_block.push_back(number); // number <= 127 get encoded in one byte. + } else { + mem_block.push_back((number & Low7Bits) | SpilloverMask); // >= 128 need spillover bytes. + } + number >>= 7; + } while (number != 0); +} + void SymbolTableImpl::Encoding::addSymbols(const std::vector& symbols) { ASSERT(data_bytes_required_ == 0); for (Symbol symbol : symbols) { @@ -114,10 +132,11 @@ SymbolVec SymbolTableImpl::Encoding::decodeSymbols(const SymbolTable::Storage ar return symbol_vec; } -void SymbolTableImpl::Encoding::moveToStorage(SymbolTable::Storage symbol_array) { - symbol_array = writeEncodingReturningNext(data_bytes_required_, symbol_array); +void SymbolTableImpl::Encoding::moveToStorage(MemBlock& mem_block, uint64_t dst_offset) { + uint8_t* p = mem_block.data() + dst_offset; + dst_offset += writeEncodingReturningNext(data_bytes_required_, p) - p; if (data_bytes_required_ != 0) { - memcpy(symbol_array, storage_.get(), data_bytes_required_); + mem_block.dangerousCopyFrom(storage_.get(), data_bytes_required_, dst_offset); } storage_.reset(); // Logically transfer ownership, enabling empty assert on destruct. } @@ -407,9 +426,9 @@ SymbolTable::StoragePtr SymbolTableImpl::encode(absl::string_view name) { ASSERT(!absl::EndsWith(name, ".")); Encoding encoding; addTokensToEncoding(name, encoding); - auto bytes = std::make_unique(encoding.bytesRequired()); - encoding.moveToStorage(bytes.get()); - return bytes; + MemBlock mem_block(encoding.bytesRequired()); + encoding.moveToStorage(mem_block); + return mem_block.release(); } StatNameStorage::StatNameStorage(absl::string_view name, SymbolTable& table) @@ -417,8 +436,9 @@ StatNameStorage::StatNameStorage(absl::string_view name, SymbolTable& table) StatNameStorage::StatNameStorage(StatName src, SymbolTable& table) { const uint64_t size = src.size(); - bytes_ = std::make_unique(size); - src.copyToStorage(bytes_.get()); + MemBlock storage(size); + src.copyToStorage(storage); + bytes_ = storage.release(); table.incRefCount(statName()); } @@ -484,14 +504,13 @@ SymbolTable::StoragePtr SymbolTableImpl::join(const StatNameVec& stat_names) con for (StatName stat_name : stat_names) { num_bytes += stat_name.dataSize(); } - auto bytes = std::make_unique(Encoding::totalSizeBytes(num_bytes)); - uint8_t* p = SymbolTableImpl::Encoding::writeEncodingReturningNext(num_bytes, bytes.get()); + MemBlock mem_block(Encoding::totalSizeBytes(num_bytes)); + Encoding::appendEncoding(num_bytes, mem_block); for (StatName stat_name : stat_names) { const uint64_t stat_name_bytes = stat_name.dataSize(); - memcpy(p, stat_name.data(), stat_name_bytes); - p += stat_name_bytes; + mem_block.append(stat_name.data(), stat_name_bytes); } - return bytes; + return mem_block.release(); } void SymbolTableImpl::populateList(const absl::string_view* names, uint32_t num_names, @@ -510,20 +529,20 @@ void SymbolTableImpl::populateList(const absl::string_view* names, uint32_t num_ // Now allocate the exact number of bytes required and move the encodings // into storage. - auto storage = std::make_unique(total_size_bytes); - uint8_t* p = &storage[0]; - *p++ = num_names; + MemBlock mem_block(total_size_bytes); + mem_block.data()[0] = num_names; + uint64_t offset = 1; for (auto& encoding : encodings) { - encoding.moveToStorage(p); - p += encoding.bytesRequired(); + encoding.moveToStorage(mem_block, offset); + offset += encoding.bytesRequired(); } // This assertion double-checks the arithmetic where we computed // total_size_bytes. After appending all the encoded data into the // allocated byte array, we should wind up with a pointer difference of // total_size_bytes from the beginning of the allocation. - ASSERT(p == &storage[0] + total_size_bytes); - list.moveStorageIntoList(std::move(storage)); + //ASSERT(p == &storage[0] + total_size_bytes); + list.moveStorageIntoList(mem_block.release()); } StatNameList::~StatNameList() { ASSERT(!populated()); } diff --git a/source/common/stats/symbol_table_impl.h b/source/common/stats/symbol_table_impl.h index 6323be92c6d5..af76a72f35c6 100644 --- a/source/common/stats/symbol_table_impl.h +++ b/source/common/stats/symbol_table_impl.h @@ -14,6 +14,7 @@ #include "common/common/assert.h" #include "common/common/hash.h" #include "common/common/lock_guard.h" +#include "common/common/mem_block.h" #include "common/common/non_copyable.h" #include "common/common/stack_array.h" #include "common/common/thread.h" @@ -113,7 +114,7 @@ class SymbolTableImpl : public SymbolTable { * * @param array destination memory to receive the encoded bytes. */ - void moveToStorage(SymbolTable::Storage array); + void moveToStorage(MemBlock& array, uint64_t dst_offset = 0); /** * @param number A number to encode in a variable length byte-array. @@ -137,10 +138,11 @@ class SymbolTableImpl : public SymbolTable { * Requires that the buffer be sized to accommodate encodingSizeBytes(number). * * @param number the number to write. - * @param bytes the pointer into which to write the length. + * @param bytes the pointer into which to write the number. * @return the pointer to the next byte for writing the data. */ - static uint8_t* writeEncodingReturningNext(uint64_t number, uint8_t* buffer); + static uint8_t* writeEncodingReturningNext(uint64_t number, uint8_t* bytes); + static void appendEncoding(uint64_t number, MemBlock& mem_block); /** * Decodes a byte-array containing a variable-length number. @@ -385,7 +387,9 @@ class StatName { */ uint64_t size() const { return SymbolTableImpl::Encoding::totalSizeBytes(dataSize()); } - void copyToStorage(SymbolTable::Storage storage) { memcpy(storage, size_and_data_, size()); } + void copyToStorage(MemBlock& storage) { + storage.dangerousCopyFrom(size_and_data_, size()); + } #ifndef ENVOY_CONFIG_COVERAGE void debugPrint(); From 5b6f0dbd7b4d9cb8346d25e858ce80ad55384fa8 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Sat, 2 Nov 2019 00:02:59 -0400 Subject: [PATCH 13/36] reduce need to do byte access to the memory buffers. Signed-off-by: Joshua Marantz --- source/common/common/mem_block.h | 27 +++++++++++++++++++++-- source/common/stats/symbol_table_impl.cc | 28 ++++++++++-------------- source/common/stats/symbol_table_impl.h | 6 +++-- 3 files changed, 40 insertions(+), 21 deletions(-) diff --git a/source/common/common/mem_block.h b/source/common/common/mem_block.h index 6fe15dd71b98..10f1c6a6d904 100644 --- a/source/common/common/mem_block.h +++ b/source/common/common/mem_block.h @@ -12,6 +12,13 @@ template class MemBlock { // is responsible for ensuring that 'data' has size 'size'. explicit MemBlock(uint64_t size) : data_(std::make_unique(size)), size_(size), next_(data_.get()) {} + explicit MemBlock() : size_(0), next_(nullptr) {} + + void populate(uint64_t size) { + data_ = std::make_unique(size); + size_ = size; + next_ = data_.get(); + } T* data() { return data_.get(); } @@ -30,16 +37,32 @@ template class MemBlock { } void push_back(T byte) { - ASSERT(static_cast((next_ - data_.get()) + 1) <= size_); + ASSERT(bytesRemaining() >= 1); *next_++ = byte; } + uint64_t bytesRemaining() const { + return (data_.get() + size_) - next_; + } + void append(const T* byte, uint64_t size) { - ASSERT(static_cast((next_ - data_.get()) + size) <= size_); + ASSERT(bytesRemaining() >= size); memcpy(next_, byte, size); next_ += size; } + void append(const MemBlock& src) { + ASSERT(bytesRemaining() >= src.size_); + memcpy(next_, src.data_.get(), src.size_); + next_ += src.size_; + } + + void reset() { + data_.reset(); + size_ = 0; + next_ = nullptr; + } + std::unique_ptr release() { return std::move(data_); } private: diff --git a/source/common/stats/symbol_table_impl.cc b/source/common/stats/symbol_table_impl.cc index 9dd220c4b6eb..45e59f6b661b 100644 --- a/source/common/stats/symbol_table_impl.cc +++ b/source/common/stats/symbol_table_impl.cc @@ -46,7 +46,7 @@ void StatName::debugPrint() { SymbolTableImpl::Encoding::~Encoding() { // Verifies that moveToStorage() was called on this encoding. Failure // to call moveToStorage() will result in leaks symbols. - ASSERT(storage_ == nullptr); + ASSERT(mem_block_.data() == nullptr); } uint64_t SymbolTableImpl::Encoding::encodingSizeBytes(uint64_t number) { @@ -100,12 +100,11 @@ void SymbolTableImpl::Encoding::addSymbols(const std::vector& symbols) { for (Symbol symbol : symbols) { data_bytes_required_ += encodingSizeBytes(symbol); } - storage_ = std::make_unique(data_bytes_required_); - uint8_t* bytes = storage_.get(); + mem_block_.populate(data_bytes_required_); for (Symbol symbol : symbols) { - bytes = writeEncodingReturningNext(symbol, bytes); + appendEncoding(symbol, mem_block_); } - ASSERT(static_cast(bytes - storage_.get()) == data_bytes_required_); + //ASSERT(static_cast(bytes - storage_.get()) == data_bytes_required_); } std::pair SymbolTableImpl::Encoding::decodeNumber(const uint8_t* encoding) { @@ -132,13 +131,10 @@ SymbolVec SymbolTableImpl::Encoding::decodeSymbols(const SymbolTable::Storage ar return symbol_vec; } -void SymbolTableImpl::Encoding::moveToStorage(MemBlock& mem_block, uint64_t dst_offset) { - uint8_t* p = mem_block.data() + dst_offset; - dst_offset += writeEncodingReturningNext(data_bytes_required_, p) - p; - if (data_bytes_required_ != 0) { - mem_block.dangerousCopyFrom(storage_.get(), data_bytes_required_, dst_offset); - } - storage_.reset(); // Logically transfer ownership, enabling empty assert on destruct. +void SymbolTableImpl::Encoding::moveToStorage(MemBlock& mem_block) { + appendEncoding(data_bytes_required_, mem_block); + mem_block.append(mem_block_); + mem_block_.reset(); // Logically transfer ownership, enabling empty assert on destruct. } SymbolTableImpl::SymbolTableImpl() @@ -426,7 +422,7 @@ SymbolTable::StoragePtr SymbolTableImpl::encode(absl::string_view name) { ASSERT(!absl::EndsWith(name, ".")); Encoding encoding; addTokensToEncoding(name, encoding); - MemBlock mem_block(encoding.bytesRequired()); + MemBlock mem_block(Encoding::totalSizeBytes(encoding.bytesRequired())); encoding.moveToStorage(mem_block); return mem_block.release(); } @@ -530,11 +526,9 @@ void SymbolTableImpl::populateList(const absl::string_view* names, uint32_t num_ // Now allocate the exact number of bytes required and move the encodings // into storage. MemBlock mem_block(total_size_bytes); - mem_block.data()[0] = num_names; - uint64_t offset = 1; + mem_block.push_back(num_names); for (auto& encoding : encodings) { - encoding.moveToStorage(mem_block, offset); - offset += encoding.bytesRequired(); + encoding.moveToStorage(mem_block); } // This assertion double-checks the arithmetic where we computed diff --git a/source/common/stats/symbol_table_impl.h b/source/common/stats/symbol_table_impl.h index af76a72f35c6..487b97a04d3c 100644 --- a/source/common/stats/symbol_table_impl.h +++ b/source/common/stats/symbol_table_impl.h @@ -114,7 +114,7 @@ class SymbolTableImpl : public SymbolTable { * * @param array destination memory to receive the encoded bytes. */ - void moveToStorage(MemBlock& array, uint64_t dst_offset = 0); + void moveToStorage(MemBlock& array); /** * @param number A number to encode in a variable length byte-array. @@ -152,9 +152,11 @@ class SymbolTableImpl : public SymbolTable { */ static std::pair decodeNumber(const uint8_t* encoding); + StoragePtr release() { return mem_block_.release(); } + private: uint64_t data_bytes_required_{0}; - StoragePtr storage_; + MemBlock mem_block_; }; SymbolTableImpl(); From 70ffe338cba88597486133d7662688aae62c93ea Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Sat, 2 Nov 2019 00:17:33 -0400 Subject: [PATCH 14/36] remove remainder of memcpy and pointer arithmetic outside MemBlock class. Signed-off-by: Joshua Marantz --- source/common/common/mem_block.h | 26 ++++++-------------- source/common/stats/fake_symbol_table_impl.h | 23 ++++++++--------- source/common/stats/symbol_table_impl.cc | 2 +- source/common/stats/symbol_table_impl.h | 4 +-- 4 files changed, 19 insertions(+), 36 deletions(-) diff --git a/source/common/common/mem_block.h b/source/common/common/mem_block.h index 10f1c6a6d904..0af083e9d95f 100644 --- a/source/common/common/mem_block.h +++ b/source/common/common/mem_block.h @@ -20,21 +20,7 @@ template class MemBlock { next_ = data_.get(); } - T* data() { return data_.get(); } - - void copyFrom(const MemBlock& src, uint64_t size, - uint64_t src_offset = 0, uint64_t dst_offset = 0) { - ASSERT(src_offset + size <= src.size_); - ASSERT(dst_offset + size <= size_); - memcpy(data_.get() + dst_offset, src.data_ + src_offset, size); - next_ += size; - } - - void dangerousCopyFrom(const T* source, uint64_t size, uint64_t dst_offset = 0) { - ASSERT(dst_offset + size <= size_); - memcpy(data_.get() + dst_offset, source, size); - next_ += size; - } + bool empty() const { return data_ == nullptr; } void push_back(T byte) { ASSERT(bytesRemaining() >= 1); @@ -52,9 +38,7 @@ template class MemBlock { } void append(const MemBlock& src) { - ASSERT(bytesRemaining() >= src.size_); - memcpy(next_, src.data_.get(), src.size_); - next_ += src.size_; + append(src.data_.get(), src.size_); } void reset() { @@ -63,7 +47,11 @@ template class MemBlock { next_ = nullptr; } - std::unique_ptr release() { return std::move(data_); } + std::unique_ptr release() { + next_ = nullptr; + size_ = 0; + return std::move(data_); + } private: std::unique_ptr data_; diff --git a/source/common/stats/fake_symbol_table_impl.h b/source/common/stats/fake_symbol_table_impl.h index 8fc0c8891b25..5363019c4f2a 100644 --- a/source/common/stats/fake_symbol_table_impl.h +++ b/source/common/stats/fake_symbol_table_impl.h @@ -72,16 +72,14 @@ class FakeSymbolTableImpl : public SymbolTable { // Now allocate the exact number of bytes required and move the encodings // into storage. - auto storage = std::make_unique(total_size_bytes); - uint8_t* p = &storage[0]; - *p++ = num_names; + MemBlock mem_block(total_size_bytes); + mem_block.push_back(num_names); for (uint32_t i = 0; i < num_names; ++i) { auto& name = names[i]; size_t sz = name.size(); - p = SymbolTableImpl::Encoding::writeEncodingReturningNext(sz, p); + SymbolTableImpl::Encoding::appendEncoding(sz, mem_block); if (!name.empty()) { - memcpy(p, name.data(), sz * sizeof(uint8_t)); - p += sz; + mem_block.append(reinterpret_cast(name.data()), sz); } } @@ -89,8 +87,8 @@ class FakeSymbolTableImpl : public SymbolTable { // total_size_bytes. After appending all the encoded data into the // allocated byte array, we should wind up with a pointer difference of // total_size_bytes from the beginning of the allocation. - ASSERT(p == &storage[0] + total_size_bytes); - list.moveStorageIntoList(std::move(storage)); + //ASSERT(p == &storage[0] + total_size_bytes); + list.moveStorageIntoList(mem_block.release()); } std::string toString(const StatName& stat_name) const override { @@ -140,11 +138,10 @@ class FakeSymbolTableImpl : public SymbolTable { StoragePtr encodeHelper(absl::string_view name) const { ASSERT(!absl::EndsWith(name, ".")); - auto bytes = std::make_unique(SymbolTableImpl::Encoding::totalSizeBytes(name.size())); - uint8_t* buffer = - SymbolTableImpl::Encoding::writeEncodingReturningNext(name.size(), bytes.get()); - memcpy(buffer, name.data(), name.size()); - return bytes; + MemBlock mem_block(SymbolTableImpl::Encoding::totalSizeBytes(name.size())); + SymbolTableImpl::Encoding::appendEncoding(name.size(), mem_block); + mem_block.append(reinterpret_cast(name.data()), name.size()); + return mem_block.release();; } }; diff --git a/source/common/stats/symbol_table_impl.cc b/source/common/stats/symbol_table_impl.cc index 45e59f6b661b..bd0a32da36c3 100644 --- a/source/common/stats/symbol_table_impl.cc +++ b/source/common/stats/symbol_table_impl.cc @@ -46,7 +46,7 @@ void StatName::debugPrint() { SymbolTableImpl::Encoding::~Encoding() { // Verifies that moveToStorage() was called on this encoding. Failure // to call moveToStorage() will result in leaks symbols. - ASSERT(mem_block_.data() == nullptr); + ASSERT(mem_block_.empty()); } uint64_t SymbolTableImpl::Encoding::encodingSizeBytes(uint64_t number) { diff --git a/source/common/stats/symbol_table_impl.h b/source/common/stats/symbol_table_impl.h index 487b97a04d3c..0ca84d4f3e43 100644 --- a/source/common/stats/symbol_table_impl.h +++ b/source/common/stats/symbol_table_impl.h @@ -389,9 +389,7 @@ class StatName { */ uint64_t size() const { return SymbolTableImpl::Encoding::totalSizeBytes(dataSize()); } - void copyToStorage(MemBlock& storage) { - storage.dangerousCopyFrom(size_and_data_, size()); - } + void copyToStorage(MemBlock& storage) { storage.append(size_and_data_, size()); } #ifndef ENVOY_CONFIG_COVERAGE void debugPrint(); From 9f1eae74c839e2cfdc524533b0e2e5baa6a8854d Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Sat, 2 Nov 2019 00:37:05 -0400 Subject: [PATCH 15/36] format Signed-off-by: Joshua Marantz --- source/common/common/mem_block.h | 18 +++++++----------- source/common/stats/fake_symbol_table_impl.h | 9 +++++---- source/common/stats/symbol_table_impl.cc | 8 ++++---- 3 files changed, 16 insertions(+), 19 deletions(-) diff --git a/source/common/common/mem_block.h b/source/common/common/mem_block.h index 0af083e9d95f..460d6fcd015c 100644 --- a/source/common/common/mem_block.h +++ b/source/common/common/mem_block.h @@ -6,12 +6,12 @@ namespace Envoy { -template class MemBlock { - public: +template class MemBlock { +public: // Constructs a MemBlock wrapper to an existing memory block. The caller // is responsible for ensuring that 'data' has size 'size'. - explicit MemBlock(uint64_t size) : data_(std::make_unique(size)), size_(size), - next_(data_.get()) {} + explicit MemBlock(uint64_t size) + : data_(std::make_unique(size)), size_(size), next_(data_.get()) {} explicit MemBlock() : size_(0), next_(nullptr) {} void populate(uint64_t size) { @@ -27,9 +27,7 @@ template class MemBlock { *next_++ = byte; } - uint64_t bytesRemaining() const { - return (data_.get() + size_) - next_; - } + uint64_t bytesRemaining() const { return (data_.get() + size_) - next_; } void append(const T* byte, uint64_t size) { ASSERT(bytesRemaining() >= size); @@ -37,9 +35,7 @@ template class MemBlock { next_ += size; } - void append(const MemBlock& src) { - append(src.data_.get(), src.size_); - } + void append(const MemBlock& src) { append(src.data_.get(), src.size_); } void reset() { data_.reset(); @@ -53,7 +49,7 @@ template class MemBlock { return std::move(data_); } - private: +private: std::unique_ptr data_; uint64_t size_; uint8_t* next_; diff --git a/source/common/stats/fake_symbol_table_impl.h b/source/common/stats/fake_symbol_table_impl.h index 5363019c4f2a..503b21878d6e 100644 --- a/source/common/stats/fake_symbol_table_impl.h +++ b/source/common/stats/fake_symbol_table_impl.h @@ -85,9 +85,9 @@ class FakeSymbolTableImpl : public SymbolTable { // This assertion double-checks the arithmetic where we computed // total_size_bytes. After appending all the encoded data into the - // allocated byte array, we should wind up with a pointer difference of - // total_size_bytes from the beginning of the allocation. - //ASSERT(p == &storage[0] + total_size_bytes); + // allocated byte array, we should have exhausted all the memory + // we though we needed. + ASSERT(mem_block.bytesRemaining() == 0); list.moveStorageIntoList(mem_block.release()); } @@ -141,7 +141,8 @@ class FakeSymbolTableImpl : public SymbolTable { MemBlock mem_block(SymbolTableImpl::Encoding::totalSizeBytes(name.size())); SymbolTableImpl::Encoding::appendEncoding(name.size(), mem_block); mem_block.append(reinterpret_cast(name.data()), name.size()); - return mem_block.release();; + return mem_block.release(); + ; } }; diff --git a/source/common/stats/symbol_table_impl.cc b/source/common/stats/symbol_table_impl.cc index bd0a32da36c3..1e8adcd31cc1 100644 --- a/source/common/stats/symbol_table_impl.cc +++ b/source/common/stats/symbol_table_impl.cc @@ -104,7 +104,7 @@ void SymbolTableImpl::Encoding::addSymbols(const std::vector& symbols) { for (Symbol symbol : symbols) { appendEncoding(symbol, mem_block_); } - //ASSERT(static_cast(bytes - storage_.get()) == data_bytes_required_); + // ASSERT(static_cast(bytes - storage_.get()) == data_bytes_required_); } std::pair SymbolTableImpl::Encoding::decodeNumber(const uint8_t* encoding) { @@ -533,9 +533,9 @@ void SymbolTableImpl::populateList(const absl::string_view* names, uint32_t num_ // This assertion double-checks the arithmetic where we computed // total_size_bytes. After appending all the encoded data into the - // allocated byte array, we should wind up with a pointer difference of - // total_size_bytes from the beginning of the allocation. - //ASSERT(p == &storage[0] + total_size_bytes); + // allocated byte array, we should have exhausted all the memory + // we though we needed. + ASSERT(mem_block.bytesRemaining() == 0); list.moveStorageIntoList(mem_block.release()); } From cf33e186e6fbfef899899d86938952a699138d3c Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Sat, 2 Nov 2019 08:59:30 -0400 Subject: [PATCH 16/36] some cleanup -- still needs more unit tests. Signed-off-by: Joshua Marantz --- source/common/common/mem_block.h | 84 +++++++++++++++----- source/common/stats/fake_symbol_table_impl.h | 8 +- source/common/stats/symbol_table_impl.cc | 33 ++------ source/common/stats/symbol_table_impl.h | 9 +-- test/common/stats/symbol_table_impl_test.cc | 14 ++-- 5 files changed, 89 insertions(+), 59 deletions(-) diff --git a/source/common/common/mem_block.h b/source/common/common/mem_block.h index 460d6fcd015c..78ed4335752b 100644 --- a/source/common/common/mem_block.h +++ b/source/common/common/mem_block.h @@ -6,53 +6,101 @@ namespace Envoy { +// Manages a block of raw memory for objects of type T. T must be +// empty-constructible. template class MemBlock { public: - // Constructs a MemBlock wrapper to an existing memory block. The caller - // is responsible for ensuring that 'data' has size 'size'. + // Constructs a MemBlock of the specified size. explicit MemBlock(uint64_t size) - : data_(std::make_unique(size)), size_(size), next_(data_.get()) {} - explicit MemBlock() : size_(0), next_(nullptr) {} + : data_(std::make_unique(size)), size_(size), write_ptr_(data_.get()) {} + MemBlock() : size_(0), write_ptr_(nullptr) {} + // Populates (or repopulates) the MemBlock to make it the specified size. + // This does not have resize semantics; when populate() is called any + // previous contents are erased. void populate(uint64_t size) { data_ = std::make_unique(size); size_ = size; - next_ = data_.get(); + write_ptr_ = data_.get(); } + // Returns whether the block has been populated. bool empty() const { return data_ == nullptr; } - void push_back(T byte) { - ASSERT(bytesRemaining() >= 1); - *next_++ = byte; + /** + * Appends a single object of type T, moving an internal write-pointer + * forward. Asserts that there is room to write the object when compiled + * for debug. + * + * @param object the object to append. + */ + void appendOne(T object) { + ASSERT(capacityRemaining() >= 1); + *write_ptr_++ = object; } - uint64_t bytesRemaining() const { return (data_.get() + size_) - next_; } - - void append(const T* byte, uint64_t size) { - ASSERT(bytesRemaining() >= size); - memcpy(next_, byte, size); - next_ += size; + /** + * Appends raw data, moving an internal write-pointer forward. Asserts + * that there is room to write the block when compiled for debug. It is + * the caller's responsibility to ensure that the input data is valid. + * + * @param data The block of objects to insert. + * @param size The number of elements in the block. + */ + void appendData(const T* data, uint64_t size) { + ASSERT(capacityRemaining() >= size); + memcpy(write_ptr_, data, size * sizeof(T)); + write_ptr_ += size; } - void append(const MemBlock& src) { append(src.data_.get(), src.size_); } + /** + * Appends the contents of another memory block to this one. + * + * @param src the block to append. + */ + void appendBlock(const MemBlock& src) { appendData(src.data_.get(), src.size_); } + + /** + * @return the number of elements remaining in the MemBlock. + */ + uint64_t capacityRemaining() const { return (data_.get() + size_) - write_ptr_; } + /** + * Empties the contents of this. + */ void reset() { data_.reset(); size_ = 0; - next_ = nullptr; + write_ptr_ = nullptr; } + /** + * Returns the underlying storage as a unique pointer, clearing this. + * + * @return The transferred storage. + */ std::unique_ptr release() { - next_ = nullptr; + write_ptr_ = nullptr; size_ = 0; return std::move(data_); } + /** + * @return read-only access to the data. + */ + const T* data() const { return data_.get(); } + + /** + * This is exposed to help with unit testing. + * + * @return the populated data as a vector. + */ + std::vector toVector() const { return std::vector(data_.get(), write_ptr_); } + private: std::unique_ptr data_; uint64_t size_; - uint8_t* next_; + uint8_t* write_ptr_; }; } // namespace Envoy diff --git a/source/common/stats/fake_symbol_table_impl.h b/source/common/stats/fake_symbol_table_impl.h index 503b21878d6e..8881406eb2d2 100644 --- a/source/common/stats/fake_symbol_table_impl.h +++ b/source/common/stats/fake_symbol_table_impl.h @@ -73,13 +73,13 @@ class FakeSymbolTableImpl : public SymbolTable { // Now allocate the exact number of bytes required and move the encodings // into storage. MemBlock mem_block(total_size_bytes); - mem_block.push_back(num_names); + mem_block.appendOne(num_names); for (uint32_t i = 0; i < num_names; ++i) { auto& name = names[i]; size_t sz = name.size(); SymbolTableImpl::Encoding::appendEncoding(sz, mem_block); if (!name.empty()) { - mem_block.append(reinterpret_cast(name.data()), sz); + mem_block.appendData(reinterpret_cast(name.data()), sz); } } @@ -87,7 +87,7 @@ class FakeSymbolTableImpl : public SymbolTable { // total_size_bytes. After appending all the encoded data into the // allocated byte array, we should have exhausted all the memory // we though we needed. - ASSERT(mem_block.bytesRemaining() == 0); + ASSERT(mem_block.capacityRemaining() == 0); list.moveStorageIntoList(mem_block.release()); } @@ -140,7 +140,7 @@ class FakeSymbolTableImpl : public SymbolTable { ASSERT(!absl::EndsWith(name, ".")); MemBlock mem_block(SymbolTableImpl::Encoding::totalSizeBytes(name.size())); SymbolTableImpl::Encoding::appendEncoding(name.size(), mem_block); - mem_block.append(reinterpret_cast(name.data()), name.size()); + mem_block.appendData(reinterpret_cast(name.data()), name.size()); return mem_block.release(); ; } diff --git a/source/common/stats/symbol_table_impl.cc b/source/common/stats/symbol_table_impl.cc index 1e8adcd31cc1..9579f95f1fe6 100644 --- a/source/common/stats/symbol_table_impl.cc +++ b/source/common/stats/symbol_table_impl.cc @@ -58,25 +58,6 @@ uint64_t SymbolTableImpl::Encoding::encodingSizeBytes(uint64_t number) { return num_bytes; } -uint8_t* SymbolTableImpl::Encoding::writeEncodingReturningNext(uint64_t number, uint8_t* bytes) { - // UTF-8-like encoding where a value 127 or less gets written as a single - // byte. For higher values we write the low-order 7 bits with a 1 in - // the high-order bit. Then we right-shift 7 bits and keep adding more bytes - // until we have consumed all the non-zero bits in symbol. - // - // When decoding, we stop consuming uint8_t when we see a uint8_t with - // high-order bit 0. - do { - if (number < (1 << 7)) { - *bytes++ = number; // number <= 127 get encoded in one byte. - } else { - *bytes++ = ((number & Low7Bits) | SpilloverMask); // number >= 128 need spillover bytes. - } - number >>= 7; - } while (number != 0); - return bytes; -} - void SymbolTableImpl::Encoding::appendEncoding(uint64_t number, MemBlock& mem_block) { // UTF-8-like encoding where a value 127 or less gets written as a single // byte. For higher values we write the low-order 7 bits with a 1 in @@ -87,9 +68,9 @@ void SymbolTableImpl::Encoding::appendEncoding(uint64_t number, MemBlock= 128 need spillover bytes. + mem_block.appendOne((number & Low7Bits) | SpilloverMask); // >= 128 need spillover bytes. } number >>= 7; } while (number != 0); @@ -133,7 +114,7 @@ SymbolVec SymbolTableImpl::Encoding::decodeSymbols(const SymbolTable::Storage ar void SymbolTableImpl::Encoding::moveToStorage(MemBlock& mem_block) { appendEncoding(data_bytes_required_, mem_block); - mem_block.append(mem_block_); + mem_block.appendBlock(mem_block_); mem_block_.reset(); // Logically transfer ownership, enabling empty assert on destruct. } @@ -503,8 +484,8 @@ SymbolTable::StoragePtr SymbolTableImpl::join(const StatNameVec& stat_names) con MemBlock mem_block(Encoding::totalSizeBytes(num_bytes)); Encoding::appendEncoding(num_bytes, mem_block); for (StatName stat_name : stat_names) { - const uint64_t stat_name_bytes = stat_name.dataSize(); - mem_block.append(stat_name.data(), stat_name_bytes); + //mem_block.appendData(stat_name.data(), stat_name.dataSize()); + stat_name.copyDataToStorage(mem_block); } return mem_block.release(); } @@ -526,7 +507,7 @@ void SymbolTableImpl::populateList(const absl::string_view* names, uint32_t num_ // Now allocate the exact number of bytes required and move the encodings // into storage. MemBlock mem_block(total_size_bytes); - mem_block.push_back(num_names); + mem_block.appendOne(num_names); for (auto& encoding : encodings) { encoding.moveToStorage(mem_block); } @@ -535,7 +516,7 @@ void SymbolTableImpl::populateList(const absl::string_view* names, uint32_t num_ // total_size_bytes. After appending all the encoded data into the // allocated byte array, we should have exhausted all the memory // we though we needed. - ASSERT(mem_block.bytesRemaining() == 0); + ASSERT(mem_block.capacityRemaining() == 0); list.moveStorageIntoList(mem_block.release()); } diff --git a/source/common/stats/symbol_table_impl.h b/source/common/stats/symbol_table_impl.h index 0ca84d4f3e43..0e2c307e393c 100644 --- a/source/common/stats/symbol_table_impl.h +++ b/source/common/stats/symbol_table_impl.h @@ -138,16 +138,14 @@ class SymbolTableImpl : public SymbolTable { * Requires that the buffer be sized to accommodate encodingSizeBytes(number). * * @param number the number to write. - * @param bytes the pointer into which to write the number. - * @return the pointer to the next byte for writing the data. + * @param MemBlock the memory into which to append the number. */ - static uint8_t* writeEncodingReturningNext(uint64_t number, uint8_t* bytes); static void appendEncoding(uint64_t number, MemBlock& mem_block); /** * Decodes a byte-array containing a variable-length number. * - * @param The encoded byte array, written previously by writeNumberReturningNext. + * @param The encoded byte array, written previously by appendEncoding. * @return A pair containing the decoded number, and the number of bytes consumed from encoding. */ static std::pair decodeNumber(const uint8_t* encoding); @@ -389,7 +387,8 @@ class StatName { */ uint64_t size() const { return SymbolTableImpl::Encoding::totalSizeBytes(dataSize()); } - void copyToStorage(MemBlock& storage) { storage.append(size_and_data_, size()); } + void copyToStorage(MemBlock& storage) { storage.appendData(size_and_data_, size()); } + void copyDataToStorage(MemBlock& storage) { storage.appendData(data(), dataSize()); } #ifndef ENVOY_CONFIG_COVERAGE void debugPrint(); diff --git a/test/common/stats/symbol_table_impl_test.cc b/test/common/stats/symbol_table_impl_test.cc index e6eabadd453f..9885a1b322eb 100644 --- a/test/common/stats/symbol_table_impl_test.cc +++ b/test/common/stats/symbol_table_impl_test.cc @@ -70,12 +70,14 @@ class StatNameTest : public testing::TestWithParam { StatName makeStat(absl::string_view name) { return pool_->add(name); } std::vector serializeDeserialize(uint64_t number) { - uint64_t num_bytes = SymbolTableImpl::Encoding::encodingSizeBytes(number); - uint8_t buf[10]; - EXPECT_EQ(buf + num_bytes, SymbolTableImpl::Encoding::writeEncodingReturningNext(number, buf)) - << number; - EXPECT_EQ(number, SymbolTableImpl::Encoding::decodeNumber(buf).first) << number; - return std::vector(buf, buf + num_bytes); + const uint64_t num_bytes = SymbolTableImpl::Encoding::encodingSizeBytes(number); + const uint64_t block_size = 10; + MemBlock mem_block(block_size); + SymbolTableImpl::Encoding::appendEncoding(number, mem_block); + EXPECT_EQ(block_size, num_bytes + mem_block.capacityRemaining()); + EXPECT_EQ(number, SymbolTableImpl::Encoding::decodeNumber(mem_block.data()).first) << number; + //return std::vector(buf, buf + num_bytes); + return mem_block.toVector(); //std::vector(buf, buf + num_bytes); } FakeSymbolTableImpl* fake_symbol_table_{nullptr}; From 14b8740b250bb7a3d46190936d80d83c161bd523 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Sat, 2 Nov 2019 10:32:30 -0400 Subject: [PATCH 17/36] remove superfluous methods. Signed-off-by: Joshua Marantz --- source/common/common/mem_block.h | 12 ++++-------- test/common/stats/symbol_table_impl_test.cc | 6 +++--- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/source/common/common/mem_block.h b/source/common/common/mem_block.h index 78ed4335752b..a939aeba8124 100644 --- a/source/common/common/mem_block.h +++ b/source/common/common/mem_block.h @@ -7,7 +7,8 @@ namespace Envoy { // Manages a block of raw memory for objects of type T. T must be -// empty-constructible. +// empty-constructible. This class carries extra member variables +// for tracking size, and a write-pointer to support safe appends. template class MemBlock { public: // Constructs a MemBlock of the specified size. @@ -86,14 +87,9 @@ template class MemBlock { } /** - * @return read-only access to the data. - */ - const T* data() const { return data_.get(); } - - /** - * This is exposed to help with unit testing. - * * @return the populated data as a vector. + * + * This is exposed to help with unit testing. */ std::vector toVector() const { return std::vector(data_.get(), write_ptr_); } diff --git a/test/common/stats/symbol_table_impl_test.cc b/test/common/stats/symbol_table_impl_test.cc index 9885a1b322eb..2466a78f323c 100644 --- a/test/common/stats/symbol_table_impl_test.cc +++ b/test/common/stats/symbol_table_impl_test.cc @@ -75,9 +75,9 @@ class StatNameTest : public testing::TestWithParam { MemBlock mem_block(block_size); SymbolTableImpl::Encoding::appendEncoding(number, mem_block); EXPECT_EQ(block_size, num_bytes + mem_block.capacityRemaining()); - EXPECT_EQ(number, SymbolTableImpl::Encoding::decodeNumber(mem_block.data()).first) << number; - //return std::vector(buf, buf + num_bytes); - return mem_block.toVector(); //std::vector(buf, buf + num_bytes); + std::vector vec = mem_block.toVector(); + EXPECT_EQ(number, SymbolTableImpl::Encoding::decodeNumber(&vec[0]).first) << number; + return vec; } FakeSymbolTableImpl* fake_symbol_table_{nullptr}; From eb6eb51dae3f9dffdebf08c8eef345ff9e8f4ddd Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Sat, 2 Nov 2019 10:58:16 -0400 Subject: [PATCH 18/36] add unit test. Signed-off-by: Joshua Marantz --- source/common/common/mem_block.h | 43 ++++++++++------ source/common/stats/symbol_table_impl.cc | 2 +- test/common/common/BUILD | 6 +++ test/common/common/mem_block_test.cc | 64 ++++++++++++++++++++++++ 4 files changed, 99 insertions(+), 16 deletions(-) create mode 100644 test/common/common/mem_block_test.cc diff --git a/source/common/common/mem_block.h b/source/common/common/mem_block.h index a939aeba8124..62f865b2e435 100644 --- a/source/common/common/mem_block.h +++ b/source/common/common/mem_block.h @@ -9,24 +9,35 @@ namespace Envoy { // Manages a block of raw memory for objects of type T. T must be // empty-constructible. This class carries extra member variables // for tracking size, and a write-pointer to support safe appends. +// +// MemBlock is used to efficiently write blocks of data into a memory +// buffer. Due to the extra member variables, it is not optimal for +// storing in data structures. Instead, the raw assembled memory block +// is released from the MemBlock after assembly. template class MemBlock { public: // Constructs a MemBlock of the specified size. explicit MemBlock(uint64_t size) - : data_(std::make_unique(size)), size_(size), write_ptr_(data_.get()) {} - MemBlock() : size_(0), write_ptr_(nullptr) {} + : data_(std::make_unique(size)), capacity_(size), write_ptr_(data_.get()) {} + MemBlock() : capacity_(0), write_ptr_(nullptr) {} - // Populates (or repopulates) the MemBlock to make it the specified size. - // This does not have resize semantics; when populate() is called any - // previous contents are erased. + /** + * Populates (or repopulates) the MemBlock to make it the specified size. + * This does not have resize semantics; when populate() is called any + * previous contents are erased. + * + * @param size The number of memory elements to allocate. + */ void populate(uint64_t size) { data_ = std::make_unique(size); - size_ = size; + capacity_ = size; write_ptr_ = data_.get(); } - // Returns whether the block has been populated. - bool empty() const { return data_ == nullptr; } + /** + * @return the capacity. + */ + uint64_t capacity() const { return capacity_; } /** * Appends a single object of type T, moving an internal write-pointer @@ -59,30 +70,30 @@ template class MemBlock { * * @param src the block to append. */ - void appendBlock(const MemBlock& src) { appendData(src.data_.get(), src.size_); } + void appendBlock(const MemBlock& src) { appendData(src.data_.get(), src.size()); } /** * @return the number of elements remaining in the MemBlock. */ - uint64_t capacityRemaining() const { return (data_.get() + size_) - write_ptr_; } + uint64_t capacityRemaining() const { return (data_.get() + capacity_) - write_ptr_; } /** * Empties the contents of this. */ void reset() { data_.reset(); - size_ = 0; + capacity_ = 0; write_ptr_ = nullptr; } /** * Returns the underlying storage as a unique pointer, clearing this. * - * @return The transferred storage. + * @return the transferred storage. */ std::unique_ptr release() { write_ptr_ = nullptr; - size_ = 0; + capacity_ = 0; return std::move(data_); } @@ -94,9 +105,11 @@ template class MemBlock { std::vector toVector() const { return std::vector(data_.get(), write_ptr_); } private: + uint64_t size() const { return write_ptr_ - data_.get(); } + std::unique_ptr data_; - uint64_t size_; - uint8_t* write_ptr_; + uint64_t capacity_; + T* write_ptr_; }; } // namespace Envoy diff --git a/source/common/stats/symbol_table_impl.cc b/source/common/stats/symbol_table_impl.cc index 9579f95f1fe6..cbd8259831cd 100644 --- a/source/common/stats/symbol_table_impl.cc +++ b/source/common/stats/symbol_table_impl.cc @@ -46,7 +46,7 @@ void StatName::debugPrint() { SymbolTableImpl::Encoding::~Encoding() { // Verifies that moveToStorage() was called on this encoding. Failure // to call moveToStorage() will result in leaks symbols. - ASSERT(mem_block_.empty()); + ASSERT(mem_block_.capacity() == 0); } uint64_t SymbolTableImpl::Encoding::encodingSizeBytes(uint64_t number) { diff --git a/test/common/common/BUILD b/test/common/common/BUILD index c9024dba5e8d..d10affe13601 100644 --- a/test/common/common/BUILD +++ b/test/common/common/BUILD @@ -68,6 +68,12 @@ envoy_cc_test( deps = ["//source/common/common:cleanup_lib"], ) +envoy_cc_test( + name = "mem_block_test", + srcs = ["mem_block_test.cc"], + deps = ["//source/common/common:mem_block_lib"], +) + envoy_cc_test( name = "phantom_test", srcs = ["phantom_test.cc"], diff --git a/test/common/common/mem_block_test.cc b/test/common/common/mem_block_test.cc new file mode 100644 index 000000000000..4bc9cb0f5f35 --- /dev/null +++ b/test/common/common/mem_block_test.cc @@ -0,0 +1,64 @@ +#include "common/common/mem_block.h" + +#include "gtest/gtest.h" + +namespace Envoy { + +TEST(MemBlockTest, AppendUint8) { + MemBlock mem_block(10); + EXPECT_EQ(10, mem_block.capacity()); + mem_block.appendOne(5); + EXPECT_EQ(9, mem_block.capacityRemaining()); + const uint8_t foo[] = {6, 7}; + mem_block.appendData(foo, ABSL_ARRAYSIZE(foo)); + EXPECT_EQ(7, mem_block.capacityRemaining()); + + MemBlock append; + EXPECT_EQ(0, append.capacity()); + append.populate(7); + EXPECT_EQ(7, append.capacity()); + append.appendOne(8); + append.appendOne(9); + mem_block.appendBlock(append); + + EXPECT_EQ(5, mem_block.capacityRemaining()); + EXPECT_EQ((std::vector{5, 6, 7, 8, 9}), mem_block.toVector()); + + append.appendBlock(mem_block); + EXPECT_EQ(0, append.capacityRemaining()); + EXPECT_EQ((std::vector{8, 9, 5, 6, 7, 8, 9}), append.toVector()); + + mem_block.reset(); + EXPECT_EQ(0, mem_block.capacity()); +} + +TEST(MemBlockTest, AppendUint32) { + MemBlock mem_block(10); + EXPECT_EQ(10, mem_block.capacity()); + mem_block.appendOne(100005); + EXPECT_EQ(9, mem_block.capacityRemaining()); + const uint32_t foo[] = {100006, 100007}; + mem_block.appendData(foo, ABSL_ARRAYSIZE(foo)); + EXPECT_EQ(7, mem_block.capacityRemaining()); + + MemBlock append; + EXPECT_EQ(0, append.capacity()); + append.populate(7); + EXPECT_EQ(7, append.capacity()); + append.appendOne(100008); + append.appendOne(100009); + mem_block.appendBlock(append); + + EXPECT_EQ(5, mem_block.capacityRemaining()); + EXPECT_EQ((std::vector{100005, 100006, 100007, 100008, 100009}), mem_block.toVector()); + + append.appendBlock(mem_block); + EXPECT_EQ(0, append.capacityRemaining()); + EXPECT_EQ((std::vector{100008, 100009, 100005, 100006, 100007, 100008, 100009}), + append.toVector()); + + mem_block.reset(); + EXPECT_EQ(0, mem_block.capacity()); +} + +} // namespace Envoy From 6d4b8bcf99c09f6e5ff7d4938599b14dbd698112 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Sat, 2 Nov 2019 11:06:08 -0400 Subject: [PATCH 19/36] improve class comment. Signed-off-by: Joshua Marantz --- source/common/common/mem_block.h | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/source/common/common/mem_block.h b/source/common/common/mem_block.h index 62f865b2e435..588024900bed 100644 --- a/source/common/common/mem_block.h +++ b/source/common/common/mem_block.h @@ -1,19 +1,25 @@ #pragma once #include +#include #include "common/common/assert.h" namespace Envoy { // Manages a block of raw memory for objects of type T. T must be -// empty-constructible. This class carries extra member variables -// for tracking size, and a write-pointer to support safe appends. +// empty-constructible. This class carries extra member variables for tracking +// size, and a write-pointer to support safe appends. // -// MemBlock is used to efficiently write blocks of data into a memory -// buffer. Due to the extra member variables, it is not optimal for -// storing in data structures. Instead, the raw assembled memory block -// is released from the MemBlock after assembly. +// MemBlock is used to safely write blocks of data into a memory buffer. Due to +// two extra member variables, it is not optimal for storing in data +// structures. The intended usage model is to release the raw assembled memory +// block from the MemBlock for efficient storage. +// +// The goal for this class is to provide a usage model to replace direct usage +// of memcpy with a pattern that is easy to validate for correctness by +// inspection, asserts, and fuzzing, but when compiled for optimization is just +// as efficient as raw memcpy. template class MemBlock { public: // Constructs a MemBlock of the specified size. From cf71f0b92f8b08ab1e8dbb61763d714558faf635 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Sat, 2 Nov 2019 11:09:13 -0400 Subject: [PATCH 20/36] rename MemBlock to MemBlockBuilder. Signed-off-by: Joshua Marantz --- source/common/common/mem_block.h | 18 +++++++++--------- source/common/stats/fake_symbol_table_impl.h | 4 ++-- source/common/stats/symbol_table_impl.cc | 12 ++++++------ source/common/stats/symbol_table_impl.h | 16 ++++++++++------ test/common/common/mem_block_test.cc | 12 ++++++------ test/common/stats/symbol_table_impl_test.cc | 2 +- 6 files changed, 34 insertions(+), 30 deletions(-) diff --git a/source/common/common/mem_block.h b/source/common/common/mem_block.h index 588024900bed..29c45bff6c15 100644 --- a/source/common/common/mem_block.h +++ b/source/common/common/mem_block.h @@ -11,24 +11,24 @@ namespace Envoy { // empty-constructible. This class carries extra member variables for tracking // size, and a write-pointer to support safe appends. // -// MemBlock is used to safely write blocks of data into a memory buffer. Due to +// MemBlockBuilder is used to safely write blocks of data into a memory buffer. Due to // two extra member variables, it is not optimal for storing in data // structures. The intended usage model is to release the raw assembled memory -// block from the MemBlock for efficient storage. +// block from the MemBlockBuilder for efficient storage. // // The goal for this class is to provide a usage model to replace direct usage // of memcpy with a pattern that is easy to validate for correctness by // inspection, asserts, and fuzzing, but when compiled for optimization is just // as efficient as raw memcpy. -template class MemBlock { +template class MemBlockBuilder { public: - // Constructs a MemBlock of the specified size. - explicit MemBlock(uint64_t size) + // Constructs a MemBlockBuilder of the specified size. + explicit MemBlockBuilder(uint64_t size) : data_(std::make_unique(size)), capacity_(size), write_ptr_(data_.get()) {} - MemBlock() : capacity_(0), write_ptr_(nullptr) {} + MemBlockBuilder() : capacity_(0), write_ptr_(nullptr) {} /** - * Populates (or repopulates) the MemBlock to make it the specified size. + * Populates (or repopulates) the MemBlockBuilder to make it the specified size. * This does not have resize semantics; when populate() is called any * previous contents are erased. * @@ -76,10 +76,10 @@ template class MemBlock { * * @param src the block to append. */ - void appendBlock(const MemBlock& src) { appendData(src.data_.get(), src.size()); } + void appendBlock(const MemBlockBuilder& src) { appendData(src.data_.get(), src.size()); } /** - * @return the number of elements remaining in the MemBlock. + * @return the number of elements remaining in the MemBlockBuilder. */ uint64_t capacityRemaining() const { return (data_.get() + capacity_) - write_ptr_; } diff --git a/source/common/stats/fake_symbol_table_impl.h b/source/common/stats/fake_symbol_table_impl.h index 8881406eb2d2..63ecfe9fd825 100644 --- a/source/common/stats/fake_symbol_table_impl.h +++ b/source/common/stats/fake_symbol_table_impl.h @@ -72,7 +72,7 @@ class FakeSymbolTableImpl : public SymbolTable { // Now allocate the exact number of bytes required and move the encodings // into storage. - MemBlock mem_block(total_size_bytes); + MemBlockBuilder mem_block(total_size_bytes); mem_block.appendOne(num_names); for (uint32_t i = 0; i < num_names; ++i) { auto& name = names[i]; @@ -138,7 +138,7 @@ class FakeSymbolTableImpl : public SymbolTable { StoragePtr encodeHelper(absl::string_view name) const { ASSERT(!absl::EndsWith(name, ".")); - MemBlock mem_block(SymbolTableImpl::Encoding::totalSizeBytes(name.size())); + MemBlockBuilder mem_block(SymbolTableImpl::Encoding::totalSizeBytes(name.size())); SymbolTableImpl::Encoding::appendEncoding(name.size(), mem_block); mem_block.appendData(reinterpret_cast(name.data()), name.size()); return mem_block.release(); diff --git a/source/common/stats/symbol_table_impl.cc b/source/common/stats/symbol_table_impl.cc index cbd8259831cd..749b855e64d2 100644 --- a/source/common/stats/symbol_table_impl.cc +++ b/source/common/stats/symbol_table_impl.cc @@ -58,7 +58,7 @@ uint64_t SymbolTableImpl::Encoding::encodingSizeBytes(uint64_t number) { return num_bytes; } -void SymbolTableImpl::Encoding::appendEncoding(uint64_t number, MemBlock& mem_block) { +void SymbolTableImpl::Encoding::appendEncoding(uint64_t number, MemBlockBuilder& mem_block) { // UTF-8-like encoding where a value 127 or less gets written as a single // byte. For higher values we write the low-order 7 bits with a 1 in // the high-order bit. Then we right-shift 7 bits and keep adding more bytes @@ -112,7 +112,7 @@ SymbolVec SymbolTableImpl::Encoding::decodeSymbols(const SymbolTable::Storage ar return symbol_vec; } -void SymbolTableImpl::Encoding::moveToStorage(MemBlock& mem_block) { +void SymbolTableImpl::Encoding::moveToStorage(MemBlockBuilder& mem_block) { appendEncoding(data_bytes_required_, mem_block); mem_block.appendBlock(mem_block_); mem_block_.reset(); // Logically transfer ownership, enabling empty assert on destruct. @@ -403,7 +403,7 @@ SymbolTable::StoragePtr SymbolTableImpl::encode(absl::string_view name) { ASSERT(!absl::EndsWith(name, ".")); Encoding encoding; addTokensToEncoding(name, encoding); - MemBlock mem_block(Encoding::totalSizeBytes(encoding.bytesRequired())); + MemBlockBuilder mem_block(Encoding::totalSizeBytes(encoding.bytesRequired())); encoding.moveToStorage(mem_block); return mem_block.release(); } @@ -413,7 +413,7 @@ StatNameStorage::StatNameStorage(absl::string_view name, SymbolTable& table) StatNameStorage::StatNameStorage(StatName src, SymbolTable& table) { const uint64_t size = src.size(); - MemBlock storage(size); + MemBlockBuilder storage(size); src.copyToStorage(storage); bytes_ = storage.release(); table.incRefCount(statName()); @@ -481,7 +481,7 @@ SymbolTable::StoragePtr SymbolTableImpl::join(const StatNameVec& stat_names) con for (StatName stat_name : stat_names) { num_bytes += stat_name.dataSize(); } - MemBlock mem_block(Encoding::totalSizeBytes(num_bytes)); + MemBlockBuilder mem_block(Encoding::totalSizeBytes(num_bytes)); Encoding::appendEncoding(num_bytes, mem_block); for (StatName stat_name : stat_names) { //mem_block.appendData(stat_name.data(), stat_name.dataSize()); @@ -506,7 +506,7 @@ void SymbolTableImpl::populateList(const absl::string_view* names, uint32_t num_ // Now allocate the exact number of bytes required and move the encodings // into storage. - MemBlock mem_block(total_size_bytes); + MemBlockBuilder mem_block(total_size_bytes); mem_block.appendOne(num_names); for (auto& encoding : encodings) { encoding.moveToStorage(mem_block); diff --git a/source/common/stats/symbol_table_impl.h b/source/common/stats/symbol_table_impl.h index 0e2c307e393c..6f117f730586 100644 --- a/source/common/stats/symbol_table_impl.h +++ b/source/common/stats/symbol_table_impl.h @@ -114,7 +114,7 @@ class SymbolTableImpl : public SymbolTable { * * @param array destination memory to receive the encoded bytes. */ - void moveToStorage(MemBlock& array); + void moveToStorage(MemBlockBuilder& array); /** * @param number A number to encode in a variable length byte-array. @@ -138,9 +138,9 @@ class SymbolTableImpl : public SymbolTable { * Requires that the buffer be sized to accommodate encodingSizeBytes(number). * * @param number the number to write. - * @param MemBlock the memory into which to append the number. + * @param mem_block the memory into which to append the number. */ - static void appendEncoding(uint64_t number, MemBlock& mem_block); + static void appendEncoding(uint64_t number, MemBlockBuilder& mem_block); /** * Decodes a byte-array containing a variable-length number. @@ -154,7 +154,7 @@ class SymbolTableImpl : public SymbolTable { private: uint64_t data_bytes_required_{0}; - MemBlock mem_block_; + MemBlockBuilder mem_block_; }; SymbolTableImpl(); @@ -387,8 +387,12 @@ class StatName { */ uint64_t size() const { return SymbolTableImpl::Encoding::totalSizeBytes(dataSize()); } - void copyToStorage(MemBlock& storage) { storage.appendData(size_and_data_, size()); } - void copyDataToStorage(MemBlock& storage) { storage.appendData(data(), dataSize()); } + void copyToStorage(MemBlockBuilder& storage) { + storage.appendData(size_and_data_, size()); + } + void copyDataToStorage(MemBlockBuilder& storage) { + storage.appendData(data(), dataSize()); + } #ifndef ENVOY_CONFIG_COVERAGE void debugPrint(); diff --git a/test/common/common/mem_block_test.cc b/test/common/common/mem_block_test.cc index 4bc9cb0f5f35..0c448e46a345 100644 --- a/test/common/common/mem_block_test.cc +++ b/test/common/common/mem_block_test.cc @@ -4,8 +4,8 @@ namespace Envoy { -TEST(MemBlockTest, AppendUint8) { - MemBlock mem_block(10); +TEST(MemBlockBuilderTest, AppendUint8) { + MemBlockBuilder mem_block(10); EXPECT_EQ(10, mem_block.capacity()); mem_block.appendOne(5); EXPECT_EQ(9, mem_block.capacityRemaining()); @@ -13,7 +13,7 @@ TEST(MemBlockTest, AppendUint8) { mem_block.appendData(foo, ABSL_ARRAYSIZE(foo)); EXPECT_EQ(7, mem_block.capacityRemaining()); - MemBlock append; + MemBlockBuilder append; EXPECT_EQ(0, append.capacity()); append.populate(7); EXPECT_EQ(7, append.capacity()); @@ -32,8 +32,8 @@ TEST(MemBlockTest, AppendUint8) { EXPECT_EQ(0, mem_block.capacity()); } -TEST(MemBlockTest, AppendUint32) { - MemBlock mem_block(10); +TEST(MemBlockBuilderTest, AppendUint32) { + MemBlockBuilder mem_block(10); EXPECT_EQ(10, mem_block.capacity()); mem_block.appendOne(100005); EXPECT_EQ(9, mem_block.capacityRemaining()); @@ -41,7 +41,7 @@ TEST(MemBlockTest, AppendUint32) { mem_block.appendData(foo, ABSL_ARRAYSIZE(foo)); EXPECT_EQ(7, mem_block.capacityRemaining()); - MemBlock append; + MemBlockBuilder append; EXPECT_EQ(0, append.capacity()); append.populate(7); EXPECT_EQ(7, append.capacity()); diff --git a/test/common/stats/symbol_table_impl_test.cc b/test/common/stats/symbol_table_impl_test.cc index 2466a78f323c..7b2cafbd643c 100644 --- a/test/common/stats/symbol_table_impl_test.cc +++ b/test/common/stats/symbol_table_impl_test.cc @@ -72,7 +72,7 @@ class StatNameTest : public testing::TestWithParam { std::vector serializeDeserialize(uint64_t number) { const uint64_t num_bytes = SymbolTableImpl::Encoding::encodingSizeBytes(number); const uint64_t block_size = 10; - MemBlock mem_block(block_size); + MemBlockBuilder mem_block(block_size); SymbolTableImpl::Encoding::appendEncoding(number, mem_block); EXPECT_EQ(block_size, num_bytes + mem_block.capacityRemaining()); std::vector vec = mem_block.toVector(); From 256a85781e3d742cec7a95aba10c1b76af7585ed Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Sat, 2 Nov 2019 11:14:14 -0400 Subject: [PATCH 21/36] Rename MemBlock to MemBlockBuilder to better reflect intended usage model. Signed-off-by: Joshua Marantz --- source/common/common/BUILD | 4 ++-- .../common/{mem_block.h => mem_block_builder.h} | 16 ++++++++-------- source/common/stats/BUILD | 2 +- source/common/stats/symbol_table_impl.h | 2 +- test/common/common/BUILD | 6 +++--- ...m_block_test.cc => mem_block_builder_test.cc} | 2 +- 6 files changed, 16 insertions(+), 16 deletions(-) rename source/common/common/{mem_block.h => mem_block_builder.h} (89%) rename test/common/common/{mem_block_test.cc => mem_block_builder_test.cc} (97%) diff --git a/source/common/common/BUILD b/source/common/common/BUILD index 675d203ff40b..8b2d933370cf 100644 --- a/source/common/common/BUILD +++ b/source/common/common/BUILD @@ -114,8 +114,8 @@ envoy_cc_library( ) envoy_cc_library( - name = "mem_block_lib", - hdrs = ["mem_block.h"], + name = "mem_block_builder_lib", + hdrs = ["mem_block_builder.h"], deps = [":assert_lib"], ) diff --git a/source/common/common/mem_block.h b/source/common/common/mem_block_builder.h similarity index 89% rename from source/common/common/mem_block.h rename to source/common/common/mem_block_builder.h index 29c45bff6c15..75e6d6e48561 100644 --- a/source/common/common/mem_block.h +++ b/source/common/common/mem_block_builder.h @@ -11,15 +11,15 @@ namespace Envoy { // empty-constructible. This class carries extra member variables for tracking // size, and a write-pointer to support safe appends. // -// MemBlockBuilder is used to safely write blocks of data into a memory buffer. Due to -// two extra member variables, it is not optimal for storing in data -// structures. The intended usage model is to release the raw assembled memory -// block from the MemBlockBuilder for efficient storage. +// MemBlockBuilder is used to safely write blocks of data into a memory +// buffer. Due to two extra member variables, it is not optimal for storing in +// data structures. The intended usage model is to release the raw assembled +// memory block from the MemBlockBuilder for efficient storage. // // The goal for this class is to provide a usage model to replace direct usage // of memcpy with a pattern that is easy to validate for correctness by -// inspection, asserts, and fuzzing, but when compiled for optimization is just -// as efficient as raw memcpy. +// inspection, asserts, and fuzzing, but when compiled for optimization is +// roughly as efficient as raw memcpy. template class MemBlockBuilder { public: // Constructs a MemBlockBuilder of the specified size. @@ -28,8 +28,8 @@ template class MemBlockBuilder { MemBlockBuilder() : capacity_(0), write_ptr_(nullptr) {} /** - * Populates (or repopulates) the MemBlockBuilder to make it the specified size. - * This does not have resize semantics; when populate() is called any + * Populates (or repopulates) the MemBlockBuilder to make it the specified + * size. This does not have resize semantics; when populate() is called any * previous contents are erased. * * @param size The number of memory elements to allocate. diff --git a/source/common/stats/BUILD b/source/common/stats/BUILD index 95e35b137a29..7c7f9a4eb870 100644 --- a/source/common/stats/BUILD +++ b/source/common/stats/BUILD @@ -160,7 +160,7 @@ envoy_cc_library( "//include/envoy/stats:symbol_table_interface", "//source/common/common:assert_lib", "//source/common/common:logger_lib", - "//source/common/common:mem_block_lib", + "//source/common/common:mem_block_builder_lib", "//source/common/common:stack_array", "//source/common/common:thread_lib", "//source/common/common:utility_lib", diff --git a/source/common/stats/symbol_table_impl.h b/source/common/stats/symbol_table_impl.h index 6f117f730586..7cfba882402c 100644 --- a/source/common/stats/symbol_table_impl.h +++ b/source/common/stats/symbol_table_impl.h @@ -14,7 +14,7 @@ #include "common/common/assert.h" #include "common/common/hash.h" #include "common/common/lock_guard.h" -#include "common/common/mem_block.h" +#include "common/common/mem_block_builder.h" #include "common/common/non_copyable.h" #include "common/common/stack_array.h" #include "common/common/thread.h" diff --git a/test/common/common/BUILD b/test/common/common/BUILD index d10affe13601..ac426b3a45f3 100644 --- a/test/common/common/BUILD +++ b/test/common/common/BUILD @@ -69,9 +69,9 @@ envoy_cc_test( ) envoy_cc_test( - name = "mem_block_test", - srcs = ["mem_block_test.cc"], - deps = ["//source/common/common:mem_block_lib"], + name = "mem_block_builder_test", + srcs = ["mem_block_builder_test.cc"], + deps = ["//source/common/common:mem_block_builder_lib"], ) envoy_cc_test( diff --git a/test/common/common/mem_block_test.cc b/test/common/common/mem_block_builder_test.cc similarity index 97% rename from test/common/common/mem_block_test.cc rename to test/common/common/mem_block_builder_test.cc index 0c448e46a345..489cf2e990e9 100644 --- a/test/common/common/mem_block_test.cc +++ b/test/common/common/mem_block_builder_test.cc @@ -1,4 +1,4 @@ -#include "common/common/mem_block.h" +#include "common/common/mem_block_builder.h" #include "gtest/gtest.h" From 7b3150e1564bcfdb2c7b486a18ff40aa4f2ede1e Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Sat, 2 Nov 2019 17:27:10 -0400 Subject: [PATCH 22/36] format. Signed-off-by: Joshua Marantz --- source/common/common/mem_block_builder.h | 2 +- source/common/stats/symbol_table_impl.cc | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/source/common/common/mem_block_builder.h b/source/common/common/mem_block_builder.h index 75e6d6e48561..733f48dc9124 100644 --- a/source/common/common/mem_block_builder.h +++ b/source/common/common/mem_block_builder.h @@ -29,7 +29,7 @@ template class MemBlockBuilder { /** * Populates (or repopulates) the MemBlockBuilder to make it the specified - * size. This does not have resize semantics; when populate() is called any + * size. This does not have resize semantics; when populate() is called any * previous contents are erased. * * @param size The number of memory elements to allocate. diff --git a/source/common/stats/symbol_table_impl.cc b/source/common/stats/symbol_table_impl.cc index 749b855e64d2..5bf3a135ace7 100644 --- a/source/common/stats/symbol_table_impl.cc +++ b/source/common/stats/symbol_table_impl.cc @@ -58,7 +58,8 @@ uint64_t SymbolTableImpl::Encoding::encodingSizeBytes(uint64_t number) { return num_bytes; } -void SymbolTableImpl::Encoding::appendEncoding(uint64_t number, MemBlockBuilder& mem_block) { +void SymbolTableImpl::Encoding::appendEncoding(uint64_t number, + MemBlockBuilder& mem_block) { // UTF-8-like encoding where a value 127 or less gets written as a single // byte. For higher values we write the low-order 7 bits with a 1 in // the high-order bit. Then we right-shift 7 bits and keep adding more bytes @@ -484,7 +485,6 @@ SymbolTable::StoragePtr SymbolTableImpl::join(const StatNameVec& stat_names) con MemBlockBuilder mem_block(Encoding::totalSizeBytes(num_bytes)); Encoding::appendEncoding(num_bytes, mem_block); for (StatName stat_name : stat_names) { - //mem_block.appendData(stat_name.data(), stat_name.dataSize()); stat_name.copyDataToStorage(mem_block); } return mem_block.release(); From c4ddd4ab7104c2f30f6b3a7b48321e0a3ada52eb Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Sun, 3 Nov 2019 13:14:47 -0500 Subject: [PATCH 23/36] improve comments and function naming. Signed-off-by: Joshua Marantz --- source/common/common/mem_block_builder.h | 5 +++- source/common/stats/symbol_table_impl.cc | 14 +++++----- source/common/stats/symbol_table_impl.h | 35 ++++++++++++++++++------ 3 files changed, 37 insertions(+), 17 deletions(-) diff --git a/source/common/common/mem_block_builder.h b/source/common/common/mem_block_builder.h index 733f48dc9124..4574508f551c 100644 --- a/source/common/common/mem_block_builder.h +++ b/source/common/common/mem_block_builder.h @@ -110,9 +110,12 @@ template class MemBlockBuilder { */ std::vector toVector() const { return std::vector(data_.get(), write_ptr_); } -private: + /** + * @return The number of elements the have been added to the builder. + */ uint64_t size() const { return write_ptr_ - data_.get(); } +private: std::unique_ptr data_; uint64_t capacity_; T* write_ptr_; diff --git a/source/common/stats/symbol_table_impl.cc b/source/common/stats/symbol_table_impl.cc index 5bf3a135ace7..bcc75ea5e79f 100644 --- a/source/common/stats/symbol_table_impl.cc +++ b/source/common/stats/symbol_table_impl.cc @@ -44,8 +44,8 @@ void StatName::debugPrint() { #endif SymbolTableImpl::Encoding::~Encoding() { - // Verifies that moveToStorage() was called on this encoding. Failure - // to call moveToStorage() will result in leaks symbols. + // Verifies that moveToMemBlock() was called on this encoding. Failure + // to call moveToMemBlock() will result in leaks symbols. ASSERT(mem_block_.capacity() == 0); } @@ -113,7 +113,7 @@ SymbolVec SymbolTableImpl::Encoding::decodeSymbols(const SymbolTable::Storage ar return symbol_vec; } -void SymbolTableImpl::Encoding::moveToStorage(MemBlockBuilder& mem_block) { +void SymbolTableImpl::Encoding::moveToMemBlock(MemBlockBuilder& mem_block) { appendEncoding(data_bytes_required_, mem_block); mem_block.appendBlock(mem_block_); mem_block_.reset(); // Logically transfer ownership, enabling empty assert on destruct. @@ -405,7 +405,7 @@ SymbolTable::StoragePtr SymbolTableImpl::encode(absl::string_view name) { Encoding encoding; addTokensToEncoding(name, encoding); MemBlockBuilder mem_block(Encoding::totalSizeBytes(encoding.bytesRequired())); - encoding.moveToStorage(mem_block); + encoding.moveToMemBlock(mem_block); return mem_block.release(); } @@ -415,7 +415,7 @@ StatNameStorage::StatNameStorage(absl::string_view name, SymbolTable& table) StatNameStorage::StatNameStorage(StatName src, SymbolTable& table) { const uint64_t size = src.size(); MemBlockBuilder storage(size); - src.copyToStorage(storage); + src.copyToMemBlock(storage); bytes_ = storage.release(); table.incRefCount(statName()); } @@ -485,7 +485,7 @@ SymbolTable::StoragePtr SymbolTableImpl::join(const StatNameVec& stat_names) con MemBlockBuilder mem_block(Encoding::totalSizeBytes(num_bytes)); Encoding::appendEncoding(num_bytes, mem_block); for (StatName stat_name : stat_names) { - stat_name.copyDataToStorage(mem_block); + stat_name.appendDataToMemBlock(mem_block); } return mem_block.release(); } @@ -509,7 +509,7 @@ void SymbolTableImpl::populateList(const absl::string_view* names, uint32_t num_ MemBlockBuilder mem_block(total_size_bytes); mem_block.appendOne(num_names); for (auto& encoding : encodings) { - encoding.moveToStorage(mem_block); + encoding.moveToMemBlock(mem_block); } // This assertion double-checks the arithmetic where we computed diff --git a/source/common/stats/symbol_table_impl.h b/source/common/stats/symbol_table_impl.h index 7cfba882402c..7698f345e860 100644 --- a/source/common/stats/symbol_table_impl.h +++ b/source/common/stats/symbol_table_impl.h @@ -75,11 +75,11 @@ class SymbolTableImpl : public SymbolTable { class Encoding { public: /** - * Before destructing SymbolEncoding, you must call moveToStorage. This + * Before destructing SymbolEncoding, you must call moveToMemBlock. This * transfers ownership, and in particular, the responsibility to call - * SymbolTable::clear() on all referenced symbols. If we ever wanted - * to be able to destruct a SymbolEncoding without transferring it - * we could add a clear(SymbolTable&) method. + * SymbolTable::clear() on all referenced symbols. If we ever wanted to be + * able to destruct a SymbolEncoding without transferring it we could add a + * clear(SymbolTable&) method. */ ~Encoding(); @@ -112,9 +112,9 @@ class SymbolTableImpl : public SymbolTable { * Moves the contents of the vector into an allocated array. The array * must have been allocated with bytesRequired() bytes. * - * @param array destination memory to receive the encoded bytes. + * @param mem_block_builder memory block to receive the encoded bytes. */ - void moveToStorage(MemBlockBuilder& array); + void moveToMemBlock(MemBlockBuilder& array); /** * @param number A number to encode in a variable length byte-array. @@ -387,10 +387,27 @@ class StatName { */ uint64_t size() const { return SymbolTableImpl::Encoding::totalSizeBytes(dataSize()); } - void copyToStorage(MemBlockBuilder& storage) { - storage.appendData(size_and_data_, size()); + /** + * Copies the entire StatName representation into a MemBlockBuilder, including + * the length metadata at the beginning. The MemBlockBuilder must not have + * any other data in it. + * + * @param mem_block_builder the builder to receive the storage. + */ + void copyToMemBlock(MemBlockBuilder& mem_block_builder) { + ASSERT(mem_block_builder.size() == 0); + mem_block_builder.appendData(size_and_data_, size()); } - void copyDataToStorage(MemBlockBuilder& storage) { + + /** + * Appends the data portion of the StatName representation into a + * MemBlockBuilder, excluding the length metadata. This is appropriate for + * join(), where several stat-names are combined, and we only need the + * aggregated length metadata. + * + * @param mem_block_builder the builder to receive the storage. + */ + void appendDataToMemBlock(MemBlockBuilder& storage) { storage.appendData(data(), dataSize()); } From 482497779f00427c7c4e3b075986410d088d4718 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Sun, 3 Nov 2019 14:18:52 -0500 Subject: [PATCH 24/36] guard memcpy against 0-byte calls, which show up as asan failures. Signed-off-by: Joshua Marantz --- source/common/common/mem_block_builder.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/source/common/common/mem_block_builder.h b/source/common/common/mem_block_builder.h index 4574508f551c..a6d13e7cb992 100644 --- a/source/common/common/mem_block_builder.h +++ b/source/common/common/mem_block_builder.h @@ -67,7 +67,9 @@ template class MemBlockBuilder { */ void appendData(const T* data, uint64_t size) { ASSERT(capacityRemaining() >= size); - memcpy(write_ptr_, data, size * sizeof(T)); + if (size != 0) { + memcpy(write_ptr_, data, size * sizeof(T)); + } write_ptr_ += size; } From 64c0af422282b4b475fc7897f3403f4514a8fc0b Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Sun, 17 Nov 2019 18:04:21 -0500 Subject: [PATCH 25/36] RELEASE_ASSERT rather than ASSERT, use non-constructing allocation. Signed-off-by: Joshua Marantz --- source/common/common/mem_block_builder.h | 51 ++++++++++++++++-------- source/common/stats/symbol_table_impl.cc | 1 - 2 files changed, 34 insertions(+), 18 deletions(-) diff --git a/source/common/common/mem_block_builder.h b/source/common/common/mem_block_builder.h index a6d13e7cb992..798be1bfd1da 100644 --- a/source/common/common/mem_block_builder.h +++ b/source/common/common/mem_block_builder.h @@ -7,9 +7,10 @@ namespace Envoy { -// Manages a block of raw memory for objects of type T. T must be -// empty-constructible. This class carries extra member variables for tracking -// size, and a write-pointer to support safe appends. +// Manages a block of raw memory for objects of type T. T is generally expected +// to be a POD, where it makes esnse to memcpy over it. This class carries extra +// member variables for tracking size, and a write-pointer to support safe +// appends. // // MemBlockBuilder is used to safely write blocks of data into a memory // buffer. Due to two extra member variables, it is not optimal for storing in @@ -22,10 +23,11 @@ namespace Envoy { // roughly as efficient as raw memcpy. template class MemBlockBuilder { public: - // Constructs a MemBlockBuilder of the specified size. + // Constructs a MemBlockBuilder allowing for 'size' instances of T. explicit MemBlockBuilder(uint64_t size) - : data_(std::make_unique(size)), capacity_(size), write_ptr_(data_.get()) {} - MemBlockBuilder() : capacity_(0), write_ptr_(nullptr) {} + : data_(alloc(size)), capacity_(size), capacity_remaining_(size), write_ptr_(data_) {} + MemBlockBuilder() : data_(nullptr), capacity_(0), capacity_remaining_(0), write_ptr_(nullptr) {} + ~MemBlockBuilder() { reset(); } /** * Populates (or repopulates) the MemBlockBuilder to make it the specified @@ -35,9 +37,10 @@ template class MemBlockBuilder { * @param size The number of memory elements to allocate. */ void populate(uint64_t size) { - data_ = std::make_unique(size); + data_ = alloc(size); capacity_ = size; - write_ptr_ = data_.get(); + capacity_remaining_ = size; + write_ptr_ = data_; } /** @@ -53,8 +56,9 @@ template class MemBlockBuilder { * @param object the object to append. */ void appendOne(T object) { - ASSERT(capacityRemaining() >= 1); + RELEASE_ASSERT(capacity_remaining_ >= 1, "insufficient capacity"); *write_ptr_++ = object; + --capacity_remaining_; } /** @@ -66,11 +70,12 @@ template class MemBlockBuilder { * @param size The number of elements in the block. */ void appendData(const T* data, uint64_t size) { - ASSERT(capacityRemaining() >= size); + RELEASE_ASSERT(capacity_remaining_ >= size, "insufficient capacity"); if (size != 0) { memcpy(write_ptr_, data, size * sizeof(T)); } write_ptr_ += size; + capacity_remaining_ -= size; } /** @@ -78,19 +83,23 @@ template class MemBlockBuilder { * * @param src the block to append. */ - void appendBlock(const MemBlockBuilder& src) { appendData(src.data_.get(), src.size()); } + void appendBlock(const MemBlockBuilder& src) { appendData(src.data_, src.size()); } /** * @return the number of elements remaining in the MemBlockBuilder. */ - uint64_t capacityRemaining() const { return (data_.get() + capacity_) - write_ptr_; } + uint64_t capacityRemaining() const { return capacity_remaining_; } /** * Empties the contents of this. */ void reset() { - data_.reset(); + if (data_ != nullptr) { + delete [] static_cast(data_); + data_ = nullptr; + } capacity_ = 0; + capacity_remaining_ = 0; write_ptr_ = nullptr; } @@ -102,7 +111,10 @@ template class MemBlockBuilder { std::unique_ptr release() { write_ptr_ = nullptr; capacity_ = 0; - return std::move(data_); + capacity_remaining_ = 0; + std::unique_ptr ret(data_); + data_ = nullptr; + return ret; } /** @@ -110,16 +122,21 @@ template class MemBlockBuilder { * * This is exposed to help with unit testing. */ - std::vector toVector() const { return std::vector(data_.get(), write_ptr_); } + std::vector toVector() const { return std::vector(data_, write_ptr_); } /** * @return The number of elements the have been added to the builder. */ - uint64_t size() const { return write_ptr_ - data_.get(); } + uint64_t size() const { return write_ptr_ - data_; } private: - std::unique_ptr data_; + static T* alloc(uint64_t size) { + return static_cast(new uint8_t[size]); + } + + T* data_; uint64_t capacity_; + uint64_t capacity_remaining_; T* write_ptr_; }; diff --git a/source/common/stats/symbol_table_impl.cc b/source/common/stats/symbol_table_impl.cc index e6503e1e901d..eef1af95267a 100644 --- a/source/common/stats/symbol_table_impl.cc +++ b/source/common/stats/symbol_table_impl.cc @@ -87,7 +87,6 @@ void SymbolTableImpl::Encoding::addSymbols(const std::vector& symbols) { for (Symbol symbol : symbols) { appendEncoding(symbol, mem_block_); } - // ASSERT(static_cast(bytes - storage_.get()) == data_bytes_required_); } std::pair SymbolTableImpl::Encoding::decodeNumber(const uint8_t* encoding) { From 06e3fbc19ea60639b9743897a7595d1151ec052a Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Sun, 17 Nov 2019 21:35:02 -0500 Subject: [PATCH 26/36] add test to make sure we don't construct anything. Signed-off-by: Joshua Marantz --- source/common/common/mem_block_builder.h | 4 ++-- test/common/common/mem_block_builder_test.cc | 23 ++++++++++++++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/source/common/common/mem_block_builder.h b/source/common/common/mem_block_builder.h index 798be1bfd1da..8e61968a8f57 100644 --- a/source/common/common/mem_block_builder.h +++ b/source/common/common/mem_block_builder.h @@ -95,7 +95,7 @@ template class MemBlockBuilder { */ void reset() { if (data_ != nullptr) { - delete [] static_cast(data_); + delete [] reinterpret_cast(reinterpret_cast(data_)); data_ = nullptr; } capacity_ = 0; @@ -131,7 +131,7 @@ template class MemBlockBuilder { private: static T* alloc(uint64_t size) { - return static_cast(new uint8_t[size]); + return reinterpret_cast(reinterpret_cast(new uint8_t[size])); } T* data_; diff --git a/test/common/common/mem_block_builder_test.cc b/test/common/common/mem_block_builder_test.cc index 489cf2e990e9..4cbc66ec4dc2 100644 --- a/test/common/common/mem_block_builder_test.cc +++ b/test/common/common/mem_block_builder_test.cc @@ -61,4 +61,27 @@ TEST(MemBlockBuilderTest, AppendUint32) { EXPECT_EQ(0, mem_block.capacity()); } +static uint32_t calls_to_constructor = 0; +static uint32_t calls_to_destructor = 0; + +class TrackConstructors { + public: + TrackConstructors() { ++calls_to_constructor; } + ~TrackConstructors() { ++calls_to_destructor; } +}; + +TEST(MemBlockBuilderTesaet, AppendNonPod) { + { + MemBlockBuilder mem_block(10); + } + EXPECT_EQ(0, calls_to_constructor); + EXPECT_EQ(0, calls_to_destructor); + { + MemBlockBuilder mem_block(10); + std::unique_ptr data = mem_block.release(); + } + EXPECT_EQ(0, calls_to_constructor); + EXPECT_EQ(0, calls_to_destructor); +} + } // namespace Envoy From a281e289d00abf3df18660c3b3e2c2f4bb6eecd6 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Sun, 17 Nov 2019 23:49:15 -0500 Subject: [PATCH 27/36] update gold values Signed-off-by: Joshua Marantz --- test/integration/stats_integration_test.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/integration/stats_integration_test.cc b/test/integration/stats_integration_test.cc index d980bba9be7c..723386cc1ff1 100644 --- a/test/integration/stats_integration_test.cc +++ b/test/integration/stats_integration_test.cc @@ -259,7 +259,7 @@ TEST_P(ClusterMemoryTestRunner, MemoryLargeClusterSizeWithFakeSymbolTable) { // 2019/10/17 8484 43340 44000 stats: add unit support to histogram // 2019/11/01 8859 43563 44000 build: switch to libc++ by default // 2019/11/15 9040 43371 44000 build: update protobuf to 3.10.1 - // 2019/11/13 8779 42856 43000 use var-length coding for name length + // 2019/11/18 8779 42887 43000 use var-length coding for name length // Note: when adjusting this value: EXPECT_MEMORY_EQ is active only in CI // 'release' builds, where we control the platform and tool-chain. So you @@ -273,7 +273,7 @@ TEST_P(ClusterMemoryTestRunner, MemoryLargeClusterSizeWithFakeSymbolTable) { // If you encounter a failure here, please see // https://github.com/envoyproxy/envoy/blob/master/source/docs/stats.md#stats-memory-tests // for details on how to fix. - EXPECT_MEMORY_EQ(m_per_cluster, 42856); // 104 bytes higher than a debug build. + EXPECT_MEMORY_EQ(m_per_cluster, 42887); // 104 bytes higher than a debug build. EXPECT_MEMORY_LE(m_per_cluster, 44000); } @@ -306,7 +306,7 @@ TEST_P(ClusterMemoryTestRunner, MemoryLargeClusterSizeWithRealSymbolTable) { // 2019/10/17 8484 34998 35000 stats: add unit support to histogram // 2019/11/01 8859 35221 36000 build: switch to libc++ by default // 2019/11/15 9040 35029 35500 build: update protobuf to 3.10.1 - // 2019/11/13 8779 34990 35000 use var-length coding for name lengths + // 2019/11/13 8779 35021 35000 use var-length coding for name lengths // Note: when adjusting this value: EXPECT_MEMORY_EQ is active only in CI // 'release' builds, where we control the platform and tool-chain. So you @@ -320,7 +320,7 @@ TEST_P(ClusterMemoryTestRunner, MemoryLargeClusterSizeWithRealSymbolTable) { // If you encounter a failure here, please see // https://github.com/envoyproxy/envoy/blob/master/source/docs/stats.md#stats-memory-tests // for details on how to fix. - EXPECT_MEMORY_EQ(m_per_cluster, 35029); // 104 bytes higher than a debug build. + EXPECT_MEMORY_EQ(m_per_cluster, 35021); // 104 bytes higher than a debug build. EXPECT_MEMORY_LE(m_per_cluster, 35500); } From cd98dd2ea83d5c9b94cc25f8c735d6601a97400c Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Tue, 19 Nov 2019 14:13:53 -0500 Subject: [PATCH 28/36] Go back to new-ing an array and clean up. Signed-off-by: Joshua Marantz --- source/common/common/mem_block_builder.h | 49 +++++++++----------- test/common/common/mem_block_builder_test.cc | 23 --------- 2 files changed, 21 insertions(+), 51 deletions(-) diff --git a/source/common/common/mem_block_builder.h b/source/common/common/mem_block_builder.h index 8e61968a8f57..20634f814264 100644 --- a/source/common/common/mem_block_builder.h +++ b/source/common/common/mem_block_builder.h @@ -8,7 +8,7 @@ namespace Envoy { // Manages a block of raw memory for objects of type T. T is generally expected -// to be a POD, where it makes esnse to memcpy over it. This class carries extra +// to be a POD, where it makes sense to memcpy over it. This class carries extra // member variables for tracking size, and a write-pointer to support safe // appends. // @@ -23,24 +23,24 @@ namespace Envoy { // roughly as efficient as raw memcpy. template class MemBlockBuilder { public: - // Constructs a MemBlockBuilder allowing for 'size' instances of T. - explicit MemBlockBuilder(uint64_t size) - : data_(alloc(size)), capacity_(size), capacity_remaining_(size), write_ptr_(data_) {} - MemBlockBuilder() : data_(nullptr), capacity_(0), capacity_remaining_(0), write_ptr_(nullptr) {} - ~MemBlockBuilder() { reset(); } + // Constructs a MemBlockBuilder allowing for 'capacity' instances of T. + explicit MemBlockBuilder(uint64_t capacity) + : data_(std::make_unique(capacity)), capacity_(capacity), capacity_remaining_(capacity), + write_ptr_(data_.get()) {} + MemBlockBuilder() : capacity_(0), capacity_remaining_(0), write_ptr_(nullptr) {} /** * Populates (or repopulates) the MemBlockBuilder to make it the specified - * size. This does not have resize semantics; when populate() is called any + * capacity. This does not have resize semantics; when populate() is called any * previous contents are erased. * - * @param size The number of memory elements to allocate. + * @param capcity The number of memory elements to allocate. */ - void populate(uint64_t size) { - data_ = alloc(size); - capacity_ = size; - capacity_remaining_ = size; - write_ptr_ = data_; + void populate(uint64_t capacity) { + data_ = std::make_unique(capacity); + capacity_ = capacity; + capacity_remaining_ = capacity; + write_ptr_ = data_.get(); } /** @@ -57,6 +57,7 @@ template class MemBlockBuilder { */ void appendOne(T object) { RELEASE_ASSERT(capacity_remaining_ >= 1, "insufficient capacity"); + ASSERT(write_ptr_ < (data_.get() + capacity_)); *write_ptr_++ = object; --capacity_remaining_; } @@ -71,6 +72,7 @@ template class MemBlockBuilder { */ void appendData(const T* data, uint64_t size) { RELEASE_ASSERT(capacity_remaining_ >= size, "insufficient capacity"); + ASSERT((write_ptr_ + size) <= (data_.get() + capacity_)); if (size != 0) { memcpy(write_ptr_, data, size * sizeof(T)); } @@ -83,7 +85,7 @@ template class MemBlockBuilder { * * @param src the block to append. */ - void appendBlock(const MemBlockBuilder& src) { appendData(src.data_, src.size()); } + void appendBlock(const MemBlockBuilder& src) { appendData(src.data_.get(), src.size()); } /** * @return the number of elements remaining in the MemBlockBuilder. @@ -94,10 +96,7 @@ template class MemBlockBuilder { * Empties the contents of this. */ void reset() { - if (data_ != nullptr) { - delete [] reinterpret_cast(reinterpret_cast(data_)); - data_ = nullptr; - } + data_.reset(); capacity_ = 0; capacity_remaining_ = 0; write_ptr_ = nullptr; @@ -112,9 +111,7 @@ template class MemBlockBuilder { write_ptr_ = nullptr; capacity_ = 0; capacity_remaining_ = 0; - std::unique_ptr ret(data_); - data_ = nullptr; - return ret; + return std::move(data_); } /** @@ -122,19 +119,15 @@ template class MemBlockBuilder { * * This is exposed to help with unit testing. */ - std::vector toVector() const { return std::vector(data_, write_ptr_); } + std::vector toVector() const { return std::vector(data_.get(), write_ptr_); } /** * @return The number of elements the have been added to the builder. */ - uint64_t size() const { return write_ptr_ - data_; } + uint64_t size() const { return write_ptr_ - data_.get(); } private: - static T* alloc(uint64_t size) { - return reinterpret_cast(reinterpret_cast(new uint8_t[size])); - } - - T* data_; + std::unique_ptr data_; uint64_t capacity_; uint64_t capacity_remaining_; T* write_ptr_; diff --git a/test/common/common/mem_block_builder_test.cc b/test/common/common/mem_block_builder_test.cc index 4cbc66ec4dc2..489cf2e990e9 100644 --- a/test/common/common/mem_block_builder_test.cc +++ b/test/common/common/mem_block_builder_test.cc @@ -61,27 +61,4 @@ TEST(MemBlockBuilderTest, AppendUint32) { EXPECT_EQ(0, mem_block.capacity()); } -static uint32_t calls_to_constructor = 0; -static uint32_t calls_to_destructor = 0; - -class TrackConstructors { - public: - TrackConstructors() { ++calls_to_constructor; } - ~TrackConstructors() { ++calls_to_destructor; } -}; - -TEST(MemBlockBuilderTesaet, AppendNonPod) { - { - MemBlockBuilder mem_block(10); - } - EXPECT_EQ(0, calls_to_constructor); - EXPECT_EQ(0, calls_to_destructor); - { - MemBlockBuilder mem_block(10); - std::unique_ptr data = mem_block.release(); - } - EXPECT_EQ(0, calls_to_constructor); - EXPECT_EQ(0, calls_to_destructor); -} - } // namespace Envoy From 6dcc218716341bc8de366991772a447c81105253 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Tue, 10 Dec 2019 17:16:21 -0500 Subject: [PATCH 29/36] typo and gold values update. Signed-off-by: Joshua Marantz --- source/common/stats/symbol_table_impl.cc | 2 +- test/integration/stats_integration_test.cc | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/source/common/stats/symbol_table_impl.cc b/source/common/stats/symbol_table_impl.cc index eef1af95267a..6c62268299c6 100644 --- a/source/common/stats/symbol_table_impl.cc +++ b/source/common/stats/symbol_table_impl.cc @@ -70,7 +70,7 @@ void SymbolTableImpl::Encoding::appendEncoding(uint64_t number, // high-order bit 0. do { if (number < (1 << 7)) { - mem_block.appendOne(number); // number <= 127 get encoded in one byte. + mem_block.appendOne(number); // number <= 127 gets encoded in one byte. } else { mem_block.appendOne((number & Low7Bits) | SpilloverMask); // >= 128 need spillover bytes. } diff --git a/test/integration/stats_integration_test.cc b/test/integration/stats_integration_test.cc index 64c622199d1f..2bde648d46ef 100644 --- a/test/integration/stats_integration_test.cc +++ b/test/integration/stats_integration_test.cc @@ -260,7 +260,7 @@ TEST_P(ClusterMemoryTestRunner, MemoryLargeClusterSizeWithFakeSymbolTable) { // 2019/11/01 8859 43563 44000 build: switch to libc++ by default // 2019/11/15 9040 43371 44000 build: update protobuf to 3.10.1 // 2019/11/15 9040 43403 44000 upstream: track whether cluster is local - // 2019/12/10 8779 42887 43500 use var-length coding for name length + // 2019/12/10 8779 42919 43500 use var-length coding for name length // Note: when adjusting this value: EXPECT_MEMORY_EQ is active only in CI // 'release' builds, where we control the platform and tool-chain. So you @@ -274,7 +274,7 @@ TEST_P(ClusterMemoryTestRunner, MemoryLargeClusterSizeWithFakeSymbolTable) { // If you encounter a failure here, please see // https://github.com/envoyproxy/envoy/blob/master/source/docs/stats.md#stats-memory-tests // for details on how to fix. - EXPECT_MEMORY_EQ(m_per_cluster, 42887); // 104 bytes higher than a debug build. + EXPECT_MEMORY_EQ(m_per_cluster, 42919); // 104 bytes higher than a debug build. EXPECT_MEMORY_LE(m_per_cluster, 43500); } @@ -308,7 +308,7 @@ TEST_P(ClusterMemoryTestRunner, MemoryLargeClusterSizeWithRealSymbolTable) { // 2019/11/01 8859 35221 36000 build: switch to libc++ by default // 2019/11/15 9040 35029 35500 build: update protobuf to 3.10.1 // 2019/11/15 9040 35061 35500 upstream: track whether cluster is local - // 2019/12/10 8779 35021 35000 use var-length coding for name lengths + // 2019/12/10 8779 35053 35000 use var-length coding for name lengths // Note: when adjusting this value: EXPECT_MEMORY_EQ is active only in CI // 'release' builds, where we control the platform and tool-chain. So you @@ -322,7 +322,7 @@ TEST_P(ClusterMemoryTestRunner, MemoryLargeClusterSizeWithRealSymbolTable) { // If you encounter a failure here, please see // https://github.com/envoyproxy/envoy/blob/master/source/docs/stats.md#stats-memory-tests // for details on how to fix. - EXPECT_MEMORY_EQ(m_per_cluster, 35021); // 104 bytes higher than a debug build. + EXPECT_MEMORY_EQ(m_per_cluster, 35053); // 104 bytes higher than a debug build. EXPECT_MEMORY_LE(m_per_cluster, 35500); } From 9033ac6c9d4b909e2211027f45848d4557cd1616 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Sun, 15 Dec 2019 12:49:50 -0500 Subject: [PATCH 30/36] post-merge cleanup Signed-off-by: Joshua Marantz --- source/common/common/mem_block_builder.h | 3 ++- source/common/stats/fake_symbol_table_impl.h | 6 ++++-- source/common/stats/symbol_table_impl.h | 4 ++-- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/source/common/common/mem_block_builder.h b/source/common/common/mem_block_builder.h index 9749583b4947..734fa1f144ba 100644 --- a/source/common/common/mem_block_builder.h +++ b/source/common/common/mem_block_builder.h @@ -96,7 +96,8 @@ template class MemBlockBuilder { * @return the transferred storage. */ std::unique_ptr release() { - write_span_.reset(); + uint8_t* empty = nullptr; + write_span_ = absl::MakeSpan(empty, 0); return std::move(data_); } diff --git a/source/common/stats/fake_symbol_table_impl.h b/source/common/stats/fake_symbol_table_impl.h index 821337587780..087d2bba9c13 100644 --- a/source/common/stats/fake_symbol_table_impl.h +++ b/source/common/stats/fake_symbol_table_impl.h @@ -79,7 +79,8 @@ class FakeSymbolTableImpl : public SymbolTable { size_t sz = name.size(); SymbolTableImpl::Encoding::appendEncoding(sz, mem_block); if (!name.empty()) { - mem_block.appendData(reinterpret_cast(name.data()), sz); + mem_block.appendData(absl::MakeConstSpan( + reinterpret_cast(name.data()), sz)); } } @@ -140,7 +141,8 @@ class FakeSymbolTableImpl : public SymbolTable { name = StringUtil::removeTrailingCharacters(name, '.'); MemBlockBuilder mem_block(SymbolTableImpl::Encoding::totalSizeBytes(name.size())); SymbolTableImpl::Encoding::appendEncoding(name.size(), mem_block); - mem_block.appendData(reinterpret_cast(name.data()), name.size()); + mem_block.appendData(absl::MakeConstSpan( + reinterpret_cast(name.data()), name.size())); return mem_block.release(); } }; diff --git a/source/common/stats/symbol_table_impl.h b/source/common/stats/symbol_table_impl.h index 7698f345e860..7cd2856ba2e9 100644 --- a/source/common/stats/symbol_table_impl.h +++ b/source/common/stats/symbol_table_impl.h @@ -396,7 +396,7 @@ class StatName { */ void copyToMemBlock(MemBlockBuilder& mem_block_builder) { ASSERT(mem_block_builder.size() == 0); - mem_block_builder.appendData(size_and_data_, size()); + mem_block_builder.appendData(absl::MakeConstSpan(size_and_data_, size())); } /** @@ -408,7 +408,7 @@ class StatName { * @param mem_block_builder the builder to receive the storage. */ void appendDataToMemBlock(MemBlockBuilder& storage) { - storage.appendData(data(), dataSize()); + storage.appendData(absl::MakeConstSpan(data(), dataSize())); } #ifndef ENVOY_CONFIG_COVERAGE From d19d7224614b4b1d9597638697a5e894f59b18f2 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Sun, 15 Dec 2019 12:53:47 -0500 Subject: [PATCH 31/36] more cleanups Signed-off-by: Joshua Marantz --- source/common/stats/fake_symbol_table_impl.h | 8 ++++---- test/common/common/mem_block_builder_test.cc | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/source/common/stats/fake_symbol_table_impl.h b/source/common/stats/fake_symbol_table_impl.h index 087d2bba9c13..a64325cf1177 100644 --- a/source/common/stats/fake_symbol_table_impl.h +++ b/source/common/stats/fake_symbol_table_impl.h @@ -79,8 +79,8 @@ class FakeSymbolTableImpl : public SymbolTable { size_t sz = name.size(); SymbolTableImpl::Encoding::appendEncoding(sz, mem_block); if (!name.empty()) { - mem_block.appendData(absl::MakeConstSpan( - reinterpret_cast(name.data()), sz)); + mem_block.appendData( + absl::MakeConstSpan(reinterpret_cast(name.data()), sz)); } } @@ -141,8 +141,8 @@ class FakeSymbolTableImpl : public SymbolTable { name = StringUtil::removeTrailingCharacters(name, '.'); MemBlockBuilder mem_block(SymbolTableImpl::Encoding::totalSizeBytes(name.size())); SymbolTableImpl::Encoding::appendEncoding(name.size(), mem_block); - mem_block.appendData(absl::MakeConstSpan( - reinterpret_cast(name.data()), name.size())); + mem_block.appendData( + absl::MakeConstSpan(reinterpret_cast(name.data()), name.size())); return mem_block.release(); } }; diff --git a/test/common/common/mem_block_builder_test.cc b/test/common/common/mem_block_builder_test.cc index d715d0dc9268..6435ebeb5558 100644 --- a/test/common/common/mem_block_builder_test.cc +++ b/test/common/common/mem_block_builder_test.cc @@ -1,7 +1,7 @@ -#include "common/common/mem_block_builder.h" - #include +#include "common/common/mem_block_builder.h" + #include "gtest/gtest.h" namespace Envoy { From ae1da1b4ea537f3decd97976b102c4273493814f Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Sun, 15 Dec 2019 15:27:44 -0500 Subject: [PATCH 32/36] Cleanup lifetime issues in test infrastructure. Signed-off-by: Joshua Marantz --- test/common/stats/symbol_table_impl_test.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/common/stats/symbol_table_impl_test.cc b/test/common/stats/symbol_table_impl_test.cc index 5a4e85a07336..a4304ef5b42c 100644 --- a/test/common/stats/symbol_table_impl_test.cc +++ b/test/common/stats/symbol_table_impl_test.cc @@ -76,9 +76,9 @@ class StatNameTest : public testing::TestWithParam { MemBlockBuilder mem_block(block_size); SymbolTableImpl::Encoding::appendEncoding(number, mem_block); EXPECT_EQ(block_size, num_bytes + mem_block.capacityRemaining()); - std::vector vec = mem_block.toVector(); - EXPECT_EQ(number, SymbolTableImpl::Encoding::decodeNumber(&vec[0]).first) << number; - return vec; + absl::Span span = mem_block.span(); + EXPECT_EQ(number, SymbolTableImpl::Encoding::decodeNumber(span.data()).first) << number; + return std::vector(span.data(), span.data() + span.size()); } FakeSymbolTableImpl* fake_symbol_table_{nullptr}; From bc9871e9eccee8fb37c83bf70ee48d1f51d9932c Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Mon, 16 Dec 2019 15:09:43 -0500 Subject: [PATCH 33/36] update MemBlockBuilder interface change Signed-off-by: Joshua Marantz --- source/common/common/mem_block_builder.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/source/common/common/mem_block_builder.h b/source/common/common/mem_block_builder.h index 734fa1f144ba..fac7f01cc0c6 100644 --- a/source/common/common/mem_block_builder.h +++ b/source/common/common/mem_block_builder.h @@ -96,8 +96,7 @@ template class MemBlockBuilder { * @return the transferred storage. */ std::unique_ptr release() { - uint8_t* empty = nullptr; - write_span_ = absl::MakeSpan(empty, 0); + write_span_ = absl::MakeSpan(static_cast(nullptr), 0); return std::move(data_); } From f4ef7c6f0f7290c66795782d5cd607272d84738f Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Mon, 16 Dec 2019 15:11:00 -0500 Subject: [PATCH 34/36] popuplate -> setCacacity Signed-off-by: Joshua Marantz --- source/common/stats/symbol_table_impl.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/common/stats/symbol_table_impl.cc b/source/common/stats/symbol_table_impl.cc index 6c62268299c6..9ce19a85bedb 100644 --- a/source/common/stats/symbol_table_impl.cc +++ b/source/common/stats/symbol_table_impl.cc @@ -83,7 +83,7 @@ void SymbolTableImpl::Encoding::addSymbols(const std::vector& symbols) { for (Symbol symbol : symbols) { data_bytes_required_ += encodingSizeBytes(symbol); } - mem_block_.populate(data_bytes_required_); + mem_block_.setCapacity(data_bytes_required_); for (Symbol symbol : symbols) { appendEncoding(symbol, mem_block_); } From 3edfac7cbace275f33b28713676748dbc239c525 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Tue, 17 Dec 2019 23:02:56 -0500 Subject: [PATCH 35/36] add direct fuzzing of the encoder. Signed-off-by: Joshua Marantz --- test/common/stats/symbol_table_fuzz_test.cc | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/test/common/stats/symbol_table_fuzz_test.cc b/test/common/stats/symbol_table_fuzz_test.cc index 715d8ce829ca..a69de3256364 100644 --- a/test/common/stats/symbol_table_fuzz_test.cc +++ b/test/common/stats/symbol_table_fuzz_test.cc @@ -24,6 +24,20 @@ DEFINE_FUZZER(const uint8_t* buf, size_t len) { // string before comparing. absl::string_view trimmed_fuzz_data = StringUtil::removeTrailingCharacters(next_data, '.'); FUZZ_ASSERT(trimmed_fuzz_data == symbol_table.toString(stat_name)); + + // Also encode the string directly, without symbolizing it. + MemBlockBuilder mem_block(SymbolTableImpl::Encoding::totalSizeBytes(next_data.size())); + SymbolTableImpl::Encoding::appendEncoding(next_data.size(), mem_block); + const uint8_t* data = reinterpret_cast(next_data.data()); + mem_block.appendData(absl::MakeSpan(data, data + next_data.size())); + FUZZ_ASSERT(mem_block.capacityRemaining() == 0); + absl::Span span = mem_block.span(); + std::pair number_consumed = + SymbolTableImpl::Encoding::decodeNumber(span.data()); + FUZZ_ASSERT(number_consumed.first == next_data.size()); + span.remove_prefix(number_consumed.second); + FUZZ_ASSERT(absl::string_view(reinterpret_cast(span.data()), span.size()) == + next_data); } } From 4f5be6cf99737dbae6e27ae98ad48d6a97dc55d2 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Wed, 18 Dec 2019 16:34:51 -0500 Subject: [PATCH 36/36] Share testing infrastructure between fuzz-tests and unit-tests. Signed-off-by: Joshua Marantz --- test/common/stats/BUILD | 1 + test/common/stats/stat_test_utility.cc | 31 +++++++++++++++++++++ test/common/stats/stat_test_utility.h | 8 ++++++ test/common/stats/symbol_table_fuzz_test.cc | 24 ++++++++-------- test/common/stats/symbol_table_impl_test.cc | 21 ++++++++------ 5 files changed, 65 insertions(+), 20 deletions(-) diff --git a/test/common/stats/BUILD b/test/common/stats/BUILD index 3bb028802e23..0c8e4d868b1c 100644 --- a/test/common/stats/BUILD +++ b/test/common/stats/BUILD @@ -129,6 +129,7 @@ envoy_cc_fuzz_test( srcs = ["symbol_table_fuzz_test.cc"], corpus = "symbol_table_corpus", deps = [ + ":stat_test_utility_lib", "//source/common/buffer:buffer_lib", "//source/common/common:assert_lib", "//source/common/decompressor:decompressor_lib", diff --git a/test/common/stats/stat_test_utility.cc b/test/common/stats/stat_test_utility.cc index 8520266e433f..adcd6d7a3fa5 100644 --- a/test/common/stats/stat_test_utility.cc +++ b/test/common/stats/stat_test_utility.cc @@ -135,6 +135,37 @@ MemoryTest::Mode MemoryTest::mode() { #endif } +// TODO(jmarantz): this utility is intended to be costs both for unit tests +// and fuzz tests. But those have different checking macros, e.g. EXPECT_EQ vs +// FUZZ_ASSERT. +std::vector serializeDeserializeNumber(uint64_t number) { + uint64_t num_bytes = SymbolTableImpl::Encoding::encodingSizeBytes(number); + const uint64_t block_size = 10; + MemBlockBuilder mem_block(block_size); + SymbolTableImpl::Encoding::appendEncoding(number, mem_block); + num_bytes += mem_block.capacityRemaining(); + RELEASE_ASSERT(block_size == num_bytes, absl::StrCat("Encoding size issue: block_size=", + block_size, " num_bytes=", num_bytes)); + absl::Span span = mem_block.span(); + RELEASE_ASSERT(number == SymbolTableImpl::Encoding::decodeNumber(span.data()).first, ""); + return std::vector(span.data(), span.data() + span.size()); +} + +void serializeDeserializeString(absl::string_view in) { + MemBlockBuilder mem_block(SymbolTableImpl::Encoding::totalSizeBytes(in.size())); + SymbolTableImpl::Encoding::appendEncoding(in.size(), mem_block); + const uint8_t* data = reinterpret_cast(in.data()); + mem_block.appendData(absl::MakeSpan(data, data + in.size())); + RELEASE_ASSERT(mem_block.capacityRemaining() == 0, ""); + absl::Span span = mem_block.span(); + const std::pair number_consumed = + SymbolTableImpl::Encoding::decodeNumber(span.data()); + RELEASE_ASSERT(number_consumed.first == in.size(), absl::StrCat("size matches: ", in)); + span.remove_prefix(number_consumed.second); + const absl::string_view out(reinterpret_cast(span.data()), span.size()); + RELEASE_ASSERT(in == out, absl::StrCat("'", in, "' != '", out, "'")); +} + } // namespace TestUtil } // namespace Stats } // namespace Envoy diff --git a/test/common/stats/stat_test_utility.h b/test/common/stats/stat_test_utility.h index c10fe4032065..ae0857674867 100644 --- a/test/common/stats/stat_test_utility.h +++ b/test/common/stats/stat_test_utility.h @@ -112,6 +112,14 @@ class SymbolTableCreatorTestPeer { } }; +// Serializes a number into a uint8_t array, and check that it de-serializes to +// the same number. The serialized number is also returned, which can be +// checked in unit tests, but ignored in fuzz tests. +std::vector serializeDeserializeNumber(uint64_t number); + +// Serializes a string into a MemBlock and then decodes it. +void serializeDeserializeString(absl::string_view in); + } // namespace TestUtil } // namespace Stats } // namespace Envoy diff --git a/test/common/stats/symbol_table_fuzz_test.cc b/test/common/stats/symbol_table_fuzz_test.cc index a69de3256364..884a16512e1d 100644 --- a/test/common/stats/symbol_table_fuzz_test.cc +++ b/test/common/stats/symbol_table_fuzz_test.cc @@ -2,6 +2,7 @@ #include "common/common/base64.h" #include "common/stats/symbol_table_impl.h" +#include "test/common/stats/stat_test_utility.h" #include "test/fuzz/fuzz_runner.h" #include "test/fuzz/utility.h" @@ -26,18 +27,17 @@ DEFINE_FUZZER(const uint8_t* buf, size_t len) { FUZZ_ASSERT(trimmed_fuzz_data == symbol_table.toString(stat_name)); // Also encode the string directly, without symbolizing it. - MemBlockBuilder mem_block(SymbolTableImpl::Encoding::totalSizeBytes(next_data.size())); - SymbolTableImpl::Encoding::appendEncoding(next_data.size(), mem_block); - const uint8_t* data = reinterpret_cast(next_data.data()); - mem_block.appendData(absl::MakeSpan(data, data + next_data.size())); - FUZZ_ASSERT(mem_block.capacityRemaining() == 0); - absl::Span span = mem_block.span(); - std::pair number_consumed = - SymbolTableImpl::Encoding::decodeNumber(span.data()); - FUZZ_ASSERT(number_consumed.first == next_data.size()); - span.remove_prefix(number_consumed.second); - FUZZ_ASSERT(absl::string_view(reinterpret_cast(span.data()), span.size()) == - next_data); + TestUtil::serializeDeserializeString(next_data); + + // Grab the first few bytes from next_data to synthesize together a random uint64_t. + if (next_data.size() > 1) { + uint32_t num_bytes = (next_data[0] % 8) + 1; // random number between 1 and 8 inclusive. + uint64_t number = 0; + for (uint32_t i = 0; i < num_bytes; ++i) { + number = 256 * number + next_data[i + 1]; + } + TestUtil::serializeDeserializeNumber(number); + } } } diff --git a/test/common/stats/symbol_table_impl_test.cc b/test/common/stats/symbol_table_impl_test.cc index a4304ef5b42c..d185898cd36f 100644 --- a/test/common/stats/symbol_table_impl_test.cc +++ b/test/common/stats/symbol_table_impl_test.cc @@ -71,14 +71,7 @@ class StatNameTest : public testing::TestWithParam { StatName makeStat(absl::string_view name) { return pool_->add(name); } std::vector serializeDeserialize(uint64_t number) { - const uint64_t num_bytes = SymbolTableImpl::Encoding::encodingSizeBytes(number); - const uint64_t block_size = 10; - MemBlockBuilder mem_block(block_size); - SymbolTableImpl::Encoding::appendEncoding(number, mem_block); - EXPECT_EQ(block_size, num_bytes + mem_block.capacityRemaining()); - absl::Span span = mem_block.span(); - EXPECT_EQ(number, SymbolTableImpl::Encoding::decodeNumber(span.data()).first) << number; - return std::vector(span.data(), span.data() + span.size()); + return TestUtil::serializeDeserializeNumber(number); } FakeSymbolTableImpl* fake_symbol_table_{nullptr}; @@ -120,6 +113,18 @@ TEST_P(StatNameTest, SerializeBytes) { } } +TEST_P(StatNameTest, SerializeStrings) { + TestUtil::serializeDeserializeString(""); + TestUtil::serializeDeserializeString("Hello, world!"); + TestUtil::serializeDeserializeString("embedded\0\nul"); + TestUtil::serializeDeserializeString(std::string(200, 'a')); + TestUtil::serializeDeserializeString(std::string(2000, 'a')); + TestUtil::serializeDeserializeString(std::string(20000, 'a')); + TestUtil::serializeDeserializeString(std::string(200000, 'a')); + TestUtil::serializeDeserializeString(std::string(2000000, 'a')); + TestUtil::serializeDeserializeString(std::string(20000000, 'a')); +} + TEST_P(StatNameTest, AllocFree) { encodeDecode("hello.world"); } TEST_P(StatNameTest, TestArbitrarySymbolRoundtrip) {