diff --git a/README.md b/README.md index a6f2687..e6a2e22 100644 --- a/README.md +++ b/README.md @@ -5,8 +5,8 @@ * Header-only library * Fast, asynchronous, multi-threaded processing using: - [Lock-free Concurrent Queues](https://github.com/cameron314/concurrentqueue) - - [Robin hood Hashing](https://github.com/Tessil/robin-map) -* Requires C++11 + - [Robin hood Hashing](https://github.com/martinus/robin-hood-hashing) +* Requires C++17 * MIT License ## Table of Contents @@ -29,7 +29,7 @@ Simply include reader.hpp and you're good to go. ```cpp -#include +#include ``` To start parsing CSV files, create a ```csv::Reader``` object and call ```.read(filename)```. @@ -43,8 +43,8 @@ This ```.read``` method is non-blocking. The reader spawns multiple threads to t ```cpp while(foo.busy()) { if (foo.has_row()) { - auto row = foo.next_row(); // Each row is a robin_map (https://github.com/Tessil/robin-map) - auto foo = row["foo"] // You can use it just like an std::unordered_map + auto row = foo.next_row(); // Each row is a csv::unordered_flat_map (github.com/martinus/robin-hood-hashing) + auto foo = row["foo"] // You can use it just like an std::unordered_map auto bar = row["bar"]; // do something } @@ -256,7 +256,7 @@ Note: Do not provide num_rows greater than the actual number of rows in the file void parse(const std::string& filename) { csv::Reader foo; foo.read(filename); - std::vector> rows; + std::vector> rows; while (foo.busy()) { if (foo.ready()) { auto row = foo.next_row(); @@ -267,7 +267,7 @@ void parse(const std::string& filename) { ``` ```bash -$ g++ -pthread -std=c++11 -O3 -Iinclude/ -o test benchmark.cpp +$ g++ -pthread -std=c++17 -O3 -Iinclude/ -o test benchmark.cpp $ time ./test ``` @@ -289,7 +289,7 @@ Here are the average-case execution times: Simply include writer.hpp and you're good to go. ```cpp -#include +#include ``` To start writing CSV files, create a ```csv::Writer``` object and provide a filename: @@ -308,13 +308,13 @@ foo.configure_dialect() Now it's time to write rows. You can do this in multiple ways: ```cpp -foo.write_row("1", "2", "3"); // parameter packing -foo.write_row({"4", "5", "6"}); // std::vector -foo.write_row(std::map{ // std::map +foo.write_row("1", "2", "3"); // parameter packing +foo.write_row({"4", "5", "6"}); // std::vector +foo.write_row(std::map{ // std::map {"a", "7"}, {"b", "8"}, {"c", "9"} }); -foo.write_row(std::unordered_map{ // std::unordered_map +foo.write_row(std::unordered_map{ // std::unordered_map {"a", "7"}, {"b", "8"}, {"c", "9"} }); -foo.write_row(csv::robin_map{ // robin_map +foo.write_row(csv::unordered_flat_map{ // csv::unordered_flat_map {"a", "7"}, {"b", "8"}, {"c", "9"} }); ``` diff --git a/include/.LICENSE.concurrent_queue.md b/include/csv/.LICENSE.concurrent_queue.md similarity index 100% rename from include/.LICENSE.concurrent_queue.md rename to include/csv/.LICENSE.concurrent_queue.md diff --git a/include/.LICENSE.robin_map.md b/include/csv/.LICENSE.robin_hood.md similarity index 96% rename from include/.LICENSE.robin_map.md rename to include/csv/.LICENSE.robin_hood.md index af99670..e9a58ad 100644 --- a/include/.LICENSE.robin_map.md +++ b/include/csv/.LICENSE.robin_hood.md @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2017 Tessil +Copyright (c) 2018-2019 Martin Ankerl Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/include/concurrent_queue.hpp b/include/csv/concurrent_queue.hpp similarity index 100% rename from include/concurrent_queue.hpp rename to include/csv/concurrent_queue.hpp diff --git a/include/dialect.hpp b/include/csv/dialect.hpp similarity index 97% rename from include/dialect.hpp rename to include/csv/dialect.hpp index 47121f0..1fcbe91 100644 --- a/include/dialect.hpp +++ b/include/csv/dialect.hpp @@ -30,9 +30,10 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #pragma once +#include #include #include -#include +#include namespace csv { @@ -43,7 +44,7 @@ namespace csv { char line_terminator_; char quote_character_; bool double_quote_; - robin_map ignore_columns_; + unordered_flat_map ignore_columns_; std::vector trim_characters_; std::vector column_names_; bool header_; diff --git a/include/reader.hpp b/include/csv/reader.hpp similarity index 81% rename from include/reader.hpp rename to include/csv/reader.hpp index 38ad8f8..e4f17d2 100644 --- a/include/reader.hpp +++ b/include/csv/reader.hpp @@ -30,9 +30,9 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #pragma once -#include -#include -#include +#include +#include +#include #include #include #include @@ -44,6 +44,8 @@ SOFTWARE. #include #include #include +#include +#include namespace csv { @@ -67,25 +69,25 @@ namespace csv { ignore_columns_enabled_(false), trimming_enabled_(false) { - std::shared_ptr unix_dialect = std::make_shared(); + Dialect unix_dialect; unix_dialect - ->delimiter(",") + .delimiter(",") .quote_character('"') .double_quote(true) .header(true); dialects_["unix"] = unix_dialect; - std::shared_ptr excel_dialect = std::make_shared(); + Dialect excel_dialect; excel_dialect - ->delimiter(",") + .delimiter(",") .quote_character('"') .double_quote(true) .header(true); dialects_["excel"] = excel_dialect; - std::shared_ptr excel_tab_dialect = std::make_shared(); + Dialect excel_tab_dialect; excel_tab_dialect - ->delimiter("\t") + .delimiter("\t") .quote_character('"') .double_quote(true) .header(true); @@ -125,10 +127,10 @@ namespace csv { return result; } - robin_map next_row() { + unordered_flat_map next_row() { row_iterator_queue_.enqueue(next_index_); next_index_ += 1; - robin_map result; + unordered_flat_map result; rows_.try_dequeue(rows_ctoken_, result); return result; } @@ -143,10 +145,10 @@ namespace csv { expected_number_of_rows_ = rows; - if (current_dialect_->trim_characters_.size() > 0) + if (current_dialect_.trim_characters_.size() > 0) trimming_enabled_ = true; - if (current_dialect_->ignore_columns_.size() > 0) + if (current_dialect_.ignore_columns_.size() > 0) ignore_columns_enabled_ = true; reading_thread_started_ = true; @@ -167,20 +169,20 @@ namespace csv { while (std::getline(stream_, line)) { if (line.size() > 0 && line[line.size() - 1] == '\r') line.pop_back(); - if (line != "" || (!current_dialect_->skip_empty_rows_ && line == "")) + if (line != "" || (!current_dialect_.skip_empty_rows_ && line == "")) ++expected_number_of_rows_; } - if (current_dialect_->header_ && expected_number_of_rows_ > 0) + if (current_dialect_.header_ && expected_number_of_rows_ > 0) expected_number_of_rows_ -= 1; stream_.clear(); stream_.seekg(0, std::ios::beg); - if (current_dialect_->trim_characters_.size() > 0) + if (current_dialect_.trim_characters_.size() > 0) trimming_enabled_ = true; - if (current_dialect_->ignore_columns_.size() > 0) + if (current_dialect_.ignore_columns_.size() > 0) ignore_columns_enabled_ = true; reading_thread_started_ = true; @@ -189,13 +191,12 @@ namespace csv { Dialect& configure_dialect(const std::string& dialect_name = "excel") { if (dialects_.find(dialect_name) != dialects_.end()) { - return *dialects_[dialect_name]; + return dialects_[dialect_name]; } else { - std::shared_ptr dialect_object = std::make_shared(); - dialects_[dialect_name] = dialect_object; + dialects_[dialect_name] = Dialect(); current_dialect_name_ = dialect_name; - return *dialect_object; + return dialects_[dialect_name]; } } @@ -207,7 +208,7 @@ namespace csv { } Dialect& get_dialect(const std::string& dialect_name) { - return *(dialects_[dialect_name]); + return dialects_[dialect_name]; } void use_dialect(const std::string& dialect_name) { @@ -217,8 +218,8 @@ namespace csv { } } - std::vector> rows() { - std::vector> rows; + std::vector> rows() { + std::vector> rows; while (!done()) { if (ready()) { rows.push_back(next_row()); @@ -241,10 +242,6 @@ namespace csv { } void read_internal() { - if (!current_dialect_) { - throw std::runtime_error("error: Dialect " + current_dialect_name_ + " not found"); - } - // Get current position std::streamoff length = stream_.tellg(); @@ -260,13 +257,13 @@ namespace csv { } split(first_line); - if (current_dialect_->header_) { + if (current_dialect_.header_) { headers_ = current_split_result_; } else { headers_.clear(); - if (current_dialect_->column_names_.size() > 0) { - headers_ = current_dialect_->column_names_; + if (current_dialect_.column_names_.size() > 0) { + headers_ = current_dialect_.column_names_; } else { for (size_t i = 0; i < current_split_result_.size(); i++) @@ -281,18 +278,16 @@ namespace csv { for (auto& header : headers_) current_row_[header] = ""; if (ignore_columns_enabled_) - for (auto&kvpair : current_dialect_->ignore_columns_) + for (auto&kvpair : current_dialect_.ignore_columns_) current_row_.erase(kvpair.first); // Start processing thread processing_thread_ = std::thread(&Reader::process_values, this); - processing_mutex_.lock(); processing_thread_started_ = true; - processing_mutex_.unlock(); // Get lines one at a time, split on the delimiter and // enqueue the split results into the values_ queue - bool skip_empty_rows = current_dialect_->skip_empty_rows_; + bool skip_empty_rows = current_dialect_.skip_empty_rows_; std::string row; size_t number_of_rows = 0; while (std::getline(stream_, row)) { @@ -312,9 +307,9 @@ namespace csv { void process_values() { size_t index = 0; - auto ignore_columns = current_dialect_->ignore_columns_; + auto ignore_columns = current_dialect_.ignore_columns_; size_t i; - std::string column_name; + std::string_view column_name; size_t number_of_rows = 0; while (number_of_rows < expected_number_of_rows_) { if (front(current_value_)) { @@ -336,8 +331,8 @@ namespace csv { std::string ltrim(std::string const& input) { std::string result = input; result.erase(result.begin(), std::find_if(result.begin(), result.end(), [=](int ch) { - return !(std::find(current_dialect_->trim_characters_.begin(), current_dialect_->trim_characters_.end(), ch) - != current_dialect_->trim_characters_.end()); + return !(std::find(current_dialect_.trim_characters_.begin(), current_dialect_.trim_characters_.end(), ch) + != current_dialect_.trim_characters_.end()); })); return std::move(result); } @@ -346,15 +341,15 @@ namespace csv { std::string rtrim(std::string const& input) { std::string result = input; result.erase(std::find_if(result.rbegin(), result.rend(), [=](int ch) { - return !(std::find(current_dialect_->trim_characters_.begin(), current_dialect_->trim_characters_.end(), ch) - != current_dialect_->trim_characters_.end()); + return !(std::find(current_dialect_.trim_characters_.begin(), current_dialect_.trim_characters_.end(), ch) + != current_dialect_.trim_characters_.end()); }).base(), result.end()); return std::move(result); } // trim white spaces from either end of an input string std::string trim(std::string const& input) { - if (current_dialect_->trim_characters_.size() == 0) + if (current_dialect_.trim_characters_.size() == 0) return input; return ltrim(rtrim(input)); } @@ -375,16 +370,16 @@ namespace csv { // Check if ch is the start of a delimiter sequence bool delimiter_detected = false; - for (size_t j = 0; j < current_dialect_->delimiter_.size(); ++j) { + for (size_t j = 0; j < current_dialect_.delimiter_.size(); ++j) { char ch = input_string[i]; - if (ch != current_dialect_->delimiter_[j]) { + if (ch != current_dialect_.delimiter_[j]) { delimiter_detected = false; break; } else { // ch *might* be the start of a delimiter sequence - if (j + 1 == current_dialect_->delimiter_.size()) { + if (j + 1 == current_dialect_.delimiter_.size()) { if (quotes_encountered % 2 == 0) { // Reached end of delimiter sequence without breaking // delimiter detected! @@ -394,7 +389,7 @@ namespace csv { // If enabled, skip initial space right after delimiter if (i + 1 < input_string_size) { - if (current_dialect_->skip_initial_space_ && input_string[i + 1] == ' ') { + if (current_dialect_.skip_initial_space_ && input_string[i + 1] == ' ') { i = i + 1; } } @@ -418,10 +413,10 @@ namespace csv { if (!delimiter_detected) sub_result += input_string[i]; - if (input_string[i] == current_dialect_->quote_character_) + if (input_string[i] == current_dialect_.quote_character_) quotes_encountered += 1; - if (input_string[i] == current_dialect_->quote_character_ && - current_dialect_->double_quote_ && + if (input_string[i] == current_dialect_.quote_character_ && + current_dialect_.double_quote_ && sub_result.size() >= 2 && sub_result[sub_result.size() - 2] == input_string[i]) quotes_encountered -= 1; @@ -443,37 +438,33 @@ namespace csv { std::string filename_; std::ifstream stream_; std::vector headers_; - robin_map current_row_; + unordered_flat_map current_row_; std::string current_value_; - ConcurrentQueue> rows_; + ConcurrentQueue> rows_; ProducerToken rows_ptoken_; ConsumerToken rows_ctoken_; ConcurrentQueue number_of_rows_processed_; - std::mutex processing_mutex_; - // Member variables to keep track of rows/cols size_t columns_; size_t expected_number_of_rows_; - std::mutex entries_mutex_; // Member variables to enable streaming ConcurrentQueue row_iterator_queue_; size_t row_iterator_index_; - std::mutex size_mutex_; std::thread reading_thread_; bool reading_thread_started_; std::thread processing_thread_; - bool processing_thread_started_; + std::atomic processing_thread_started_; ConcurrentQueue values_; ProducerToken values_ptoken_; ConsumerToken values_ctoken_; std::string current_dialect_name_; - robin_map> dialects_; - std::shared_ptr current_dialect_; + unordered_flat_map dialects_; + Dialect current_dialect_; size_t done_index_; size_t ready_index_; size_t next_index_; diff --git a/include/csv/robin_hood.hpp b/include/csv/robin_hood.hpp new file mode 100644 index 0000000..5b5fe78 --- /dev/null +++ b/include/csv/robin_hood.hpp @@ -0,0 +1,1683 @@ +// ______ _____ ______ _________ +// ______________ ___ /_ ___(_)_______ ___ /_ ______ ______ ______ / +// __ ___/_ __ \__ __ \__ / __ __ \ __ __ \_ __ \_ __ \_ __ / +// _ / / /_/ /_ /_/ /_ / _ / / / _ / / // /_/ // /_/ // /_/ / +// /_/ \____/ /_.___/ /_/ /_/ /_/ ________/_/ /_/ \____/ \____/ \__,_/ +// _/_____/ +// +// robin_hood::unordered_map for C++14 +// version 3.2.0 +// https://github.com/martinus/robin-hood-hashing +// +// Licensed under the MIT License . +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2019 Martin Ankerl +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#ifndef ROBIN_HOOD_H_INCLUDED +#define ROBIN_HOOD_H_INCLUDED + +// see https://semver.org/ +#define ROBIN_HOOD_VERSION_MAJOR 3 // for incompatible API changes +#define ROBIN_HOOD_VERSION_MINOR 2 // for adding functionality in a backwards-compatible manner +#define ROBIN_HOOD_VERSION_PATCH 0 // for backwards-compatible bug fixes + +#include +#include +#include +#include +#include +#include +#include + +// #define ROBIN_HOOD_LOG_ENABLED +#ifdef ROBIN_HOOD_LOG_ENABLED +# include +# define ROBIN_HOOD_LOG(x) std::cout << __FUNCTION__ << "@" << __LINE__ << ": " << x << std::endl +#else +# define ROBIN_HOOD_LOG(x) +#endif + +// mark unused members with this macro +#define ROBIN_HOOD_UNUSED(identifier) + +// bitness +#if SIZE_MAX == UINT32_MAX +# define ROBIN_HOOD_BITNESS 32 +#elif SIZE_MAX == UINT64_MAX +# define ROBIN_HOOD_BITNESS 64 +#else +# error Unsupported bitness +#endif + +// endianess +#ifdef _WIN32 +# define ROBIN_HOOD_LITTLE_ENDIAN 1 +# define ROBIN_HOOD_BIG_ENDIAN 0 +#else +# if __GNUC__ >= 4 +# define ROBIN_HOOD_LITTLE_ENDIAN (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +# define ROBIN_HOOD_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +# else +# error cannot determine endianness +# endif +#endif + +// inline +#ifdef _WIN32 +# define ROBIN_HOOD_NOINLINE __declspec(noinline) +#else +# if __GNUC__ >= 4 +# define ROBIN_HOOD_NOINLINE __attribute__((noinline)) +# else +# define ROBIN_HOOD_NOINLINE +# endif +#endif + +// count leading/trailing bits +#ifdef _WIN32 +# if ROBIN_HOOD_BITNESS == 32 +# define ROBIN_HOOD_BITSCANFORWARD _BitScanForward +# else +# define ROBIN_HOOD_BITSCANFORWARD _BitScanForward64 +# endif +# include +# pragma intrinsic(ROBIN_HOOD_BITSCANFORWARD) +# define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) \ + [](size_t mask) -> int { \ + unsigned long index; \ + return ROBIN_HOOD_BITSCANFORWARD(&index, mask) ? index : ROBIN_HOOD_BITNESS; \ + }(x) +#else +# if __GNUC__ >= 4 +# if ROBIN_HOOD_BITNESS == 32 +# define ROBIN_HOOD_CTZ(x) __builtin_ctzl(x) +# define ROBIN_HOOD_CLZ(x) __builtin_clzl(x) +# else +# define ROBIN_HOOD_CTZ(x) __builtin_ctzll(x) +# define ROBIN_HOOD_CLZ(x) __builtin_clzll(x) +# endif +# define ROBIN_HOOD_COUNT_LEADING_ZEROES(x) (x ? ROBIN_HOOD_CLZ(x) : ROBIN_HOOD_BITNESS) +# define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) (x ? ROBIN_HOOD_CTZ(x) : ROBIN_HOOD_BITNESS) +# else +# error clz not supported +# endif +#endif + +// umulh +#if defined(__SIZEOF_INT128__) +# define ROBIN_HOOD_UMULH(a, b) \ + static_cast( \ + (static_cast(a) * static_cast(b)) >> 64u) +#elif (defined(_WIN32) && ROBIN_HOOD_BITNESS == 64) +# include // for __umulh +# pragma intrinsic(__umulh) +# define ROBIN_HOOD_UMULH(a, b) __umulh(a, b) +#endif + +namespace csv { + +namespace detail { + +// make sure this is not inlined as it is slow and dramatically enlarges code, thus making other +// inlinings more difficult. Throws are also generally the slow path. +template +static ROBIN_HOOD_NOINLINE void doThrow(Args&&... args) { + throw E(std::forward(args)...); +} + +template +static T* assertNotNull(T* t, Args&&... args) { + if (nullptr == t) { + doThrow(std::forward(args)...); + } + return t; +} + +template +inline T unaligned_load(void const* ptr) { + // using memcpy so we don't get into unaligned load problems. + // compiler should optimize this very well anyways. + T t; + std::memcpy(&t, ptr, sizeof(T)); + return t; +} + +// Allocates bulks of memory for objects of type T. This deallocates the memory in the destructor, +// and keeps a linked list of the allocated memory around. Overhead per allocation is the size of a +// pointer. +template +class BulkPoolAllocator { +public: + BulkPoolAllocator() + : mHead(nullptr) + , mListForFree(nullptr) {} + + // does not copy anything, just creates a new allocator. + BulkPoolAllocator(const BulkPoolAllocator& ROBIN_HOOD_UNUSED(o) /*unused*/) + : mHead(nullptr) + , mListForFree(nullptr) {} + + BulkPoolAllocator(BulkPoolAllocator&& o) + : mHead(o.mHead) + , mListForFree(o.mListForFree) { + o.mListForFree = nullptr; + o.mHead = nullptr; + } + + BulkPoolAllocator& operator=(BulkPoolAllocator&& o) { + reset(); + mHead = o.mHead; + mListForFree = o.mListForFree; + o.mListForFree = nullptr; + o.mHead = nullptr; + return *this; + } + + BulkPoolAllocator& operator=(const BulkPoolAllocator& ROBIN_HOOD_UNUSED(o) /*unused*/) { + // does not do anything + return *this; + } + + ~BulkPoolAllocator() { + reset(); + } + + // Deallocates all allocated memory. + void reset() { + while (mListForFree) { + T* tmp = *mListForFree; + free(mListForFree); + mListForFree = reinterpret_cast(tmp); + } + mHead = nullptr; + } + + // allocates, but does NOT initialize. Use in-place new constructor, e.g. + // T* obj = pool.allocate(); + // ::new (static_cast(obj)) T(); + T* allocate() { + T* tmp = mHead; + if (!tmp) { + tmp = performAllocation(); + } + + mHead = *reinterpret_cast(tmp); + return tmp; + } + + // does not actually deallocate but puts it in store. + // make sure you have already called the destructor! e.g. with + // obj->~T(); + // pool.deallocate(obj); + void deallocate(T* obj) { + *reinterpret_cast(obj) = mHead; + mHead = obj; + } + + // Adds an already allocated block of memory to the allocator. This allocator is from now on + // responsible for freeing the data (with free()). If the provided data is not large enough to + // make use of, it is immediately freed. Otherwise it is reused and freed in the destructor. + void addOrFree(void* ptr, const size_t numBytes) { + // calculate number of available elements in ptr + if (numBytes < ALIGNMENT + ALIGNED_SIZE) { + // not enough data for at least one element. Free and return. + free(ptr); + } else { + add(ptr, numBytes); + } + } + + void swap(BulkPoolAllocator& other) { + using std::swap; + swap(mHead, other.mHead); + swap(mListForFree, other.mListForFree); + } + +private: + // iterates the list of allocated memory to calculate how many to alloc next. + // Recalculating this each time saves us a size_t member. + // This ignores the fact that memory blocks might have been added manually with addOrFree. In + // practice, this should not matter much. + size_t calcNumElementsToAlloc() const { + auto tmp = mListForFree; + size_t numAllocs = MinNumAllocs; + + while (numAllocs * 2 <= MaxNumAllocs && tmp) { + auto x = reinterpret_cast(tmp); + tmp = *x; + numAllocs *= 2; + } + + return numAllocs; + } + + // WARNING: Underflow if numBytes < ALIGNMENT! This is guarded in addOrFree(). + void add(void* ptr, const size_t numBytes) { + const size_t numElements = (numBytes - ALIGNMENT) / ALIGNED_SIZE; + + auto data = reinterpret_cast(ptr); + + // link free list + auto x = reinterpret_cast(data); + *x = mListForFree; + mListForFree = data; + + // create linked list for newly allocated data + auto const headT = reinterpret_cast(reinterpret_cast(ptr) + ALIGNMENT); + + auto const head = reinterpret_cast(headT); + + // Visual Studio compiler automatically unrolls this loop, which is pretty cool + for (size_t i = 0; i < numElements; ++i) { + *reinterpret_cast(head + i * ALIGNED_SIZE) = head + (i + 1) * ALIGNED_SIZE; + } + + // last one points to 0 + *reinterpret_cast(head + (numElements - 1) * ALIGNED_SIZE) = mHead; + mHead = headT; + } + + // Called when no memory is available (mHead == 0). + // Don't inline this slow path. + ROBIN_HOOD_NOINLINE T* performAllocation() { + size_t const numElementsToAlloc = calcNumElementsToAlloc(); + + // alloc new memory: [prev |T, T, ... T] + // std::cout << (sizeof(T*) + ALIGNED_SIZE * numElementsToAlloc) << " bytes" << std::endl; + size_t const bytes = ALIGNMENT + ALIGNED_SIZE * numElementsToAlloc; + add(assertNotNull(malloc(bytes)), bytes); + return mHead; + } + + // enforce byte alignment of the T's + static constexpr size_t ALIGNMENT = + (std::max)(std::alignment_of::value, std::alignment_of::value); + static constexpr size_t ALIGNED_SIZE = ((sizeof(T) - 1) / ALIGNMENT + 1) * ALIGNMENT; + + static_assert(MinNumAllocs >= 1, "MinNumAllocs"); + static_assert(MaxNumAllocs >= MinNumAllocs, "MaxNumAllocs"); + static_assert(ALIGNED_SIZE >= sizeof(T*), "ALIGNED_SIZE"); + static_assert(0 == (ALIGNED_SIZE % sizeof(T*)), "ALIGNED_SIZE mod"); + static_assert(ALIGNMENT >= sizeof(T*), "ALIGNMENT"); + + T* mHead; + T** mListForFree; +}; + +template +struct NodeAllocator; + +// dummy allocator that does nothing +template +struct NodeAllocator { + + // we are not using the data, so just free it. + void addOrFree(void* ptr, size_t ROBIN_HOOD_UNUSED(numBytes) /*unused*/) { + free(ptr); + } +}; + +template +struct NodeAllocator : public BulkPoolAllocator {}; + +// All empty maps initial mInfo point to this infobyte. That way lookup in an empty map +// always returns false, and this is a very hot byte. +// +// we have to use data >1byte (at least 2 bytes), because initially we set mShift to 63 (has to be +// <63), so initial index will be 0 or 1. +namespace DummyInfoByte { + +static uint64_t b = 0; + +} // namespace DummyInfoByte +} // namespace detail + +struct is_transparent_tag {}; + +// A custom pair implementation is used in the map because std::pair is not is_trivially_copyable, +// which means it would not be allowed to be used in std::memcpy. This struct is copyable, which is +// also tested. +template +struct pair { + using first_type = First; + using second_type = Second; + + // pair constructors are explicit so we don't accidentally call this ctor when we don't have to. + explicit pair(std::pair const& o) + : first{o.first} + , second{o.second} {} + + // pair constructors are explicit so we don't accidentally call this ctor when we don't have to. + explicit pair(std::pair&& o) + : first{std::move(o.first)} + , second{std::move(o.second)} {} + + constexpr pair(const First& firstArg, const Second& secondArg) + : first{firstArg} + , second{secondArg} {} + + constexpr pair(First&& firstArg, Second&& secondArg) + : first{std::move(firstArg)} + , second{std::move(secondArg)} {} + + template + constexpr pair(FirstArg&& firstArg, SecondArg&& secondArg) + : first{std::forward(firstArg)} + , second{std::forward(secondArg)} {} + + template + pair(std::piecewise_construct_t /*unused*/, std::tuple firstArgs, + std::tuple secondArgs) + : pair{firstArgs, secondArgs, std::index_sequence_for{}, + std::index_sequence_for{}} {} + + // constructor called from the std::piecewise_construct_t ctor + template + inline pair(std::tuple& tuple1, std::tuple& tuple2, + std::index_sequence /*unused*/, + std::index_sequence /*unused*/) + : first{std::forward(std::get(tuple1))...} + , second{std::forward(std::get(tuple2))...} { + // make visual studio compiler happy about warning about unused tuple1 & tuple2. + // Visual studio's pair implementation disables warning 4100. + (void)tuple1; + (void)tuple2; + } + + first_type& getFirst() { + return first; + } + first_type const& getFirst() const { + return first; + } + second_type& getSecond() { + return second; + } + second_type const& getSecond() const { + return second; + } + + void swap(pair& o) { + using std::swap; + swap(first, o.first); + swap(second, o.second); + } + + First first; + Second second; +}; + +// A thin wrapper around std::hash, performing a single multiplication to (hopefully) get nicely +// randomized upper bits, which are used by the unordered_map. +template +struct hash : public std::hash { + size_t operator()(T const& obj) const { + return std::hash::operator()(obj); + } +}; + +// Murmur2 hash without caring about big endianness. Generally much faster than the standard +// std::hash for std::string, and the code is quite simple. +template <> +struct hash { + size_t operator()(std::string const& str) const { + static constexpr uint64_t m = UINT64_C(0xc6a4a7935bd1e995); + static constexpr uint64_t seed = UINT64_C(0xe17a1465); + static constexpr unsigned int r = 47; + + size_t const len = str.size(); + auto const data64 = reinterpret_cast(str.data()); + uint64_t h = seed ^ (len * m); + + size_t const n_blocks = len / 8; + for (size_t i = 0; i < n_blocks; ++i) { + uint64_t k = detail::unaligned_load(data64 + i); + + k *= m; + k ^= k >> r; + k *= m; + + h ^= k; + h *= m; + } + + auto const data8 = reinterpret_cast(data64 + n_blocks); + switch (len & 7u) { + case 7: + h ^= static_cast(data8[6]) << 48u; + // fallthrough + case 6: + h ^= static_cast(data8[5]) << 40u; + // fallthrough + case 5: + h ^= static_cast(data8[4]) << 32u; + // fallthrough + case 4: + h ^= static_cast(data8[3]) << 24u; + // fallthrough + case 3: + h ^= static_cast(data8[2]) << 16u; + // fallthrough + case 2: + h ^= static_cast(data8[1]) << 8u; + // fallthrough + case 1: + h ^= static_cast(data8[0]); + h *= m; + }; + + h ^= h >> r; + h *= m; + h ^= h >> r; + + return static_cast(h); + } +}; + +// specialization used for uint64_t and int64_t. Uses 128bit multiplication +template <> +struct hash { + size_t operator()(uint64_t const& obj) const { +#if defined(ROBIN_HOOD_UMULH) + // 167079903232 masksum, 122791318 ops best: 0xfa1371431ef43ae1 0xfe9b65e7da1b3187 + return static_cast(ROBIN_HOOD_UMULH(UINT64_C(0xfa1371431ef43ae1), obj) * + UINT64_C(0xfe9b65e7da1b3187)); +#else + // murmurhash 3 finalizer + uint64_t h = obj; + h ^= h >> 33; + h *= 0xff51afd7ed558ccd; + h ^= h >> 33; + h *= 0xc4ceb9fe1a85ec53; + h ^= h >> 33; + return static_cast(h); +#endif + } +}; + +template <> +struct hash { + size_t operator()(int64_t const& obj) const { + return hash{}(static_cast(obj)); + } +}; + +template <> +struct hash { + size_t operator()(uint32_t const& h) const { +#if ROBIN_HOOD_BITNESS == 32 + return static_cast((UINT64_C(0xca4bcaa75ec3f625) * (uint64_t)h) >> 32); +#else + return hash{}(static_cast(h)); +#endif + } +}; + +template <> +struct hash { + size_t operator()(int32_t const& obj) const { + return hash{}(static_cast(obj)); + } +}; + +namespace detail { + +// A highly optimized hashmap implementation, using the Robin Hood algorithm. +// +// In most cases, this map should be usable as a drop-in replacement for std::unordered_map, but be +// about 2x faster in most cases and require much less allocations. +// +// This implementation uses the following memory layout: +// +// [Node, Node, ... Node | info, info, ... infoSentinel ] +// +// * Node: either a DataNode that directly has the std::pair as member, +// or a DataNode with a pointer to std::pair. Which DataNode representation to use +// depends on how fast the swap() operation is. Heuristically, this is automatically choosen based +// on sizeof(). there are always 2^n Nodes. +// +// * info: Each Node in the map has a corresponding info byte, so there are 2^n info bytes. +// Each byte is initialized to 0, meaning the corresponding Node is empty. Set to 1 means the +// corresponding node contains data. Set to 2 means the corresponding Node is filled, but it +// actually belongs to the previous position and was pushed out because that place is already +// taken. +// +// * infoSentinel: Sentinel byte set to 1, so that iterator's ++ can stop at end() without the need +// for a idx +// variable. +// +// According to STL, order of templates has effect on throughput. That's why I've moved the boolean +// to the front. +// https://www.reddit.com/r/cpp/comments/ahp6iu/compile_time_binary_size_reductions_and_cs_future/eeguck4/ +template +class unordered_map + : public Hash, + public KeyEqual, + detail::NodeAllocator< + csv::pair::type, T>, 4, 16384, + IsFlatMap> { +public: + using key_type = Key; + using mapped_type = T; + using value_type = + csv::pair::type, T>; + using size_type = size_t; + using hasher = Hash; + using key_equal = KeyEqual; + using Self = + unordered_map; + static constexpr bool is_flat_map = IsFlatMap; + +private: + static_assert(MaxLoadFactor100 > 10 && MaxLoadFactor100 < 100, + "MaxLoadFactor100 needs to be >10 && < 100"); + + // configuration defaults + + // make sure we have 8 elements, needed to quickly rehash mInfo + static constexpr size_t InitialNumElements = sizeof(uint64_t); + static constexpr int InitialInfoNumBits = 5; + static constexpr uint8_t InitialInfoInc = 1 << InitialInfoNumBits; + static constexpr uint8_t InitialInfoHashShift = sizeof(size_t) * 8 - InitialInfoNumBits; + using DataPool = detail::NodeAllocator; + + // type needs to be wider than uint8_t. + using InfoType = int32_t; + +private: + // DataNode //////////////////////////////////////////////////////// + + // Primary template for the data node. We have special implementations for small and big + // objects. For large objects it is assumed that swap() is fairly slow, so we allocate these on + // the heap so swap merely swaps a pointer. + template + class DataNode {}; + + // Small: just allocate on the stack. + template + class DataNode { + public: + template + explicit DataNode(M& ROBIN_HOOD_UNUSED(map) /*unused*/, Args&&... args) + : mData(std::forward(args)...) {} + + DataNode(M& ROBIN_HOOD_UNUSED(map) /*unused*/, DataNode&& n) + : mData(std::move(n.mData)) {} + + // doesn't do anything + void destroy(M& ROBIN_HOOD_UNUSED(map) /*unused*/) {} + void destroyDoNotDeallocate() {} + + value_type const* operator->() const { + return &mData; + } + value_type* operator->() { + return &mData; + } + + const value_type& operator*() const { + return mData; + } + + value_type& operator*() { + return mData; + } + + typename value_type::first_type& getFirst() { + return mData.first; + } + + typename value_type::first_type const& getFirst() const { + return mData.first; + } + + typename value_type::second_type& getSecond() { + return mData.second; + } + + typename value_type::second_type const& getSecond() const { + return mData.second; + } + + void swap(DataNode& o) { + mData.swap(o.mData); + } + + private: + value_type mData; + }; + + // big object: allocate on heap. + template + class DataNode { + public: + template + explicit DataNode(M& map, Args&&... args) + : mData(map.allocate()) { + ::new (static_cast(mData)) value_type(std::forward(args)...); + } + + DataNode(M& ROBIN_HOOD_UNUSED(map) /*unused*/, DataNode&& n) + : mData(std::move(n.mData)) {} + + void destroy(M& map) { + // don't deallocate, just put it into list of datapool. + mData->~value_type(); + map.deallocate(mData); + } + + void destroyDoNotDeallocate() { + mData->~value_type(); + } + + value_type const* operator->() const { + return mData; + } + + value_type* operator->() { + return mData; + } + + const value_type& operator*() const { + return *mData; + } + + value_type& operator*() { + return *mData; + } + + typename value_type::first_type& getFirst() { + return mData->first; + } + + typename value_type::first_type const& getFirst() const { + return mData->first; + } + + typename value_type::second_type& getSecond() { + return mData->second; + } + + typename value_type::second_type const& getSecond() const { + return mData->second; + } + + void swap(DataNode& o) { + using std::swap; + swap(mData, o.mData); + } + + private: + value_type* mData; + }; + + using Node = DataNode; + + // Cloner ////////////////////////////////////////////////////////// + + template + struct Cloner; + + // fast path: Just copy data, without allocating anything. + template + struct Cloner { + void operator()(M const& source, M& target) const { + // std::memcpy(target.mKeyVals, source.mKeyVals, + // target.calcNumBytesTotal(target.mMask + 1)); + auto src = reinterpret_cast(source.mKeyVals); + auto tgt = reinterpret_cast(target.mKeyVals); + std::copy(src, src + target.calcNumBytesTotal(target.mMask + 1), tgt); + } + }; + + template + struct Cloner { + void operator()(M const& source, M& target) const { + // make sure to copy initialize sentinel as well + // std::memcpy(target.mInfo, source.mInfo, target.calcNumBytesInfo(target.mMask + 1)); + std::copy(source.mInfo, source.mInfo + target.calcNumBytesInfo(target.mMask + 1), + target.mInfo); + + for (size_t i = 0; i < target.mMask + 1; ++i) { + if (target.mInfo[i]) { + ::new (static_cast(target.mKeyVals + i)) + Node(target, *source.mKeyVals[i]); + } + } + } + }; + + // Destroyer /////////////////////////////////////////////////////// + + template + struct Destroyer {}; + + template + struct Destroyer { + void nodes(M& m) const { + m.mNumElements = 0; + } + + void nodesDoNotDeallocate(M& m) const { + m.mNumElements = 0; + } + }; + + template + struct Destroyer { + void nodes(M& m) const { + m.mNumElements = 0; + // clear also resets mInfo to 0, that's sometimes not necessary. + for (size_t idx = 0; idx <= m.mMask; ++idx) { + if (0 != m.mInfo[idx]) { + Node& n = m.mKeyVals[idx]; + n.destroy(m); + n.~Node(); + } + } + } + + void nodesDoNotDeallocate(M& m) const { + m.mNumElements = 0; + // clear also resets mInfo to 0, that's sometimes not necessary. + for (size_t idx = 0; idx <= m.mMask; ++idx) { + if (0 != m.mInfo[idx]) { + Node& n = m.mKeyVals[idx]; + n.destroyDoNotDeallocate(); + n.~Node(); + } + } + } + }; + + // Iter //////////////////////////////////////////////////////////// + + // generic iterator for both const_iterator and iterator. + template + class Iter { + private: + using NodePtr = typename std::conditional::type; + + public: + using difference_type = std::ptrdiff_t; + using value_type = typename Self::value_type; + using reference = typename std::conditional::type; + using pointer = typename std::conditional::type; + using iterator_category = std::forward_iterator_tag; + + // default constructed iterator can be compared to itself, but WON'T return true when + // compared to end(). + Iter() + : mKeyVals(nullptr) + , mInfo(nullptr) {} + + // both const_iterator and iterator can be constructed from a non-const iterator + Iter(Iter const& other) + : mKeyVals(other.mKeyVals) + , mInfo(other.mInfo) {} + + Iter(NodePtr valPtr, uint8_t const* infoPtr) + : mKeyVals(valPtr) + , mInfo(infoPtr) {} + + // prefix increment. Undefined behavior if we are at end()! + Iter& operator++() { + mInfo++; + mKeyVals++; + int inc; + do { + auto const n = detail::unaligned_load(mInfo); +#if ROBIN_HOOD_LITTLE_ENDIAN + inc = ROBIN_HOOD_COUNT_TRAILING_ZEROES(n) / 8; +#else + inc = ROBIN_HOOD_COUNT_LEADING_ZEROES(n) / 8; +#endif + mInfo += inc; + mKeyVals += inc; + } while (inc == sizeof(size_t)); + return *this; + } + + reference operator*() const { + return **mKeyVals; + } + + pointer operator->() const { + return &**mKeyVals; + } + + template + bool operator==(Iter const& o) const { + return mKeyVals == o.mKeyVals; + } + + template + bool operator!=(Iter const& o) const { + return mKeyVals != o.mKeyVals; + } + + private: + friend class unordered_map; + NodePtr mKeyVals; + uint8_t const* mInfo; + }; + + //////////////////////////////////////////////////////////////////// + + size_t calcNumBytesInfo(size_t numElements) const { + const size_t s = sizeof(uint8_t) * (numElements + 1); + if (s / sizeof(uint8_t) != numElements + 1) { + throwOverflowError(); + } + // make sure it's a bit larger, so we can load 64bit numbers + return s + sizeof(uint64_t); + } + size_t calcNumBytesNode(size_t numElements) const { + const size_t s = sizeof(Node) * numElements; + if (s / sizeof(Node) != numElements) { + throwOverflowError(); + } + return s; + } + size_t calcNumBytesTotal(size_t numElements) const { + const size_t si = calcNumBytesInfo(numElements); + const size_t sn = calcNumBytesNode(numElements); + const size_t s = si + sn; + if (s <= si || s <= sn) { + throwOverflowError(); + } + return s; + } + + // highly performance relevant code. + // Lower bits are used for indexing into the array (2^n size) + // The upper 5 bits need to be a good hash, to save comparisons. + template + void keyToIdx(HashKey&& key, size_t& idx, InfoType& info) const { + idx = Hash::operator()(key); + info = static_cast(mInfoInc + static_cast(idx >> mInfoHashShift)); + idx &= mMask; + } + + // forwards the index by one, wrapping around at the end + void next(InfoType* info, size_t* idx) const { + *idx = (*idx + 1) & mMask; + *info = static_cast(*info + mInfoInc); + } + + void nextWhileLess(InfoType* info, size_t* idx) const { + // unrolling this by hand did not bring any speedups. + while (*info < mInfo[*idx]) { + next(info, idx); + } + } + + // Shift everything up by one element. Tries to move stuff around. + // True if some shifting has occured (entry under idx is a constructed object) + // Fals if no shift has occured (entry under idx is unconstructed memory) + void shiftUp(size_t idx, size_t const insertion_idx) { + while (idx != insertion_idx) { + size_t prev_idx = (idx - 1) & mMask; + if (mInfo[idx]) { + mKeyVals[idx] = std::move(mKeyVals[prev_idx]); + } else { + ::new (static_cast(mKeyVals + idx)) Node(std::move(mKeyVals[prev_idx])); + } + mInfo[idx] = static_cast(mInfo[prev_idx] + mInfoInc); + if (0xFF <= mInfo[idx] + mInfoInc) { + mMaxNumElementsAllowed = 0; + } + idx = prev_idx; + } + } + + void shiftDown(size_t idx) { + // until we find one that is either empty or has zero offset. + // TODO we don't need to move everything, just the last one for the same bucket. + mKeyVals[idx].destroy(*this); + + // until we find one that is either empty or has zero offset. + size_t nextIdx = (idx + 1) & mMask; + while (mInfo[nextIdx] >= 2 * mInfoInc) { + mInfo[idx] = static_cast(mInfo[nextIdx] - mInfoInc); + mKeyVals[idx] = std::move(mKeyVals[nextIdx]); + idx = nextIdx; + nextIdx = (idx + 1) & mMask; + } + + mInfo[idx] = 0; + // don't destroy, we've moved it + // mKeyVals[idx].destroy(*this); + mKeyVals[idx].~Node(); + } + + // copy of find(), except that it returns iterator instead of const_iterator. + template + size_t findIdx(Other const& key) const { + size_t idx; + InfoType info; + keyToIdx(key, idx, info); + + do { + // unrolling this twice gives a bit of a speedup. More unrolling did not help. + if (info == mInfo[idx] && KeyEqual::operator()(key, mKeyVals[idx].getFirst())) { + return idx; + } + next(&info, &idx); + if (info == mInfo[idx] && KeyEqual::operator()(key, mKeyVals[idx].getFirst())) { + return idx; + } + next(&info, &idx); + } while (info <= mInfo[idx]); + + // nothing found! + return mMask + 1; + } + + void cloneData(const unordered_map& o) { + Cloner::value>()(o, *this); + } + + // inserts a keyval that is guaranteed to be new, e.g. when the hashmap is resized. + // @return index where the element was created + size_t insert_move(Node&& keyval) { + // we don't retry, fail if overflowing + // don't need to check max num elements + if (0 == mMaxNumElementsAllowed && !try_increase_info()) { + throwOverflowError(); + } + + size_t idx; + InfoType info; + keyToIdx(keyval.getFirst(), idx, info); + + // skip forward. Use <= because we are certain that the element is not there. + while (info <= mInfo[idx]) { + idx = (idx + 1) & mMask; + info = static_cast(info + mInfoInc); + } + + // key not found, so we are now exactly where we want to insert it. + auto const insertion_idx = idx; + auto const insertion_info = static_cast(info); + if (0xFF <= insertion_info + mInfoInc) { + mMaxNumElementsAllowed = 0; + } + + // find an empty spot + while (0 != mInfo[idx]) { + next(&info, &idx); + } + + auto& l = mKeyVals[insertion_idx]; + if (idx == insertion_idx) { + ::new (static_cast(&l)) Node(std::move(keyval)); + } else { + shiftUp(idx, insertion_idx); + l = std::move(keyval); + } + + // put at empty spot + mInfo[insertion_idx] = insertion_info; + + ++mNumElements; + return insertion_idx; + } + +public: + using iterator = Iter; + using const_iterator = Iter; + + // Creates an empty hash map. Nothing is allocated yet, this happens at the first insert. This + // tremendously speeds up ctor & dtor of a map that never receives an element. The penalty is + // payed at the first insert, and not before. Lookup of this empty map works because everybody + // points to DummyInfoByte::b. parameter bucket_count is dictated by the standard, but we can + // ignore it. + explicit unordered_map(size_t ROBIN_HOOD_UNUSED(bucket_count) /*unused*/ = 0, + const Hash& h = Hash{}, const KeyEqual& equal = KeyEqual{}) + : Hash{h} + , KeyEqual{equal} {} + + template + unordered_map(Iter first, Iter last, size_t ROBIN_HOOD_UNUSED(bucket_count) /*unused*/ = 0, + const Hash& h = Hash{}, const KeyEqual& equal = KeyEqual{}) + : Hash{h} + , KeyEqual{equal} { + insert(first, last); + } + + unordered_map(std::initializer_list init, + size_t ROBIN_HOOD_UNUSED(bucket_count) /*unused*/ = 0, const Hash& h = Hash{}, + const KeyEqual& equal = KeyEqual{}) + : Hash{h} + , KeyEqual{equal} { + insert(init.begin(), init.end()); + } + + unordered_map(unordered_map&& o) + : Hash{std::move(static_cast(o))} + , KeyEqual{std::move(static_cast(o))} + , DataPool{std::move(static_cast(o))} + , mKeyVals{std::move(o.mKeyVals)} + , mInfo{std::move(o.mInfo)} + , mNumElements{std::move(o.mNumElements)} + , mMask{std::move(o.mMask)} + , mMaxNumElementsAllowed{std::move(o.mMaxNumElementsAllowed)} + , mInfoInc{std::move(o.mInfoInc)} + , mInfoHashShift{std::move(o.mInfoHashShift)} { + // set other's mask to 0 so its destructor won't do anything + o.mMask = 0; + } + + unordered_map& operator=(unordered_map&& o) { + if (&o != this) { + // different, move it + destroy(); + mKeyVals = std::move(o.mKeyVals); + mInfo = std::move(o.mInfo); + mNumElements = std::move(o.mNumElements); + mMask = std::move(o.mMask); + mMaxNumElementsAllowed = std::move(o.mMaxNumElementsAllowed); + mInfoInc = std::move(o.mInfoInc); + mInfoHashShift = std::move(o.mInfoHashShift); + Hash::operator=(std::move(static_cast(o))); + KeyEqual::operator=(std::move(static_cast(o))); + DataPool::operator=(std::move(static_cast(o))); + // set other's mask to 0 so its destructor won't do anything + o.mMask = 0; + } + return *this; + } + + unordered_map(const unordered_map& o) + : Hash{static_cast(o)} + , KeyEqual{static_cast(o)} + , DataPool{static_cast(o)} { + + if (!o.empty()) { + // not empty: create an exact copy. it is also possible to just iterate through all + // elements and insert them, but copying is probably faster. + + mKeyVals = static_cast( + detail::assertNotNull(malloc(calcNumBytesTotal(o.mMask + 1)))); + // no need for calloc because clonData does memcpy + mInfo = reinterpret_cast(mKeyVals + o.mMask + 1); + mNumElements = o.mNumElements; + mMask = o.mMask; + mMaxNumElementsAllowed = o.mMaxNumElementsAllowed; + mInfoInc = o.mInfoInc; + mInfoHashShift = o.mInfoHashShift; + cloneData(o); + } + } + + // Creates a copy of the given map. Copy constructor of each entry is used. + unordered_map& operator=(unordered_map const& o) { + if (&o == this) { + // prevent assigning of itself + return *this; + } + + // we keep using the old allocator and not assign the new one, because we want to keep the + // memory available. when it is the same size. + if (o.empty()) { + if (0 == mMask) { + // nothing to do, we are empty too + return *this; + } + + // not empty: destroy what we have there + // clear also resets mInfo to 0, that's sometimes not necessary. + destroy(); + + // we assign an invalid pointer, but this is ok because we never dereference it. + using detail::DummyInfoByte::b; + mKeyVals = reinterpret_cast(&b) - 1; // lgtm [cpp/suspicious-pointer-scaling] + mInfo = reinterpret_cast(&b); + Hash::operator=(static_cast(o)); + KeyEqual::operator=(static_cast(o)); + DataPool::operator=(static_cast(o)); + mNumElements = 0; + mMask = 0; + mMaxNumElementsAllowed = 0; + mInfoInc = InitialInfoInc; + mInfoHashShift = InitialInfoHashShift; + return *this; + } + + // clean up old stuff + Destroyer::value>{}.nodes(*this); + + if (mMask != o.mMask) { + // no luck: we don't have the same array size allocated, so we need to realloc. + if (0 != mMask) { + // only deallocate if we actually have data! + free(mKeyVals); + } + + mKeyVals = static_cast( + detail::assertNotNull(malloc(calcNumBytesTotal(o.mMask + 1)))); + + // no need for calloc here because cloneData performs a memcpy. + mInfo = reinterpret_cast(mKeyVals + o.mMask + 1); + mInfoInc = o.mInfoInc; + mInfoHashShift = o.mInfoHashShift; + // sentinel is set in cloneData + } + Hash::operator=(static_cast(o)); + KeyEqual::operator=(static_cast(o)); + mNumElements = o.mNumElements; + mMask = o.mMask; + mMaxNumElementsAllowed = o.mMaxNumElementsAllowed; + cloneData(o); + + return *this; + } + + // Swaps everything between the two maps. + void swap(unordered_map& o) { + using std::swap; + swap(mKeyVals, o.mKeyVals); + swap(mInfo, o.mInfo); + swap(mNumElements, o.mNumElements); + swap(mMask, o.mMask); + swap(mMaxNumElementsAllowed, o.mMaxNumElementsAllowed); + swap(mInfoInc, o.mInfoInc); + swap(mInfoHashShift, o.mInfoHashShift); + swap(static_cast(*this), static_cast(o)); + swap(static_cast(*this), static_cast(o)); + // no harm done in swapping datapool + swap(static_cast(*this), static_cast(o)); + } + + // Clears all data, without resizing. + void clear() { + if (empty()) { + // don't do anything! also important because we don't want to write to DummyInfoByte::b, + // even though we would just write 0 to it. + return; + } + + Destroyer::value>{}.nodes(*this); + + // clear everything except the sentinel + // std::memset(mInfo, 0, sizeof(uint8_t) * (mMask + 1)); + uint8_t const z = 0; + std::fill(mInfo, mInfo + (sizeof(uint8_t) * (mMask + 1)), z); + + mInfoInc = InitialInfoInc; + mInfoHashShift = InitialInfoHashShift; + } + + // Destroys the map and all it's contents. + ~unordered_map() { + destroy(); + } + + // Checks if both maps contain the same entries. Order is irrelevant. + bool operator==(const unordered_map& other) const { + if (other.size() != size()) { + return false; + } + for (auto const& otherEntry : other) { + auto const myIt = find(otherEntry.first); + if (myIt == end() || !(myIt->second == otherEntry.second)) { + return false; + } + } + + return true; + } + + bool operator!=(const unordered_map& other) const { + return !operator==(other); + } + + mapped_type& operator[](const key_type& key) { + return doCreateByKey(key); + } + + mapped_type& operator[](key_type&& key) { + return doCreateByKey(std::move(key)); + } + + template + void insert(Iter first, Iter last) { + for (; first != last; ++first) { + // value_type ctor needed because this might be called with std::pair's + insert(value_type(*first)); + } + } + + template + std::pair emplace(Args&&... args) { + Node n{*this, std::forward(args)...}; + auto r = doInsert(std::move(n)); + if (!r.second) { + // insertion not possible: destroy node + n.destroy(*this); + } + return r; + } + + std::pair insert(const value_type& keyval) { + return doInsert(keyval); + } + + std::pair insert(value_type&& keyval) { + return doInsert(std::move(keyval)); + } + + // Returns 1 if key is found, 0 otherwise. + size_t count(const key_type& key) const { + return findIdx(key) == (mMask + 1) ? 0 : 1; + } + + // Returns a reference to the value found for key. + // Throws std::out_of_range if element cannot be found + mapped_type& at(key_type const& key) { + auto idx = findIdx(key); + if (idx == mMask + 1) { + doThrow("key not found"); + } + return mKeyVals[idx].getSecond(); + } + + // Returns a reference to the value found for key. + // Throws std::out_of_range if element cannot be found + mapped_type const& at(key_type const& key) const { + auto idx = findIdx(key); + if (idx == mMask + 1) { + doThrow("key not found"); + } + return mKeyVals[idx].getSecond(); + } + + const_iterator find(const key_type& key) const { + const size_t idx = findIdx(key); + return const_iterator{mKeyVals + idx, mInfo + idx}; + } + + template + const_iterator find(const OtherKey& key, is_transparent_tag /*unused*/) const { + const size_t idx = findIdx(key); + return const_iterator{mKeyVals + idx, mInfo + idx}; + } + + iterator find(const key_type& key) { + const size_t idx = findIdx(key); + return iterator{mKeyVals + idx, mInfo + idx}; + } + + template + iterator find(const OtherKey& key, is_transparent_tag /*unused*/) { + const size_t idx = findIdx(key); + return iterator{mKeyVals + idx, mInfo + idx}; + } + + iterator begin() { + if (empty()) { + return end(); + } + return ++iterator(mKeyVals - 1, mInfo - 1); + } + const_iterator begin() const { + return cbegin(); + } + const_iterator cbegin() const { + if (empty()) { + return cend(); + } + return ++const_iterator(mKeyVals - 1, mInfo - 1); + } + + iterator end() { + // no need to supply valid info pointer: end() must not be dereferenced, and only node + // pointer is compared. + return iterator{reinterpret_cast(mInfo), nullptr}; + } + const_iterator end() const { + return cend(); + } + const_iterator cend() const { + return const_iterator{reinterpret_cast(mInfo), nullptr}; + } + + iterator erase(const_iterator pos) { + // its safe to perform const cast here + return erase(iterator{const_cast(pos.mKeyVals), const_cast(pos.mInfo)}); + } + + // Erases element at pos, returns iterator to the next element. + iterator erase(iterator pos) { + // we assume that pos always points to a valid entry, and not end(). + auto const idx = static_cast(pos.mKeyVals - mKeyVals); + + shiftDown(idx); + --mNumElements; + + if (*pos.mInfo) { + // we've backward shifted, return this again + return pos; + } + + // no backward shift, return next element + return ++pos; + } + + size_t erase(const key_type& key) { + size_t idx; + InfoType info; + keyToIdx(key, idx, info); + + // check while info matches with the source idx + do { + if (info == mInfo[idx] && KeyEqual::operator()(key, mKeyVals[idx].getFirst())) { + shiftDown(idx); + --mNumElements; + return 1; + } + next(&info, &idx); + } while (info <= mInfo[idx]); + + // nothing found to delete + return 0; + } + + size_type size() const { + return mNumElements; + } + + size_type max_size() const { + return static_cast(-1); + } + + bool empty() const { + return 0 == mNumElements; + } + + float max_load_factor() const { + return MaxLoadFactor100 / 100.0f; + } + + // Average number of elements per bucket. Since we allow only 1 per bucket + float load_factor() const { + return static_cast(size()) / (mMask + 1); + } + + size_t mask() const { + return mMask; + } + +private: + ROBIN_HOOD_NOINLINE void throwOverflowError() const { + throw std::overflow_error("csv::map overflow"); + } + + void init_data(size_t max_elements) { + mNumElements = 0; + mMask = max_elements - 1; + mMaxNumElementsAllowed = calcMaxNumElementsAllowed(max_elements); + + // calloc also zeroes everything + mKeyVals = reinterpret_cast( + detail::assertNotNull(calloc(1, calcNumBytesTotal(max_elements)))); + mInfo = reinterpret_cast(mKeyVals + max_elements); + + // set sentinel + mInfo[max_elements] = 1; + } + + template + mapped_type& doCreateByKey(Arg&& key) { + while (true) { + size_t idx; + InfoType info; + keyToIdx(key, idx, info); + nextWhileLess(&info, &idx); + + // while we potentially have a match. Can't do a do-while here because when mInfo is 0 + // we don't want to skip forward + while (info == mInfo[idx]) { + if (KeyEqual::operator()(key, mKeyVals[idx].getFirst())) { + // key already exists, do not insert. + return mKeyVals[idx].getSecond(); + } + next(&info, &idx); + } + + // unlikely that this evaluates to true + if (mNumElements >= mMaxNumElementsAllowed) { + increase_size(); + continue; + } + + // key not found, so we are now exactly where we want to insert it. + auto const insertion_idx = idx; + auto const insertion_info = info; + if (0xFF <= insertion_info + mInfoInc) { + mMaxNumElementsAllowed = 0; + } + + // find an empty spot + while (0 != mInfo[idx]) { + next(&info, &idx); + } + + auto& l = mKeyVals[insertion_idx]; + if (idx == insertion_idx) { + // put at empty spot. This forwards all arguments into the node where the object is + // constructed exactly where it is needed. + ::new (static_cast(&l)) + Node(*this, std::piecewise_construct, + std::forward_as_tuple(std::forward(key)), std::forward_as_tuple()); + } else { + shiftUp(idx, insertion_idx); + l = Node(*this, std::piecewise_construct, + std::forward_as_tuple(std::forward(key)), std::forward_as_tuple()); + } + + // mKeyVals[idx].getFirst() = std::move(key); + mInfo[insertion_idx] = static_cast(insertion_info); + + ++mNumElements; + return mKeyVals[insertion_idx].getSecond(); + } + } + + // This is exactly the same code as operator[], except for the return values + template + std::pair doInsert(Arg&& keyval) { + while (true) { + size_t idx; + InfoType info; + keyToIdx(keyval.getFirst(), idx, info); + nextWhileLess(&info, &idx); + + // while we potentially have a match + while (info == mInfo[idx]) { + if (KeyEqual::operator()(keyval.getFirst(), mKeyVals[idx].getFirst())) { + // key already exists, do NOT insert. + // see http://en.cppreference.com/w/cpp/container/unordered_map/insert + return std::make_pair(iterator(mKeyVals + idx, mInfo + idx), + false); + } + next(&info, &idx); + } + + // unlikely that this evaluates to true + if (mNumElements >= mMaxNumElementsAllowed) { + increase_size(); + continue; + } + + // key not found, so we are now exactly where we want to insert it. + size_t const insertion_idx = idx; + auto const insertion_info = static_cast(info); + if (0xFF <= insertion_info + mInfoInc) { + mMaxNumElementsAllowed = 0; + } + + // find an empty spot + while (0 != mInfo[idx]) { + next(&info, &idx); + } + + auto& l = mKeyVals[insertion_idx]; + if (idx == insertion_idx) { + ::new (static_cast(&l)) Node(*this, std::forward(keyval)); + } else { + shiftUp(idx, insertion_idx); + l = Node(*this, std::forward(keyval)); + } + + // put at empty spot + mInfo[insertion_idx] = insertion_info; + + ++mNumElements; + return std::make_pair(iterator(mKeyVals + insertion_idx, mInfo + insertion_idx), true); + } + } + + size_t calcMaxNumElementsAllowed(size_t maxElements) { + static constexpr size_t overflowLimit = (std::numeric_limits::max)() / 100; + static constexpr double factor = MaxLoadFactor100 / 100.0; + + // make sure we can't get an overflow; use floatingpoint arithmetic if necessary. + if (maxElements > overflowLimit) { + return static_cast(static_cast(maxElements) * factor); + } else { + return (maxElements * MaxLoadFactor100) / 100; + } + } + + bool try_increase_info() { + ROBIN_HOOD_LOG("mInfoInc=" << mInfoInc << ", numElements=" << mNumElements + << ", maxNumElementsAllowed=" + << calcMaxNumElementsAllowed(mMask + 1)); + // we got space left, try to make info smaller + mInfoInc = static_cast(mInfoInc >> 1); + if (1 == mInfoInc) { + // need to be > 1 so that shift works (otherwise undefined behavior!) + return false; + } + + // remove one bit of the hash, leaving more space for the distance info. + // This is extremely fast because we can operate on 8 bytes at once. + ++mInfoHashShift; + auto const data = reinterpret_cast(mInfo); + auto const numEntries = (mMask + 1) / 8; + + for (size_t i = 0; i < numEntries; ++i) { + data[i] = (data[i] >> 1) & UINT64_C(0x7f7f7f7f7f7f7f7f); + } + mMaxNumElementsAllowed = calcMaxNumElementsAllowed(mMask + 1); + return true; + } + + void increase_size() { + // nothing allocated yet? just allocate 4 elements + if (0 == mMask) { + init_data(InitialNumElements); + return; + } + + auto const maxNumElementsAllowed = calcMaxNumElementsAllowed(mMask + 1); + if (mNumElements < maxNumElementsAllowed && try_increase_info()) { + return; + } + + ROBIN_HOOD_LOG("mNumElements=" << mNumElements + << ", maxNumElementsAllowed=" << maxNumElementsAllowed); + // it seems we have a really bad hash function! don't try to resize again + if (mNumElements * 2 < calcMaxNumElementsAllowed(mMask + 1)) { + throwOverflowError(); + } + + // std::cout << (100.0*mNumElements / (mMask + 1)) << "% full, resizing" << std::endl; + Node* const oldKeyVals = mKeyVals; + uint8_t const* const oldInfo = mInfo; + + const size_t oldMaxElements = mMask + 1; + + // resize operation: move stuff + init_data(oldMaxElements * 2); + + mInfoInc = InitialInfoInc; + mInfoHashShift = InitialInfoHashShift; + for (size_t i = 0; i < oldMaxElements; ++i) { + if (oldInfo[i] != 0) { + insert_move(std::move(oldKeyVals[i])); + // destroy the node but DON'T destroy the data. + oldKeyVals[i].~Node(); + } + } + + // don't destroy old data: put it into the pool instead + DataPool::addOrFree(oldKeyVals, calcNumBytesTotal(oldMaxElements)); + } + + void destroy() { + if (0 == mMask) { + // don't deallocate! we are pointing to DummyInfoByte::b. + return; + } + + Destroyer::value>{} + .nodesDoNotDeallocate(*this); + free(mKeyVals); + } + + // members are sorted so no padding occurs + Node* mKeyVals = reinterpret_cast(reinterpret_cast(&detail::DummyInfoByte::b) - + sizeof(Node)); // 8 byte 8 + uint8_t* mInfo = reinterpret_cast(&detail::DummyInfoByte::b); // 8 byte 16 + size_t mNumElements = 0; // 8 byte 24 + size_t mMask = 0; // 8 byte 32 + size_t mMaxNumElementsAllowed = 0; // 8 byte 40 + InfoType mInfoInc = InitialInfoInc; // 4 byte 44 + InfoType mInfoHashShift = InitialInfoHashShift; // 4 byte 48 + // 16 byte 56 if NodeAllocator +}; + +} // namespace detail + +template , + typename KeyEqual = std::equal_to, size_t MaxLoadFactor100 = 80> +using unordered_flat_map = detail::unordered_map; + +template , + typename KeyEqual = std::equal_to, size_t MaxLoadFactor100 = 80> +using unordered_node_map = detail::unordered_map; + +template , + typename KeyEqual = std::equal_to, size_t MaxLoadFactor100 = 80> +using unordered_map = + detail::unordered_map) <= sizeof(size_t) * 6 && + std::is_nothrow_move_constructible>::value && + std::is_nothrow_move_assignable>::value, + MaxLoadFactor100, Key, T, Hash, KeyEqual>; + +} // namespace csv, + +#endif diff --git a/include/writer.hpp b/include/csv/writer.hpp similarity index 80% rename from include/writer.hpp rename to include/csv/writer.hpp index f0cb020..5519abb 100644 --- a/include/writer.hpp +++ b/include/csv/writer.hpp @@ -30,9 +30,9 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #pragma once -#include -#include -#include +#include +#include +#include #include #include #include @@ -57,25 +57,25 @@ class Writer { header_written_(false), current_dialect_name_("excel") { file_stream.open(file_name); - std::shared_ptr unix_dialect = std::make_shared(); + Dialect unix_dialect; unix_dialect - ->delimiter(",") + .delimiter(",") .quote_character('"') .double_quote(true) .header(true); dialects_["unix"] = unix_dialect; - std::shared_ptr excel_dialect = std::make_shared(); + Dialect excel_dialect; excel_dialect - ->delimiter(",") + .delimiter(",") .quote_character('"') .double_quote(true) .header(true); dialects_["excel"] = excel_dialect; - std::shared_ptr excel_tab_dialect = std::make_shared(); + Dialect excel_tab_dialect; excel_tab_dialect - ->delimiter("\t") + .delimiter("\t") .quote_character('"') .double_quote(true) .header(true); @@ -91,13 +91,12 @@ class Writer { Dialect& configure_dialect(const std::string& dialect_name = "excel") { if (dialects_.find(dialect_name) != dialects_.end()) { - return *dialects_[dialect_name]; + return dialects_[dialect_name]; } else { - std::shared_ptr dialect_object = std::make_shared(); - dialects_[dialect_name] = dialect_object; + dialects_[dialect_name] = Dialect(); current_dialect_name_ = dialect_name; - return *dialect_object; + return dialects_[dialect_name]; } } @@ -109,7 +108,7 @@ class Writer { } Dialect& get_dialect(const std::string& dialect_name) { - return *(dialects_[dialect_name]); + return dialects_[dialect_name]; } void use_dialect(const std::string& dialect_name) { @@ -124,7 +123,7 @@ class Writer { || is_specialization::value, void>::type write_row(T row_map) { std::vector row_entries; - auto column_names = dialects_[current_dialect_name_]->column_names_; + auto column_names = dialects_[current_dialect_name_].column_names_; for (size_t i = 0; i < column_names.size(); i++) { row_entries.push_back(row_map[column_names[i]]); } @@ -133,16 +132,16 @@ class Writer { for (size_t i = 0; i < current_row_entries_.size(); i++) { row += current_row_entries_[i]; if (i + 1 < current_row_entries_.size()) - row += dialects_[current_dialect_name_]->delimiter_; + row += dialects_[current_dialect_name_].delimiter_; } - row += dialects_[current_dialect_name_]->line_terminator_; + row += dialects_[current_dialect_name_].line_terminator_; queue.enqueue(row); current_row_entries_.clear(); } - void write_row(robin_map row_map) { + void write_row(unordered_flat_map row_map) { std::vector row_entries; - auto column_names = dialects_[current_dialect_name_]->column_names_; + auto column_names = dialects_[current_dialect_name_].column_names_; for (size_t i = 0; i < column_names.size(); i++) { row_entries.push_back(row_map[column_names[i]]); } @@ -151,9 +150,9 @@ class Writer { for (size_t i = 0; i < current_row_entries_.size(); i++) { row += current_row_entries_[i]; if (i + 1 < current_row_entries_.size()) - row += dialects_[current_dialect_name_]->delimiter_; + row += dialects_[current_dialect_name_].delimiter_; } - row += dialects_[current_dialect_name_]->line_terminator_; + row += dialects_[current_dialect_name_].line_terminator_; queue.enqueue(row); current_row_entries_.clear(); } @@ -164,9 +163,9 @@ class Writer { for (size_t i = 0; i < current_row_entries_.size(); i++) { row += current_row_entries_[i]; if (i + 1 < current_row_entries_.size()) - row += dialects_[current_dialect_name_]->delimiter_; + row += dialects_[current_dialect_name_].delimiter_; } - row += dialects_[current_dialect_name_]->line_terminator_; + row += dialects_[current_dialect_name_].line_terminator_; queue.enqueue(row); current_row_entries_.clear(); } @@ -180,9 +179,9 @@ class Writer { for (size_t i = 0; i < current_row_entries_.size(); i++) { row += current_row_entries_[i]; if (i + 1 < current_row_entries_.size()) - row += dialects_[current_dialect_name_]->delimiter_; + row += dialects_[current_dialect_name_].delimiter_; } - row += dialects_[current_dialect_name_]->line_terminator_; + row += dialects_[current_dialect_name_].line_terminator_; queue.enqueue(row); current_row_entries_.clear(); } @@ -204,11 +203,11 @@ class Writer { void write_header() { auto dialect = dialects_[current_dialect_name_]; - auto column_names = dialect->column_names_; + auto column_names = dialect.column_names_; if (column_names.size() == 0) return; - auto delimiter = dialect->delimiter_; - auto line_terminator = dialect->line_terminator_; + auto delimiter = dialect.delimiter_; + auto line_terminator = dialect.line_terminator_; std::string row; for (size_t i = 0; i < column_names.size(); i++) { row += column_names[i]; @@ -242,8 +241,8 @@ class Writer { std::future done_future; ConcurrentQueue queue; std::string current_dialect_name_; - robin_map> dialects_; - std::shared_ptr current_dialect_; + unordered_flat_map dialects_; + Dialect current_dialect_; std::vector current_row_entries_; bool header_written_; }; diff --git a/include/robin_growth_policy.hpp b/include/robin_growth_policy.hpp deleted file mode 100644 index ac6460c..0000000 --- a/include/robin_growth_policy.hpp +++ /dev/null @@ -1,324 +0,0 @@ -/** - * MIT License - * - * Copyright (c) 2017 Tessil - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef TSL_ROBIN_GROWTH_POLICY_H -#define TSL_ROBIN_GROWTH_POLICY_H - - -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -#ifdef TSL_DEBUG -# define tsl_rh_assert(expr) assert(expr) -#else -# define tsl_rh_assert(expr) (static_cast(0)) -#endif - - -/** - * If exceptions are enabled, throw the exception passed in parameter, otherwise call std::terminate. - */ -#if (defined(__cpp_exceptions) || defined(__EXCEPTIONS) || (defined (_MSC_VER) && defined (_CPPUNWIND))) && !defined(TSL_NO_EXCEPTIONS) -# define TSL_RH_THROW_OR_TERMINATE(ex, msg) throw ex(msg) -#else -# ifdef NDEBUG -# define TSL_RH_THROW_OR_TERMINATE(ex, msg) std::terminate() -# else -# include -# define TSL_RH_THROW_OR_TERMINATE(ex, msg) do { std::fprintf(stderr, msg); std::terminate(); } while(0) -# endif -#endif - - -#if defined(__GNUC__) || defined(__clang__) -# define TSL_RH_LIKELY(exp) (__builtin_expect(!!(exp), true)) -#else -# define TSL_RH_LIKELY(exp) (exp) -#endif - - -namespace csv { -namespace rh { - -/** - * Grow the hash table by a factor of GrowthFactor keeping the bucket count to a power of two. It allows - * the table to use a mask operation instead of a modulo operation to map a hash to a bucket. - * - * GrowthFactor must be a power of two >= 2. - */ -template -class power_of_two_growth_policy { -public: - /** - * Called on the hash table creation and on rehash. The number of buckets for the table is passed in parameter. - * This number is a minimum, the policy may update this value with a higher value if needed (but not lower). - * - * If 0 is given, min_bucket_count_in_out must still be 0 after the policy creation and - * bucket_for_hash must always return 0 in this case. - */ - explicit power_of_two_growth_policy(std::size_t& min_bucket_count_in_out) { - if(min_bucket_count_in_out > max_bucket_count()) { - TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size."); - } - - if(min_bucket_count_in_out > 0) { - min_bucket_count_in_out = round_up_to_power_of_two(min_bucket_count_in_out); - m_mask = min_bucket_count_in_out - 1; - } - else { - m_mask = 0; - } - } - - /** - * Return the bucket [0, bucket_count()) to which the hash belongs. - * If bucket_count() is 0, it must always return 0. - */ - std::size_t bucket_for_hash(std::size_t hash) const noexcept { - return hash & m_mask; - } - - /** - * Return the number of buckets that should be used on next growth. - */ - std::size_t next_bucket_count() const { - if((m_mask + 1) > max_bucket_count() / GrowthFactor) { - TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size."); - } - - return (m_mask + 1) * GrowthFactor; - } - - /** - * Return the maximum number of buckets supported by the policy. - */ - std::size_t max_bucket_count() const { - // Largest power of two. - return (std::numeric_limits::max() / 2) + 1; - } - - /** - * Reset the growth policy as if it was created with a bucket count of 0. - * After a clear, the policy must always return 0 when bucket_for_hash is called. - */ - void clear() noexcept { - m_mask = 0; - } - -private: - static std::size_t round_up_to_power_of_two(std::size_t value) { - if(is_power_of_two(value)) { - return value; - } - - if(value == 0) { - return 1; - } - - --value; - for(std::size_t i = 1; i < sizeof(std::size_t) * CHAR_BIT; i *= 2) { - value |= value >> i; - } - - return value + 1; - } - - static constexpr bool is_power_of_two(std::size_t value) { - return value != 0 && (value & (value - 1)) == 0; - } - -protected: - static_assert(is_power_of_two(GrowthFactor) && GrowthFactor >= 2, "GrowthFactor must be a power of two >= 2."); - - std::size_t m_mask; -}; - - -/** - * Grow the hash table by GrowthFactor::num / GrowthFactor::den and use a modulo to map a hash - * to a bucket. Slower but it can be useful if you want a slower growth. - */ -template> -class mod_growth_policy { -public: - explicit mod_growth_policy(std::size_t& min_bucket_count_in_out) { - if(min_bucket_count_in_out > max_bucket_count()) { - TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size."); - } - - if(min_bucket_count_in_out > 0) { - m_mod = min_bucket_count_in_out; - } - else { - m_mod = 1; - } - } - - std::size_t bucket_for_hash(std::size_t hash) const noexcept { - return hash % m_mod; - } - - std::size_t next_bucket_count() const { - if(m_mod == max_bucket_count()) { - TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size."); - } - - const double next_bucket_count = std::ceil(double(m_mod) * REHASH_SIZE_MULTIPLICATION_FACTOR); - if(!std::isnormal(next_bucket_count)) { - TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size."); - } - - if(next_bucket_count > double(max_bucket_count())) { - return max_bucket_count(); - } - else { - return std::size_t(next_bucket_count); - } - } - - std::size_t max_bucket_count() const { - return MAX_BUCKET_COUNT; - } - - void clear() noexcept { - m_mod = 1; - } - -private: - static constexpr double REHASH_SIZE_MULTIPLICATION_FACTOR = 1.0 * GrowthFactor::num / GrowthFactor::den; - static const std::size_t MAX_BUCKET_COUNT = - std::size_t(double( - std::numeric_limits::max() / REHASH_SIZE_MULTIPLICATION_FACTOR - )); - - static_assert(REHASH_SIZE_MULTIPLICATION_FACTOR >= 1.1, "Growth factor should be >= 1.1."); - - std::size_t m_mod; -}; - - - -namespace detail { - -static constexpr const std::array PRIMES = {{ - 1ul, 5ul, 17ul, 29ul, 37ul, 53ul, 67ul, 79ul, 97ul, 131ul, 193ul, 257ul, 389ul, 521ul, 769ul, 1031ul, - 1543ul, 2053ul, 3079ul, 6151ul, 12289ul, 24593ul, 49157ul, 98317ul, 196613ul, 393241ul, 786433ul, - 1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul, 50331653ul, 100663319ul, 201326611ul, - 402653189ul, 805306457ul, 1610612741ul, 3221225473ul, 4294967291ul -}}; - -template -static constexpr std::size_t mod(std::size_t hash) { return hash % PRIMES[IPrime]; } - -// MOD_PRIME[iprime](hash) returns hash % PRIMES[iprime]. This table allows for faster modulo as the -// compiler can optimize the modulo code better with a constant known at the compilation. -static constexpr const std::array MOD_PRIME = {{ - &mod<0>, &mod<1>, &mod<2>, &mod<3>, &mod<4>, &mod<5>, &mod<6>, &mod<7>, &mod<8>, &mod<9>, &mod<10>, - &mod<11>, &mod<12>, &mod<13>, &mod<14>, &mod<15>, &mod<16>, &mod<17>, &mod<18>, &mod<19>, &mod<20>, - &mod<21>, &mod<22>, &mod<23>, &mod<24>, &mod<25>, &mod<26>, &mod<27>, &mod<28>, &mod<29>, &mod<30>, - &mod<31>, &mod<32>, &mod<33>, &mod<34>, &mod<35>, &mod<36>, &mod<37> , &mod<38>, &mod<39> -}}; - -} - -/** - * Grow the hash table by using prime numbers as bucket count. Slower than csv::rh::power_of_two_growth_policy in - * general but will probably distribute the values around better in the buckets with a poor hash function. - * - * To allow the compiler to optimize the modulo operation, a lookup table is used with constant primes numbers. - * - * With a switch the code would look like: - * \code - * switch(iprime) { // iprime is the current prime of the hash table - * case 0: hash % 5ul; - * break; - * case 1: hash % 17ul; - * break; - * case 2: hash % 29ul; - * break; - * ... - * } - * \endcode - * - * Due to the constant variable in the modulo the compiler is able to optimize the operation - * by a series of multiplications, substractions and shifts. - * - * The 'hash % 5' could become something like 'hash - (hash * 0xCCCCCCCD) >> 34) * 5' in a 64 bits environement. - */ -class prime_growth_policy { -public: - explicit prime_growth_policy(std::size_t& min_bucket_count_in_out) { - auto it_prime = std::lower_bound(detail::PRIMES.begin(), - detail::PRIMES.end(), min_bucket_count_in_out); - if(it_prime == detail::PRIMES.end()) { - TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size."); - } - - m_iprime = static_cast(std::distance(detail::PRIMES.begin(), it_prime)); - if(min_bucket_count_in_out > 0) { - min_bucket_count_in_out = *it_prime; - } - else { - min_bucket_count_in_out = 0; - } - } - - std::size_t bucket_for_hash(std::size_t hash) const noexcept { - return detail::MOD_PRIME[m_iprime](hash); - } - - std::size_t next_bucket_count() const { - if(m_iprime + 1 >= detail::PRIMES.size()) { - TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size."); - } - - return detail::PRIMES[m_iprime + 1]; - } - - std::size_t max_bucket_count() const { - return detail::PRIMES.back(); - } - - void clear() noexcept { - m_iprime = 0; - } - -private: - unsigned int m_iprime; - - static_assert(std::numeric_limits::max() >= detail::PRIMES.size(), - "The type of m_iprime is not big enough."); -}; - -} -} - -#endif diff --git a/include/robin_hash.hpp b/include/robin_hash.hpp deleted file mode 100644 index 191e4d9..0000000 --- a/include/robin_hash.hpp +++ /dev/null @@ -1,1338 +0,0 @@ -/** - * MIT License - * - * Copyright (c) 2017 Tessil - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef TSL_ROBIN_HASH_H -#define TSL_ROBIN_HASH_H - - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -namespace csv { - -namespace detail_robin_hash { - -template -struct make_void { - using type = void; -}; - -template -struct has_is_transparent: std::false_type { -}; - -template -struct has_is_transparent::type>: std::true_type { -}; - -template -struct is_power_of_two_policy: std::false_type { -}; - -template -struct is_power_of_two_policy>: std::true_type { -}; - - - -using truncated_hash_type = std::uint_least32_t; - -/** - * Helper class that stores a truncated hash if StoreHash is true and nothing otherwise. - */ -template -class bucket_entry_hash { -public: - bool bucket_hash_equal(std::size_t /*hash*/) const noexcept { - return true; - } - - truncated_hash_type truncated_hash() const noexcept { - return 0; - } - -protected: - void set_hash(truncated_hash_type /*hash*/) noexcept { - } -}; - -template<> -class bucket_entry_hash { -public: - bool bucket_hash_equal(std::size_t hash) const noexcept { - return m_hash == truncated_hash_type(hash); - } - - truncated_hash_type truncated_hash() const noexcept { - return m_hash; - } - -protected: - void set_hash(truncated_hash_type hash) noexcept { - m_hash = truncated_hash_type(hash); - } - -private: - truncated_hash_type m_hash; -}; - - -/** - * Each bucket entry has: - * - A value of type `ValueType`. - * - An integer to store how far the value of the bucket, if any, is from its ideal bucket - * (ex: if the current bucket 5 has the value 'foo' and `hash('foo') % nb_buckets` == 3, - * `dist_from_ideal_bucket()` will return 2 as the current value of the bucket is two - * buckets away from its ideal bucket) - * If there is no value in the bucket (i.e. `empty()` is true) `dist_from_ideal_bucket()` will be < 0. - * - A marker which tells us if the bucket is the last bucket of the bucket array (useful for the - * iterator of the hash table). - * - If `StoreHash` is true, 32 bits of the hash of the value, if any, are also stored in the bucket. - * If the size of the hash is more than 32 bits, it is truncated. We don't store the full hash - * as storing the hash is a potential opportunity to use the unused space due to the alignement - * of the bucket_entry structure. We can thus potentially store the hash without any extra space - * (which would not be possible with 64 bits of the hash). - */ -template -class bucket_entry: public bucket_entry_hash { - using bucket_hash = bucket_entry_hash; - -public: - using value_type = ValueType; - using distance_type = std::int_least16_t; - - - bucket_entry() noexcept: bucket_hash(), m_dist_from_ideal_bucket(EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET), - m_last_bucket(false) - { - tsl_rh_assert(empty()); - } - - bucket_entry(bool last_bucket) noexcept: bucket_hash(), m_dist_from_ideal_bucket(EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET), - m_last_bucket(last_bucket) - { - tsl_rh_assert(empty()); - } - - bucket_entry(const bucket_entry& other) noexcept(std::is_nothrow_copy_constructible::value): - bucket_hash(other), - m_dist_from_ideal_bucket(EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET), - m_last_bucket(other.m_last_bucket) - { - if(!other.empty()) { - ::new (static_cast(std::addressof(m_value))) value_type(other.value()); - m_dist_from_ideal_bucket = other.m_dist_from_ideal_bucket; - } - } - - /** - * Never really used, but still necessary as we must call resize on an empty `std::vector`. - * and we need to support move-only types. See robin_hash constructor for details. - */ - bucket_entry(bucket_entry&& other) noexcept(std::is_nothrow_move_constructible::value): - bucket_hash(std::move(other)), - m_dist_from_ideal_bucket(EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET), - m_last_bucket(other.m_last_bucket) - { - if(!other.empty()) { - ::new (static_cast(std::addressof(m_value))) value_type(std::move(other.value())); - m_dist_from_ideal_bucket = other.m_dist_from_ideal_bucket; - } - } - - bucket_entry& operator=(const bucket_entry& other) - noexcept(std::is_nothrow_copy_constructible::value) - { - if(this != &other) { - clear(); - - bucket_hash::operator=(other); - if(!other.empty()) { - ::new (static_cast(std::addressof(m_value))) value_type(other.value()); - } - - m_dist_from_ideal_bucket = other.m_dist_from_ideal_bucket; - m_last_bucket = other.m_last_bucket; - } - - return *this; - } - - bucket_entry& operator=(bucket_entry&& ) = delete; - - ~bucket_entry() noexcept { - clear(); - } - - void clear() noexcept { - if(!empty()) { - destroy_value(); - m_dist_from_ideal_bucket = EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET; - } - } - - bool empty() const noexcept { - return m_dist_from_ideal_bucket == EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET; - } - - value_type& value() noexcept { - tsl_rh_assert(!empty()); - return *reinterpret_cast(std::addressof(m_value)); - } - - const value_type& value() const noexcept { - tsl_rh_assert(!empty()); - return *reinterpret_cast(std::addressof(m_value)); - } - - distance_type dist_from_ideal_bucket() const noexcept { - return m_dist_from_ideal_bucket; - } - - bool last_bucket() const noexcept { - return m_last_bucket; - } - - void set_as_last_bucket() noexcept { - m_last_bucket = true; - } - - template - void set_value_of_empty_bucket(distance_type dist_from_ideal_bucket, - truncated_hash_type hash, Args&&... value_type_args) - { - tsl_rh_assert(dist_from_ideal_bucket >= 0); - tsl_rh_assert(empty()); - - ::new (static_cast(std::addressof(m_value))) value_type(std::forward(value_type_args)...); - this->set_hash(hash); - m_dist_from_ideal_bucket = dist_from_ideal_bucket; - - tsl_rh_assert(!empty()); - } - - void swap_with_value_in_bucket(distance_type& dist_from_ideal_bucket, - truncated_hash_type& hash, value_type& value) - { - tsl_rh_assert(!empty()); - - using std::swap; - swap(value, this->value()); - swap(dist_from_ideal_bucket, m_dist_from_ideal_bucket); - - // Avoid warning of unused variable if StoreHash is false - (void) hash; - if(StoreHash) { - const truncated_hash_type tmp_hash = this->truncated_hash(); - this->set_hash(hash); - hash = tmp_hash; - } - } - - static truncated_hash_type truncate_hash(std::size_t hash) noexcept { - return truncated_hash_type(hash); - } - -private: - void destroy_value() noexcept { - tsl_rh_assert(!empty()); - value().~value_type(); - } - -private: - using storage = typename std::aligned_storage::type; - - static const distance_type EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET = -1; - - distance_type m_dist_from_ideal_bucket; - bool m_last_bucket; - storage m_value; -}; - - - -/** - * Internal common class used by `robin_map` and `robin_set`. - * - * ValueType is what will be stored by `robin_hash` (usually `std::pair` for map and `Key` for set). - * - * `KeySelect` should be a `FunctionObject` which takes a `ValueType` in parameter and returns a - * reference to the key. - * - * `ValueSelect` should be a `FunctionObject` which takes a `ValueType` in parameter and returns a - * reference to the value. `ValueSelect` should be void if there is no value (in a set for example). - * - * The strong exception guarantee only holds if the expression - * `std::is_nothrow_swappable::value && std::is_nothrow_move_constructible::value` is true. - * - * Behaviour is undefined if the destructor of `ValueType` throws. - */ -template -class robin_hash: private Hash, private KeyEqual, private GrowthPolicy { -private: - template - using has_mapped_type = typename std::integral_constant::value>; - - static_assert(noexcept(std::declval().bucket_for_hash(std::size_t(0))), "GrowthPolicy::bucket_for_hash must be noexcept."); - static_assert(noexcept(std::declval().clear()), "GrowthPolicy::clear must be noexcept."); - -public: - template - class robin_iterator; - - using key_type = typename KeySelect::key_type; - using value_type = ValueType; - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - using hasher = Hash; - using key_equal = KeyEqual; - using allocator_type = Allocator; - using reference = value_type&; - using const_reference = const value_type&; - using pointer = value_type*; - using const_pointer = const value_type*; - using iterator = robin_iterator; - using const_iterator = robin_iterator; - - -private: - /** - * Either store the hash because we are asked by the `StoreHash` template parameter - * or store the hash because it doesn't cost us anything in size and can be used to speed up rehash. - */ - static constexpr bool STORE_HASH = StoreHash || - ( - (sizeof(csv::detail_robin_hash::bucket_entry) == - sizeof(csv::detail_robin_hash::bucket_entry)) - && - (sizeof(std::size_t) == sizeof(truncated_hash_type) || - is_power_of_two_policy::value) - && - // Don't store the hash for primitive types with default hash. - (!std::is_arithmetic::value || - !std::is_same>::value) - ); - - /** - * Only use the stored hash on lookup if we are explictly asked. We are not sure how slow - * the KeyEqual operation is. An extra comparison may slow things down with a fast KeyEqual. - */ - static constexpr bool USE_STORED_HASH_ON_LOOKUP = StoreHash; - - /** - * We can only use the hash on rehash if the size of the hash type is the same as the stored one or - * if we use a power of two modulo. In the case of the power of two modulo, we just mask - * the least significant bytes, we just have to check that the truncated_hash_type didn't truncated - * more bytes. - */ - static bool USE_STORED_HASH_ON_REHASH(size_type bucket_count) { - (void) bucket_count; - if(STORE_HASH && sizeof(std::size_t) == sizeof(truncated_hash_type)) { - return true; - } - else if(STORE_HASH && is_power_of_two_policy::value) { - tsl_rh_assert(bucket_count > 0); - return (bucket_count - 1) <= std::numeric_limits::max(); - } - else { - return false; - } - } - - using bucket_entry = csv::detail_robin_hash::bucket_entry; - using distance_type = typename bucket_entry::distance_type; - - using buckets_allocator = typename std::allocator_traits::template rebind_alloc; - using buckets_container_type = std::vector; - - -public: - /** - * The 'operator*()' and 'operator->()' methods return a const reference and const pointer respectively to the - * stored value type. - * - * In case of a map, to get a mutable reference to the value associated to a key (the '.second' in the - * stored pair), you have to call 'value()'. - * - * The main reason for this is that if we returned a `std::pair&` instead - * of a `const std::pair&`, the user may modify the key which will put the map in a undefined state. - */ - template - class robin_iterator { - friend class robin_hash; - - private: - using bucket_entry_ptr = typename std::conditional::type; - - - robin_iterator(bucket_entry_ptr bucket) noexcept: m_bucket(bucket) { - } - - public: - using iterator_category = std::forward_iterator_tag; - using value_type = const typename robin_hash::value_type; - using difference_type = std::ptrdiff_t; - using reference = value_type&; - using pointer = value_type*; - - - robin_iterator() noexcept { - } - - // Copy constructor from iterator to const_iterator. - template::type* = nullptr> - robin_iterator(const robin_iterator& other) noexcept: m_bucket(other.m_bucket) { - } - - robin_iterator(const robin_iterator& other) = default; - robin_iterator(robin_iterator&& other) = default; - robin_iterator& operator=(const robin_iterator& other) = default; - robin_iterator& operator=(robin_iterator&& other) = default; - - const typename robin_hash::key_type& key() const { - return KeySelect()(m_bucket->value()); - } - - template::value && IsConst>::type* = nullptr> - const typename U::value_type& value() const { - return U()(m_bucket->value()); - } - - template::value && !IsConst>::type* = nullptr> - typename U::value_type& value() { - return U()(m_bucket->value()); - } - - reference operator*() const { - return m_bucket->value(); - } - - pointer operator->() const { - return std::addressof(m_bucket->value()); - } - - robin_iterator& operator++() { - while(true) { - if(m_bucket->last_bucket()) { - ++m_bucket; - return *this; - } - - ++m_bucket; - if(!m_bucket->empty()) { - return *this; - } - } - } - - robin_iterator operator++(int) { - robin_iterator tmp(*this); - ++*this; - - return tmp; - } - - friend bool operator==(const robin_iterator& lhs, const robin_iterator& rhs) { - return lhs.m_bucket == rhs.m_bucket; - } - - friend bool operator!=(const robin_iterator& lhs, const robin_iterator& rhs) { - return !(lhs == rhs); - } - - private: - bucket_entry_ptr m_bucket; - }; - - -public: -#if defined(__cplusplus) && __cplusplus >= 201402L - robin_hash(size_type bucket_count, - const Hash& hash, - const KeyEqual& equal, - const Allocator& alloc, - float max_load_factor): Hash(hash), - KeyEqual(equal), - GrowthPolicy(bucket_count), - m_buckets_data( - ((bucket_count > max_bucket_count())? - TSL_RH_THROW_OR_TERMINATE(std::length_error, "The map exceeds its maxmimum bucket count."): - bucket_count), - alloc - ), - m_buckets(m_buckets_data.empty()?static_empty_bucket_ptr():m_buckets_data.data()), - m_bucket_count(bucket_count), - m_nb_elements(0), - m_grow_on_next_insert(false) - { - if(m_bucket_count > 0) { - tsl_rh_assert(!m_buckets_data.empty()); - m_buckets_data.back().set_as_last_bucket(); - } - - - this->max_load_factor(max_load_factor); - } -#else - /** - * C++11 doesn't support the creation of a std::vector with a custom allocator and 'count' default-inserted elements. - * The needed contructor `explicit vector(size_type count, const Allocator& alloc = Allocator());` is only - * available in C++14 and later. We thus must resize after using the `vector(const Allocator& alloc)` constructor. - * - * We can't use `vector(size_type count, const T& value, const Allocator& alloc)` as it requires the - * value T to be copyable. - */ - robin_hash(size_type bucket_count, - const Hash& hash, - const KeyEqual& equal, - const Allocator& alloc, - float max_load_factor): Hash(hash), - KeyEqual(equal), - GrowthPolicy(bucket_count), - m_buckets_data(alloc), - m_buckets(static_empty_bucket_ptr()), - m_bucket_count(bucket_count), - m_nb_elements(0), - m_grow_on_next_insert(false) - { - if(bucket_count > max_bucket_count()) { - TSL_RH_THROW_OR_TERMINATE(std::length_error, "The map exceeds its maxmimum bucket count."); - } - - if(m_bucket_count > 0) { - m_buckets_data.resize(m_bucket_count); - m_buckets = m_buckets_data.data(); - - tsl_rh_assert(!m_buckets_data.empty()); - m_buckets_data.back().set_as_last_bucket(); - } - - - this->max_load_factor(max_load_factor); - } -#endif - - robin_hash(const robin_hash& other): Hash(other), - KeyEqual(other), - GrowthPolicy(other), - m_buckets_data(other.m_buckets_data), - m_buckets(m_buckets_data.empty()?static_empty_bucket_ptr():m_buckets_data.data()), - m_bucket_count(other.m_bucket_count), - m_nb_elements(other.m_nb_elements), - m_load_threshold(other.m_load_threshold), - m_max_load_factor(other.m_max_load_factor), - m_grow_on_next_insert(other.m_grow_on_next_insert) - { - } - - robin_hash(robin_hash&& other) noexcept(std::is_nothrow_move_constructible::value && - std::is_nothrow_move_constructible::value && - std::is_nothrow_move_constructible::value && - std::is_nothrow_move_constructible::value) - : Hash(std::move(static_cast(other))), - KeyEqual(std::move(static_cast(other))), - GrowthPolicy(std::move(static_cast(other))), - m_buckets_data(std::move(other.m_buckets_data)), - m_buckets(m_buckets_data.empty()?static_empty_bucket_ptr():m_buckets_data.data()), - m_bucket_count(other.m_bucket_count), - m_nb_elements(other.m_nb_elements), - m_load_threshold(other.m_load_threshold), - m_max_load_factor(other.m_max_load_factor), - m_grow_on_next_insert(other.m_grow_on_next_insert) - { - other.GrowthPolicy::clear(); - other.m_buckets_data.clear(); - other.m_buckets = static_empty_bucket_ptr(); - other.m_bucket_count = 0; - other.m_nb_elements = 0; - other.m_load_threshold = 0; - other.m_grow_on_next_insert = false; - } - - robin_hash& operator=(const robin_hash& other) { - if(&other != this) { - Hash::operator=(other); - KeyEqual::operator=(other); - GrowthPolicy::operator=(other); - - m_buckets_data = other.m_buckets_data; - m_buckets = m_buckets_data.empty()?static_empty_bucket_ptr(): - m_buckets_data.data(); - m_bucket_count = other.m_bucket_count; - m_nb_elements = other.m_nb_elements; - m_load_threshold = other.m_load_threshold; - m_max_load_factor = other.m_max_load_factor; - m_grow_on_next_insert = other.m_grow_on_next_insert; - } - - return *this; - } - - robin_hash& operator=(robin_hash&& other) { - other.swap(*this); - other.clear(); - - return *this; - } - - allocator_type get_allocator() const { - return m_buckets_data.get_allocator(); - } - - - /* - * Iterators - */ - iterator begin() noexcept { - std::size_t i = 0; - while(i < m_bucket_count && m_buckets[i].empty()) { - i++; - } - - return iterator(m_buckets + i); - } - - const_iterator begin() const noexcept { - return cbegin(); - } - - const_iterator cbegin() const noexcept { - std::size_t i = 0; - while(i < m_bucket_count && m_buckets[i].empty()) { - i++; - } - - return const_iterator(m_buckets + i); - } - - iterator end() noexcept { - return iterator(m_buckets + m_bucket_count); - } - - const_iterator end() const noexcept { - return cend(); - } - - const_iterator cend() const noexcept { - return const_iterator(m_buckets + m_bucket_count); - } - - - /* - * Capacity - */ - bool empty() const noexcept { - return m_nb_elements == 0; - } - - size_type size() const noexcept { - return m_nb_elements; - } - - size_type max_size() const noexcept { - return m_buckets_data.max_size(); - } - - /* - * Modifiers - */ - void clear() noexcept { - for(auto& bucket: m_buckets_data) { - bucket.clear(); - } - - m_nb_elements = 0; - m_grow_on_next_insert = false; - } - - - - template - std::pair insert(P&& value) { - return insert_impl(KeySelect()(value), std::forward

(value)); - } - - template - iterator insert_hint(const_iterator hint, P&& value) { - if(hint != cend() && compare_keys(KeySelect()(*hint), KeySelect()(value))) { - return mutable_iterator(hint); - } - - return insert(std::forward

(value)).first; - } - - template - void insert(InputIt first, InputIt last) { - if(std::is_base_of::iterator_category>::value) - { - const auto nb_elements_insert = std::distance(first, last); - const size_type nb_free_buckets = m_load_threshold - size(); - tsl_rh_assert(m_load_threshold >= size()); - - if(nb_elements_insert > 0 && nb_free_buckets < size_type(nb_elements_insert)) { - reserve(size() + size_type(nb_elements_insert)); - } - } - - for(; first != last; ++first) { - insert(*first); - } - } - - - - template - std::pair insert_or_assign(K&& key, M&& obj) { - auto it = try_emplace(std::forward(key), std::forward(obj)); - if(!it.second) { - it.first.value() = std::forward(obj); - } - - return it; - } - - template - iterator insert_or_assign(const_iterator hint, K&& key, M&& obj) { - if(hint != cend() && compare_keys(KeySelect()(*hint), key)) { - auto it = mutable_iterator(hint); - it.value() = std::forward(obj); - - return it; - } - - return insert_or_assign(std::forward(key), std::forward(obj)).first; - } - - - template - std::pair emplace(Args&&... args) { - return insert(value_type(std::forward(args)...)); - } - - template - iterator emplace_hint(const_iterator hint, Args&&... args) { - return insert_hint(hint, value_type(std::forward(args)...)); - } - - - - template - std::pair try_emplace(K&& key, Args&&... args) { - return insert_impl(key, std::piecewise_construct, - std::forward_as_tuple(std::forward(key)), - std::forward_as_tuple(std::forward(args)...)); - } - - template - iterator try_emplace_hint(const_iterator hint, K&& key, Args&&... args) { - if(hint != cend() && compare_keys(KeySelect()(*hint), key)) { - return mutable_iterator(hint); - } - - return try_emplace(std::forward(key), std::forward(args)...).first; - } - - /** - * Here to avoid `template size_type erase(const K& key)` being used when - * we use an `iterator` instead of a `const_iterator`. - */ - iterator erase(iterator pos) { - erase_from_bucket(pos); - - /** - * Erase bucket used a backward shift after clearing the bucket. - * Check if there is a new value in the bucket, if not get the next non-empty. - */ - if(pos.m_bucket->empty()) { - ++pos; - } - - return pos; - } - - iterator erase(const_iterator pos) { - return erase(mutable_iterator(pos)); - } - - iterator erase(const_iterator first, const_iterator last) { - if(first == last) { - return mutable_iterator(first); - } - - auto first_mutable = mutable_iterator(first); - auto last_mutable = mutable_iterator(last); - for(auto it = first_mutable.m_bucket; it != last_mutable.m_bucket; ++it) { - if(!it->empty()) { - it->clear(); - m_nb_elements--; - } - } - - if(last_mutable == end()) { - return end(); - } - - - /* - * Backward shift on the values which come after the deleted values. - * We try to move the values closer to their ideal bucket. - */ - std::size_t icloser_bucket = static_cast(first_mutable.m_bucket - m_buckets); - std::size_t ito_move_closer_value = static_cast(last_mutable.m_bucket - m_buckets); - tsl_rh_assert(ito_move_closer_value > icloser_bucket); - - const std::size_t ireturn_bucket = ito_move_closer_value - - std::min(ito_move_closer_value - icloser_bucket, - std::size_t(m_buckets[ito_move_closer_value].dist_from_ideal_bucket())); - - while(ito_move_closer_value < m_bucket_count && m_buckets[ito_move_closer_value].dist_from_ideal_bucket() > 0) { - icloser_bucket = ito_move_closer_value - - std::min(ito_move_closer_value - icloser_bucket, - std::size_t(m_buckets[ito_move_closer_value].dist_from_ideal_bucket())); - - - tsl_rh_assert(m_buckets[icloser_bucket].empty()); - const distance_type new_distance = distance_type(m_buckets[ito_move_closer_value].dist_from_ideal_bucket() - - (ito_move_closer_value - icloser_bucket)); - m_buckets[icloser_bucket].set_value_of_empty_bucket(new_distance, - m_buckets[ito_move_closer_value].truncated_hash(), - std::move(m_buckets[ito_move_closer_value].value())); - m_buckets[ito_move_closer_value].clear(); - - - ++icloser_bucket; - ++ito_move_closer_value; - } - - - return iterator(m_buckets + ireturn_bucket); - } - - - template - size_type erase(const K& key) { - return erase(key, hash_key(key)); - } - - template - size_type erase(const K& key, std::size_t hash) { - auto it = find(key, hash); - if(it != end()) { - erase_from_bucket(it); - - return 1; - } - else { - return 0; - } - } - - - - - - void swap(robin_hash& other) { - using std::swap; - - swap(static_cast(*this), static_cast(other)); - swap(static_cast(*this), static_cast(other)); - swap(static_cast(*this), static_cast(other)); - swap(m_buckets_data, other.m_buckets_data); - swap(m_buckets, other.m_buckets); - swap(m_bucket_count, other.m_bucket_count); - swap(m_nb_elements, other.m_nb_elements); - swap(m_load_threshold, other.m_load_threshold); - swap(m_max_load_factor, other.m_max_load_factor); - swap(m_grow_on_next_insert, other.m_grow_on_next_insert); - } - - - /* - * Lookup - */ - template::value>::type* = nullptr> - typename U::value_type& at(const K& key) { - return at(key, hash_key(key)); - } - - template::value>::type* = nullptr> - typename U::value_type& at(const K& key, std::size_t hash) { - return const_cast(static_cast(this)->at(key, hash)); - } - - - template::value>::type* = nullptr> - const typename U::value_type& at(const K& key) const { - return at(key, hash_key(key)); - } - - template::value>::type* = nullptr> - const typename U::value_type& at(const K& key, std::size_t hash) const { - auto it = find(key, hash); - if(it != cend()) { - return it.value(); - } - else { - TSL_RH_THROW_OR_TERMINATE(std::out_of_range, "Couldn't find key."); - } - } - - template::value>::type* = nullptr> - typename U::value_type& operator[](K&& key) { - return try_emplace(std::forward(key)).first.value(); - } - - - template - size_type count(const K& key) const { - return count(key, hash_key(key)); - } - - template - size_type count(const K& key, std::size_t hash) const { - if(find(key, hash) != cend()) { - return 1; - } - else { - return 0; - } - } - - - template - iterator find(const K& key) { - return find_impl(key, hash_key(key)); - } - - template - iterator find(const K& key, std::size_t hash) { - return find_impl(key, hash); - } - - - template - const_iterator find(const K& key) const { - return find_impl(key, hash_key(key)); - } - - template - const_iterator find(const K& key, std::size_t hash) const { - return find_impl(key, hash); - } - - - template - std::pair equal_range(const K& key) { - return equal_range(key, hash_key(key)); - } - - template - std::pair equal_range(const K& key, std::size_t hash) { - iterator it = find(key, hash); - return std::make_pair(it, (it == end())?it:std::next(it)); - } - - - template - std::pair equal_range(const K& key) const { - return equal_range(key, hash_key(key)); - } - - template - std::pair equal_range(const K& key, std::size_t hash) const { - const_iterator it = find(key, hash); - return std::make_pair(it, (it == cend())?it:std::next(it)); - } - - /* - * Bucket interface - */ - size_type bucket_count() const { - return m_bucket_count; - } - - size_type max_bucket_count() const { - return std::min(GrowthPolicy::max_bucket_count(), m_buckets_data.max_size()); - } - - /* - * Hash policy - */ - float load_factor() const { - if(bucket_count() == 0) { - return 0; - } - - return float(m_nb_elements)/float(bucket_count()); - } - - float max_load_factor() const { - return m_max_load_factor; - } - - void max_load_factor(float ml) { - m_max_load_factor = std::max(0.1f, std::min(ml, 0.95f)); - m_load_threshold = size_type(float(bucket_count())*m_max_load_factor); - } - - void rehash(size_type count) { - count = std::max(count, size_type(std::ceil(float(size())/max_load_factor()))); - rehash_impl(count); - } - - void reserve(size_type count) { - rehash(size_type(std::ceil(float(count)/max_load_factor()))); - } - - /* - * Observers - */ - hasher hash_function() const { - return static_cast(*this); - } - - key_equal key_eq() const { - return static_cast(*this); - } - - - /* - * Other - */ - iterator mutable_iterator(const_iterator pos) { - return iterator(const_cast(pos.m_bucket)); - } - -private: - template - std::size_t hash_key(const K& key) const { - return Hash::operator()(key); - } - - template - bool compare_keys(const K1& key1, const K2& key2) const { - return KeyEqual::operator()(key1, key2); - } - - std::size_t bucket_for_hash(std::size_t hash) const { - const std::size_t bucket = GrowthPolicy::bucket_for_hash(hash); - tsl_rh_assert(bucket < m_bucket_count || (bucket == 0 && m_bucket_count == 0)); - - return bucket; - } - - template::value>::type* = nullptr> - std::size_t next_bucket(std::size_t index) const noexcept { - tsl_rh_assert(index < bucket_count()); - - return (index + 1) & this->m_mask; - } - - template::value>::type* = nullptr> - std::size_t next_bucket(std::size_t index) const noexcept { - tsl_rh_assert(index < bucket_count()); - - index++; - return (index != bucket_count())?index:0; - } - - - - template - iterator find_impl(const K& key, std::size_t hash) { - return mutable_iterator(static_cast(this)->find(key, hash)); - } - - template - const_iterator find_impl(const K& key, std::size_t hash) const { - std::size_t ibucket = bucket_for_hash(hash); - distance_type dist_from_ideal_bucket = 0; - - while(dist_from_ideal_bucket <= m_buckets[ibucket].dist_from_ideal_bucket()) { - if(TSL_RH_LIKELY((!USE_STORED_HASH_ON_LOOKUP || m_buckets[ibucket].bucket_hash_equal(hash)) && - compare_keys(KeySelect()(m_buckets[ibucket].value()), key))) - { - return const_iterator(m_buckets + ibucket); - } - - ibucket = next_bucket(ibucket); - dist_from_ideal_bucket++; - } - - return cend(); - } - - void erase_from_bucket(iterator pos) { - pos.m_bucket->clear(); - m_nb_elements--; - - /** - * Backward shift, swap the empty bucket, previous_ibucket, with the values on its right, ibucket, - * until we cross another empty bucket or if the other bucket has a distance_from_ideal_bucket == 0. - * - * We try to move the values closer to their ideal bucket. - */ - std::size_t previous_ibucket = static_cast(pos.m_bucket - m_buckets); - std::size_t ibucket = next_bucket(previous_ibucket); - - while(m_buckets[ibucket].dist_from_ideal_bucket() > 0) { - tsl_rh_assert(m_buckets[previous_ibucket].empty()); - - const distance_type new_distance = distance_type(m_buckets[ibucket].dist_from_ideal_bucket() - 1); - m_buckets[previous_ibucket].set_value_of_empty_bucket(new_distance, m_buckets[ibucket].truncated_hash(), - std::move(m_buckets[ibucket].value())); - m_buckets[ibucket].clear(); - - previous_ibucket = ibucket; - ibucket = next_bucket(ibucket); - } - } - - template - std::pair insert_impl(const K& key, Args&&... value_type_args) { - const std::size_t hash = hash_key(key); - - std::size_t ibucket = bucket_for_hash(hash); - distance_type dist_from_ideal_bucket = 0; - - while(dist_from_ideal_bucket <= m_buckets[ibucket].dist_from_ideal_bucket()) { - if((!USE_STORED_HASH_ON_LOOKUP || m_buckets[ibucket].bucket_hash_equal(hash)) && - compare_keys(KeySelect()(m_buckets[ibucket].value()), key)) - { - return std::make_pair(iterator(m_buckets + ibucket), false); - } - - ibucket = next_bucket(ibucket); - dist_from_ideal_bucket++; - } - - if(grow_on_high_load()) { - ibucket = bucket_for_hash(hash); - dist_from_ideal_bucket = 0; - - while(dist_from_ideal_bucket <= m_buckets[ibucket].dist_from_ideal_bucket()) { - ibucket = next_bucket(ibucket); - dist_from_ideal_bucket++; - } - } - - - if(m_buckets[ibucket].empty()) { - m_buckets[ibucket].set_value_of_empty_bucket(dist_from_ideal_bucket, bucket_entry::truncate_hash(hash), - std::forward(value_type_args)...); - } - else { - insert_value(ibucket, dist_from_ideal_bucket, bucket_entry::truncate_hash(hash), - std::forward(value_type_args)...); - } - - - m_nb_elements++; - /* - * The value will be inserted in ibucket in any case, either because it was - * empty or by stealing the bucket (robin hood). - */ - return std::make_pair(iterator(m_buckets + ibucket), true); - } - - - template - void insert_value(std::size_t ibucket, distance_type dist_from_ideal_bucket, - truncated_hash_type hash, Args&&... value_type_args) - { - value_type value(std::forward(value_type_args)...); - insert_value_impl(ibucket, dist_from_ideal_bucket, hash, value); - } - - void insert_value(std::size_t ibucket, distance_type dist_from_ideal_bucket, - truncated_hash_type hash, value_type&& value) - { - insert_value_impl(ibucket, dist_from_ideal_bucket, hash, value); - } - - /* - * We don't use `value_type&& value` as last argument due to a bug in MSVC when `value_type` is a pointer, - * The compiler is not able to see the difference between `std::string*` and `std::string*&&` resulting in - * compile error. - * - * The `value` will be in a moved state at the end of the function. - */ - void insert_value_impl(std::size_t ibucket, distance_type dist_from_ideal_bucket, - truncated_hash_type hash, value_type& value) - { - m_buckets[ibucket].swap_with_value_in_bucket(dist_from_ideal_bucket, hash, value); - ibucket = next_bucket(ibucket); - dist_from_ideal_bucket++; - - while(!m_buckets[ibucket].empty()) { - if(dist_from_ideal_bucket > m_buckets[ibucket].dist_from_ideal_bucket()) { - if(dist_from_ideal_bucket >= REHASH_ON_HIGH_NB_PROBES__NPROBES && - load_factor() >= REHASH_ON_HIGH_NB_PROBES__MIN_LOAD_FACTOR) - { - /** - * The number of probes is really high, rehash the map on the next insert. - * Difficult to do now as rehash may throw an exception. - */ - m_grow_on_next_insert = true; - } - - m_buckets[ibucket].swap_with_value_in_bucket(dist_from_ideal_bucket, hash, value); - } - - ibucket = next_bucket(ibucket); - dist_from_ideal_bucket++; - } - - m_buckets[ibucket].set_value_of_empty_bucket(dist_from_ideal_bucket, hash, std::move(value)); - } - - - void rehash_impl(size_type count) { - robin_hash new_table(count, static_cast(*this), static_cast(*this), - get_allocator(), m_max_load_factor); - - const bool use_stored_hash = USE_STORED_HASH_ON_REHASH(new_table.bucket_count()); - for(auto& bucket: m_buckets_data) { - if(bucket.empty()) { - continue; - } - - const std::size_t hash = use_stored_hash?bucket.truncated_hash(): - new_table.hash_key(KeySelect()(bucket.value())); - - new_table.insert_value_on_rehash(new_table.bucket_for_hash(hash), 0, - bucket_entry::truncate_hash(hash), std::move(bucket.value())); - } - - new_table.m_nb_elements = m_nb_elements; - new_table.swap(*this); - } - - void insert_value_on_rehash(std::size_t ibucket, distance_type dist_from_ideal_bucket, - truncated_hash_type hash, value_type&& value) - { - while(true) { - if(dist_from_ideal_bucket > m_buckets[ibucket].dist_from_ideal_bucket()) { - if(m_buckets[ibucket].empty()) { - m_buckets[ibucket].set_value_of_empty_bucket(dist_from_ideal_bucket, hash, std::move(value)); - return; - } - else { - m_buckets[ibucket].swap_with_value_in_bucket(dist_from_ideal_bucket, hash, value); - } - } - - dist_from_ideal_bucket++; - ibucket = next_bucket(ibucket); - } - } - - - - /** - * Return true if the map has been rehashed. - */ - bool grow_on_high_load() { - if(m_grow_on_next_insert || size() >= m_load_threshold) { - rehash_impl(GrowthPolicy::next_bucket_count()); - m_grow_on_next_insert = false; - - return true; - } - - return false; - } - - -public: - static const size_type DEFAULT_INIT_BUCKETS_SIZE = 0; - static constexpr float DEFAULT_MAX_LOAD_FACTOR = 0.5f; - -private: - static const distance_type REHASH_ON_HIGH_NB_PROBES__NPROBES = 128; - static constexpr float REHASH_ON_HIGH_NB_PROBES__MIN_LOAD_FACTOR = 0.15f; - - - /** - * Return an always valid pointer to an static empty bucket_entry with last_bucket() == true. - */ - bucket_entry* static_empty_bucket_ptr() { - static bucket_entry empty_bucket(true); - return &empty_bucket; - } - -private: - buckets_container_type m_buckets_data; - - /** - * Points to m_buckets_data.data() if !m_buckets_data.empty() otherwise points to static_empty_bucket_ptr. - * This variable is useful to avoid the cost of checking if m_buckets_data is empty when trying - * to find an element. - * - * TODO Remove m_buckets_data and only use a pointer instead of a pointer+vector to save some space in the robin_hash object. - * Manage the Allocator manually. - */ - bucket_entry* m_buckets; - - /** - * Used a lot in find, avoid the call to m_buckets_data.size() which is a bit slower. - */ - size_type m_bucket_count; - - size_type m_nb_elements; - - size_type m_load_threshold; - float m_max_load_factor; - - bool m_grow_on_next_insert; -}; - -} - -} - -#endif diff --git a/include/robin_map.hpp b/include/robin_map.hpp deleted file mode 100644 index 597cbbc..0000000 --- a/include/robin_map.hpp +++ /dev/null @@ -1,667 +0,0 @@ -/** - * MIT License - * - * Copyright (c) 2017 Tessil - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef TSL_ROBIN_MAP_H -#define TSL_ROBIN_MAP_H - - -#include -#include -#include -#include -#include -#include -#include - -namespace csv { - - -/** - * Implementation of a hash map using open-adressing and the robin hood hashing algorithm with backward shift deletion. - * - * For operations modifying the hash map (insert, erase, rehash, ...), the strong exception guarantee - * is only guaranteed when the expression `std::is_nothrow_swappable>::value && - * std::is_nothrow_move_constructible>::value` is true, otherwise if an exception - * is thrown during the swap or the move, the hash map may end up in a undefined state. Per the standard - * a `Key` or `T` with a noexcept copy constructor and no move constructor also satisfies the - * `std::is_nothrow_move_constructible>::value` criterion (and will thus guarantee the - * strong exception for the map). - * - * When `StoreHash` is true, 32 bits of the hash are stored alongside the values. It can improve - * the performance during lookups if the `KeyEqual` function takes time (if it engenders a cache-miss for example) - * as we then compare the stored hashes before comparing the keys. When `csv::rh::power_of_two_growth_policy` is used - * as `GrowthPolicy`, it may also speed-up the rehash process as we can avoid to recalculate the hash. - * When it is detected that storing the hash will not incur any memory penality due to alignement (i.e. - * `sizeof(csv::detail_robin_hash::bucket_entry) == - * sizeof(csv::detail_robin_hash::bucket_entry)`) and `csv::rh::power_of_two_growth_policy` is - * used, the hash will be stored even if `StoreHash` is false so that we can speed-up the rehash (but it will - * not be used on lookups unless `StoreHash` is true). - * - * `GrowthPolicy` defines how the map grows and consequently how a hash value is mapped to a bucket. - * By default the map uses `csv::rh::power_of_two_growth_policy`. This policy keeps the number of buckets - * to a power of two and uses a mask to map the hash to a bucket instead of the slow modulo. - * Other growth policies are available and you may define your own growth policy, - * check `csv::rh::power_of_two_growth_policy` for the interface. - * - * If the destructor of `Key` or `T` throws an exception, the behaviour of the class is undefined. - * - * Iterators invalidation: - * - clear, operator=, reserve, rehash: always invalidate the iterators. - * - insert, emplace, emplace_hint, operator[]: if there is an effective insert, invalidate the iterators. - * - erase: always invalidate the iterators. - */ -template, - class KeyEqual = std::equal_to, - class Allocator = std::allocator>, - bool StoreHash = false, - class GrowthPolicy = csv::rh::power_of_two_growth_policy<2>> -class robin_map { -private: - template - using has_is_transparent = csv::detail_robin_hash::has_is_transparent; - - class KeySelect { - public: - using key_type = Key; - - const key_type& operator()(const std::pair& key_value) const noexcept { - return key_value.first; - } - - key_type& operator()(std::pair& key_value) noexcept { - return key_value.first; - } - }; - - class ValueSelect { - public: - using value_type = T; - - const value_type& operator()(const std::pair& key_value) const noexcept { - return key_value.second; - } - - value_type& operator()(std::pair& key_value) noexcept { - return key_value.second; - } - }; - - using ht = detail_robin_hash::robin_hash, KeySelect, ValueSelect, - Hash, KeyEqual, Allocator, StoreHash, GrowthPolicy>; - -public: - using key_type = typename ht::key_type; - using mapped_type = T; - using value_type = typename ht::value_type; - using size_type = typename ht::size_type; - using difference_type = typename ht::difference_type; - using hasher = typename ht::hasher; - using key_equal = typename ht::key_equal; - using allocator_type = typename ht::allocator_type; - using reference = typename ht::reference; - using const_reference = typename ht::const_reference; - using pointer = typename ht::pointer; - using const_pointer = typename ht::const_pointer; - using iterator = typename ht::iterator; - using const_iterator = typename ht::const_iterator; - - -public: - /* - * Constructors - */ - robin_map(): robin_map(ht::DEFAULT_INIT_BUCKETS_SIZE) { - } - - explicit robin_map(size_type bucket_count, - const Hash& hash = Hash(), - const KeyEqual& equal = KeyEqual(), - const Allocator& alloc = Allocator()): - m_ht(bucket_count, hash, equal, alloc, ht::DEFAULT_MAX_LOAD_FACTOR) - { - } - - robin_map(size_type bucket_count, - const Allocator& alloc): robin_map(bucket_count, Hash(), KeyEqual(), alloc) - { - } - - robin_map(size_type bucket_count, - const Hash& hash, - const Allocator& alloc): robin_map(bucket_count, hash, KeyEqual(), alloc) - { - } - - explicit robin_map(const Allocator& alloc): robin_map(ht::DEFAULT_INIT_BUCKETS_SIZE, alloc) { - } - - template - robin_map(InputIt first, InputIt last, - size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE, - const Hash& hash = Hash(), - const KeyEqual& equal = KeyEqual(), - const Allocator& alloc = Allocator()): robin_map(bucket_count, hash, equal, alloc) - { - insert(first, last); - } - - template - robin_map(InputIt first, InputIt last, - size_type bucket_count, - const Allocator& alloc): robin_map(first, last, bucket_count, Hash(), KeyEqual(), alloc) - { - } - - template - robin_map(InputIt first, InputIt last, - size_type bucket_count, - const Hash& hash, - const Allocator& alloc): robin_map(first, last, bucket_count, hash, KeyEqual(), alloc) - { - } - - robin_map(std::initializer_list init, - size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE, - const Hash& hash = Hash(), - const KeyEqual& equal = KeyEqual(), - const Allocator& alloc = Allocator()): - robin_map(init.begin(), init.end(), bucket_count, hash, equal, alloc) - { - } - - robin_map(std::initializer_list init, - size_type bucket_count, - const Allocator& alloc): - robin_map(init.begin(), init.end(), bucket_count, Hash(), KeyEqual(), alloc) - { - } - - robin_map(std::initializer_list init, - size_type bucket_count, - const Hash& hash, - const Allocator& alloc): - robin_map(init.begin(), init.end(), bucket_count, hash, KeyEqual(), alloc) - { - } - - robin_map& operator=(std::initializer_list ilist) { - m_ht.clear(); - - m_ht.reserve(ilist.size()); - m_ht.insert(ilist.begin(), ilist.end()); - - return *this; - } - - allocator_type get_allocator() const { return m_ht.get_allocator(); } - - - /* - * Iterators - */ - iterator begin() noexcept { return m_ht.begin(); } - const_iterator begin() const noexcept { return m_ht.begin(); } - const_iterator cbegin() const noexcept { return m_ht.cbegin(); } - - iterator end() noexcept { return m_ht.end(); } - const_iterator end() const noexcept { return m_ht.end(); } - const_iterator cend() const noexcept { return m_ht.cend(); } - - - /* - * Capacity - */ - bool empty() const noexcept { return m_ht.empty(); } - size_type size() const noexcept { return m_ht.size(); } - size_type max_size() const noexcept { return m_ht.max_size(); } - - /* - * Modifiers - */ - void clear() noexcept { m_ht.clear(); } - - - - std::pair insert(const value_type& value) { - return m_ht.insert(value); - } - - template::value>::type* = nullptr> - std::pair insert(P&& value) { - return m_ht.emplace(std::forward

(value)); - } - - std::pair insert(value_type&& value) { - return m_ht.insert(std::move(value)); - } - - - iterator insert(const_iterator hint, const value_type& value) { - return m_ht.insert_hint(hint, value); - } - - template::value>::type* = nullptr> - iterator insert(const_iterator hint, P&& value) { - return m_ht.emplace_hint(hint, std::forward

(value)); - } - - iterator insert(const_iterator hint, value_type&& value) { - return m_ht.insert_hint(hint, std::move(value)); - } - - - template - void insert(InputIt first, InputIt last) { - m_ht.insert(first, last); - } - - void insert(std::initializer_list ilist) { - m_ht.insert(ilist.begin(), ilist.end()); - } - - - - - template - std::pair insert_or_assign(const key_type& k, M&& obj) { - return m_ht.insert_or_assign(k, std::forward(obj)); - } - - template - std::pair insert_or_assign(key_type&& k, M&& obj) { - return m_ht.insert_or_assign(std::move(k), std::forward(obj)); - } - - template - iterator insert_or_assign(const_iterator hint, const key_type& k, M&& obj) { - return m_ht.insert_or_assign(hint, k, std::forward(obj)); - } - - template - iterator insert_or_assign(const_iterator hint, key_type&& k, M&& obj) { - return m_ht.insert_or_assign(hint, std::move(k), std::forward(obj)); - } - - - - /** - * Due to the way elements are stored, emplace will need to move or copy the key-value once. - * The method is equivalent to insert(value_type(std::forward(args)...)); - * - * Mainly here for compatibility with the std::unordered_map interface. - */ - template - std::pair emplace(Args&&... args) { - return m_ht.emplace(std::forward(args)...); - } - - - - /** - * Due to the way elements are stored, emplace_hint will need to move or copy the key-value once. - * The method is equivalent to insert(hint, value_type(std::forward(args)...)); - * - * Mainly here for compatibility with the std::unordered_map interface. - */ - template - iterator emplace_hint(const_iterator hint, Args&&... args) { - return m_ht.emplace_hint(hint, std::forward(args)...); - } - - - - - template - std::pair try_emplace(const key_type& k, Args&&... args) { - return m_ht.try_emplace(k, std::forward(args)...); - } - - template - std::pair try_emplace(key_type&& k, Args&&... args) { - return m_ht.try_emplace(std::move(k), std::forward(args)...); - } - - template - iterator try_emplace(const_iterator hint, const key_type& k, Args&&... args) { - return m_ht.try_emplace_hint(hint, k, std::forward(args)...); - } - - template - iterator try_emplace(const_iterator hint, key_type&& k, Args&&... args) { - return m_ht.try_emplace_hint(hint, std::move(k), std::forward(args)...); - } - - - - - iterator erase(iterator pos) { return m_ht.erase(pos); } - iterator erase(const_iterator pos) { return m_ht.erase(pos); } - iterator erase(const_iterator first, const_iterator last) { return m_ht.erase(first, last); } - size_type erase(const key_type& key) { return m_ht.erase(key); } - - /** - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup to the value if you already have the hash. - */ - size_type erase(const key_type& key, std::size_t precalculated_hash) { - return m_ht.erase(key, precalculated_hash); - } - - /** - * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. - * If so, K must be hashable and comparable to Key. - */ - template::value>::type* = nullptr> - size_type erase(const K& key) { return m_ht.erase(key); } - - /** - * @copydoc erase(const K& key) - * - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup to the value if you already have the hash. - */ - template::value>::type* = nullptr> - size_type erase(const K& key, std::size_t precalculated_hash) { - return m_ht.erase(key, precalculated_hash); - } - - - - void swap(robin_map& other) { other.m_ht.swap(m_ht); } - - - - /* - * Lookup - */ - T& at(const Key& key) { return m_ht.at(key); } - - /** - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. - */ - T& at(const Key& key, std::size_t precalculated_hash) { return m_ht.at(key, precalculated_hash); } - - - const T& at(const Key& key) const { return m_ht.at(key); } - - /** - * @copydoc at(const Key& key, std::size_t precalculated_hash) - */ - const T& at(const Key& key, std::size_t precalculated_hash) const { return m_ht.at(key, precalculated_hash); } - - - /** - * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. - * If so, K must be hashable and comparable to Key. - */ - template::value>::type* = nullptr> - T& at(const K& key) { return m_ht.at(key); } - - /** - * @copydoc at(const K& key) - * - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. - */ - template::value>::type* = nullptr> - T& at(const K& key, std::size_t precalculated_hash) { return m_ht.at(key, precalculated_hash); } - - - /** - * @copydoc at(const K& key) - */ - template::value>::type* = nullptr> - const T& at(const K& key) const { return m_ht.at(key); } - - /** - * @copydoc at(const K& key, std::size_t precalculated_hash) - */ - template::value>::type* = nullptr> - const T& at(const K& key, std::size_t precalculated_hash) const { return m_ht.at(key, precalculated_hash); } - - - - - T& operator[](const Key& key) { return m_ht[key]; } - T& operator[](Key&& key) { return m_ht[std::move(key)]; } - - - - - size_type count(const Key& key) const { return m_ht.count(key); } - - /** - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. - */ - size_type count(const Key& key, std::size_t precalculated_hash) const { - return m_ht.count(key, precalculated_hash); - } - - /** - * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. - * If so, K must be hashable and comparable to Key. - */ - template::value>::type* = nullptr> - size_type count(const K& key) const { return m_ht.count(key); } - - /** - * @copydoc count(const K& key) const - * - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. - */ - template::value>::type* = nullptr> - size_type count(const K& key, std::size_t precalculated_hash) const { return m_ht.count(key, precalculated_hash); } - - - - - iterator find(const Key& key) { return m_ht.find(key); } - - /** - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. - */ - iterator find(const Key& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); } - - const_iterator find(const Key& key) const { return m_ht.find(key); } - - /** - * @copydoc find(const Key& key, std::size_t precalculated_hash) - */ - const_iterator find(const Key& key, std::size_t precalculated_hash) const { - return m_ht.find(key, precalculated_hash); - } - - /** - * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. - * If so, K must be hashable and comparable to Key. - */ - template::value>::type* = nullptr> - iterator find(const K& key) { return m_ht.find(key); } - - /** - * @copydoc find(const K& key) - * - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. - */ - template::value>::type* = nullptr> - iterator find(const K& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); } - - /** - * @copydoc find(const K& key) - */ - template::value>::type* = nullptr> - const_iterator find(const K& key) const { return m_ht.find(key); } - - /** - * @copydoc find(const K& key) - * - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. - */ - template::value>::type* = nullptr> - const_iterator find(const K& key, std::size_t precalculated_hash) const { - return m_ht.find(key, precalculated_hash); - } - - - - - std::pair equal_range(const Key& key) { return m_ht.equal_range(key); } - - /** - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. - */ - std::pair equal_range(const Key& key, std::size_t precalculated_hash) { - return m_ht.equal_range(key, precalculated_hash); - } - - std::pair equal_range(const Key& key) const { return m_ht.equal_range(key); } - - /** - * @copydoc equal_range(const Key& key, std::size_t precalculated_hash) - */ - std::pair equal_range(const Key& key, std::size_t precalculated_hash) const { - return m_ht.equal_range(key, precalculated_hash); - } - - /** - * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. - * If so, K must be hashable and comparable to Key. - */ - template::value>::type* = nullptr> - std::pair equal_range(const K& key) { return m_ht.equal_range(key); } - - - /** - * @copydoc equal_range(const K& key) - * - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. - */ - template::value>::type* = nullptr> - std::pair equal_range(const K& key, std::size_t precalculated_hash) { - return m_ht.equal_range(key, precalculated_hash); - } - - /** - * @copydoc equal_range(const K& key) - */ - template::value>::type* = nullptr> - std::pair equal_range(const K& key) const { return m_ht.equal_range(key); } - - /** - * @copydoc equal_range(const K& key, std::size_t precalculated_hash) - */ - template::value>::type* = nullptr> - std::pair equal_range(const K& key, std::size_t precalculated_hash) const { - return m_ht.equal_range(key, precalculated_hash); - } - - - - - /* - * Bucket interface - */ - size_type bucket_count() const { return m_ht.bucket_count(); } - size_type max_bucket_count() const { return m_ht.max_bucket_count(); } - - - /* - * Hash policy - */ - float load_factor() const { return m_ht.load_factor(); } - float max_load_factor() const { return m_ht.max_load_factor(); } - void max_load_factor(float ml) { m_ht.max_load_factor(ml); } - - void rehash(size_type count) { m_ht.rehash(count); } - void reserve(size_type count) { m_ht.reserve(count); } - - - /* - * Observers - */ - hasher hash_function() const { return m_ht.hash_function(); } - key_equal key_eq() const { return m_ht.key_eq(); } - - /* - * Other - */ - - /** - * Convert a const_iterator to an iterator. - */ - iterator mutable_iterator(const_iterator pos) { - return m_ht.mutable_iterator(pos); - } - - friend bool operator==(const robin_map& lhs, const robin_map& rhs) { - if(lhs.size() != rhs.size()) { - return false; - } - - for(const auto& element_lhs: lhs) { - const auto it_element_rhs = rhs.find(element_lhs.first); - if(it_element_rhs == rhs.cend() || element_lhs.second != it_element_rhs->second) { - return false; - } - } - - return true; - } - - friend bool operator!=(const robin_map& lhs, const robin_map& rhs) { - return !operator==(lhs, rhs); - } - - friend void swap(robin_map& lhs, robin_map& rhs) { - lhs.swap(rhs); - } - -private: - ht m_ht; -}; - - -/** - * Same as `csv::robin_map`. - */ -template, - class KeyEqual = std::equal_to, - class Allocator = std::allocator>, - bool StoreHash = false> -using robin_pg_map = robin_map; - -} - -#endif diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 26ddc70..90c00da 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -15,7 +15,7 @@ endif() file(GLOB CSV_TEST_SOURCES "*.cpp" "*.hpp" - "../include/*.hpp" + "../include/csv/*.hpp" "inputs/*.csv" ) @@ -26,7 +26,7 @@ ADD_EXECUTABLE(CSV ${CSV_TEST_SOURCES}) target_link_libraries(CSV ${CMAKE_THREAD_LIBS_INIT}) INCLUDE_DIRECTORIES("../include" ".") set_target_properties(CSV PROPERTIES OUTPUT_NAME tests) -set_property(TARGET CSV PROPERTY CXX_STANDARD 11) +set_property(TARGET CSV PROPERTY CXX_STANDARD 17) # Set ${PROJECT_NAME} as the startup project set_property(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY VS_STARTUP_PROJECT CSV) diff --git a/test/tests.hpp b/test/tests.hpp index f68852d..2dd6fb7 100644 --- a/test/tests.hpp +++ b/test/tests.hpp @@ -1,6 +1,7 @@ #pragma once #include -#include +#include +#include TEST_CASE("Parse an empty CSV", "[simple csv]") { csv::Reader csv; @@ -39,7 +40,7 @@ TEST_CASE("Parse the most basic of CSV buffers", "[simple csv]") { TEST_CASE("Parse the most basic of CSV buffers (Iterator)", "[simple csv]") { csv::Reader csv; csv.read("inputs/test_01.csv"); - std::vector> rows; + std::vector> rows; while (csv.busy()) { if (csv.ready()) { auto row = csv.next_row();