From 556b61234edaec30a7634769ddd65e4ab5c7ed78 Mon Sep 17 00:00:00 2001
From: taranbis
Date: Wed, 2 Sep 2020 21:05:21 +0200
Subject: [PATCH] perf: add implementation for storing call stacks as a prefix tree

- Implementations for storing call-stack information only need to
  implement four functions: make_trace(), get_current_element(),
  insert_function_element() and remove_function_element();
- PrefixTree_StackDepot is the first version that adapts a prefix tree
  to store call-stack information. It uses hash maps instead of
  constant-size arrays;
- PrefixTreeDepot.h is the cache-efficient, optimized version of the
  prefix tree implementation.
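A minimal sketch of that four-function interface (illustration only; the
names follow this patch, and the signatures are taken from the TreeDepot
class added below — concrete depots are not required to inherit from it):

    #include <cstddef>
    #include <deque>
    #include <utility>

    class INode;  // tree node type, defined in PrefixTreeDepot.h

    class CallStackDepot {  // hypothetical name, for illustration
     public:
      INode* get_current_element();                  // node of the running function
      void insert_function_element(std::size_t pc);  // called on func_enter
      void remove_function_element();                // called on func_exit
      std::deque<std::size_t> make_trace(            // rebuild one call stack
          const std::pair<std::size_t, INode*>& data) const;
    };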
---
 standalone/CMakeLists.txt                     |   1 -
 standalone/binarydecoder/BinaryDecoder.cpp    |  10 +-
 standalone/detectors/fasttrack/CMakeLists.txt |   2 +
 .../detectors/fasttrack/include/MemoryPool.h  |  61 ++++
 .../fasttrack/include/PoolAllocator.h         | 199 +++++++++++++
 .../fasttrack/include/PrefixTreeDepot.h       | 278 ++++++++++++++++++
 .../fasttrack/include/PrefixTree_StackDepot.h |  95 ++++++
 .../detectors/fasttrack/include/fasttrack.h   |   9 +-
 .../detectors/fasttrack/include/stacktrace.h  |   8 -
 .../detectors/fasttrack/include/threadstate.h |   4 +-
 .../detectors/fasttrack/include/vectorclock.h |  11 +
 .../detectors/fasttrack/src/MemoryPool.cpp    |  31 ++
 .../fasttrack/test/fasttrack_test.cpp         |   6 +-
 standalone/ft_benchmark/ft_benchmark.cpp      |  16 +-
 standalone/helper/CMakeLists.txt              |  16 -
 standalone/helper/src/main.cpp                |  46 ---
 16 files changed, 705 insertions(+), 88 deletions(-)
 create mode 100644 standalone/detectors/fasttrack/include/MemoryPool.h
 create mode 100644 standalone/detectors/fasttrack/include/PoolAllocator.h
 create mode 100644 standalone/detectors/fasttrack/include/PrefixTreeDepot.h
 create mode 100644 standalone/detectors/fasttrack/include/PrefixTree_StackDepot.h
 create mode 100644 standalone/detectors/fasttrack/src/MemoryPool.cpp
 delete mode 100644 standalone/helper/CMakeLists.txt
 delete mode 100644 standalone/helper/src/main.cpp

diff --git a/standalone/CMakeLists.txt b/standalone/CMakeLists.txt
index f913da6..da7aeae 100644
--- a/standalone/CMakeLists.txt
+++ b/standalone/CMakeLists.txt
@@ -21,5 +21,4 @@ endif()
 
 add_subdirectory("detectors/fasttrack")
 add_subdirectory("binarydecoder")
-add_subdirectory("helper")
 add_subdirectory("ft_benchmark")
diff --git a/standalone/binarydecoder/BinaryDecoder.cpp b/standalone/binarydecoder/BinaryDecoder.cpp
index 0369bd2..518f712 100644
--- a/standalone/binarydecoder/BinaryDecoder.cpp
+++ b/standalone/binarydecoder/BinaryDecoder.cpp
@@ -19,7 +19,6 @@
 #include "DetectorOutput.h"
 
 int main(int argc, char** argv) {
-  // std::string detec = "drace.detector.tsan.dll";
   std::string detec = "drace.detector.fasttrack.standalone.dll";
   std::string file = "trace.bin";
 
@@ -46,14 +45,19 @@ int main(int argc, char** argv) {
     std::vector<ipc::event::BufferEntry> buffer(
         (size_t)(size / sizeof(ipc::event::BufferEntry)));
 
-    __debugbreak();
+    /**
+     * \note debug breaks are placed to measure only the performance of the
+     * detection algorithm (on a single thread). They are placed right
+     * before the first operation of the detector and right after the last one
+     */
+    // __debugbreak();
     DetectorOutput output(detec.c_str());
     if (in_file.read((char*)(buffer.data()), size).good()) {
       for (auto it = buffer.begin(); it != buffer.end(); ++it) {
         ipc::event::BufferEntry tmp = *it;
         output.makeOutput(&tmp);
       }
-      __debugbreak();
+      // __debugbreak();
     }
   } catch (const std::exception& e) {
     std::cerr << "Could not load detector: " << e.what() << std::endl;
diff --git a/standalone/detectors/fasttrack/CMakeLists.txt b/standalone/detectors/fasttrack/CMakeLists.txt
index 965b607..f9c79a4 100644
--- a/standalone/detectors/fasttrack/CMakeLists.txt
+++ b/standalone/detectors/fasttrack/CMakeLists.txt
@@ -11,12 +11,14 @@ set(FT_SOURCES
     "src/stacktrace"
     "src/varstate"
+    "src/MemoryPool"
     "src/threadstate")
 
 set(FT_TEST_SOURCES
     "test/fasttrack_test"
     "src/stacktrace"
     "src/varstate"
+    "src/MemoryPool"
     "src/threadstate")
 
 include(GenerateExportHeader)
diff --git a/standalone/detectors/fasttrack/include/MemoryPool.h b/standalone/detectors/fasttrack/include/MemoryPool.h
new file mode 100644
index 0000000..d40ba50
--- /dev/null
+++ b/standalone/detectors/fasttrack/include/MemoryPool.h
@@ -0,0 +1,61 @@
+#ifndef MEMORY_POOL_HEADER_H
+#define MEMORY_POOL_HEADER_H 1
+#pragma once
+
+/*
+ * DRace, a dynamic data race detector
+ *
+ * Copyright 2020 Siemens AG
+ *
+ * Authors:
+ *   Mihai Robescu
+ *
+ * SPDX-License-Identifier: MIT
+ */
+
+#include <cstddef>
+#include <iostream>
+
+struct Chunk {
+  Chunk *next;  // pointer to the next Chunk, when the chunk is free
+};
+
+// For each allocator there will be a separate instantiation of the memory pool
+class MemoryPool {
+ private:
+  /// pointer to the first free address
+  Chunk *free_pointer = nullptr;
+
+  /// number of chunks in a block
+  size_t num_chunks = 0;
+
+  /// chunk size equivalent to sizeof(T) from template specialization
+  size_t chunk_size = 0;
+
+  /// block size
+  size_t block_size = 0;
+
+  /// holds how many chunks were allocated until now
+  size_t chunks_allocated = 0;
+
+ public:
+  MemoryPool() = default;
+  MemoryPool(size_t chunkSize) : chunk_size(chunkSize) {}
+  MemoryPool(size_t chunkSize, size_t numChunks)
+      : chunk_size(chunkSize), num_chunks(numChunks) {
+    block_size = chunk_size * num_chunks;
+    free_pointer = get_more_memory();
+  }
+  Chunk *allocate() { return do_allocation(); }
+  void deallocate(void *ptr) { do_deallocation(ptr); }
+  void print_used_memory() {
+    std::cout << "Memory Allocated: " << chunks_allocated * chunk_size
+              << std::endl;
+  }
+
+ private:
+  Chunk *do_allocation();
+  Chunk *get_more_memory();  // allocate 1 block of chunks
+  void do_deallocation(void *ptr);
+};
+#endif  // !MEMORY_POOL_HEADER_H
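A minimal usage sketch for MemoryPool (the chunk size and chunk count are
arbitrary; chunks must be at least sizeof(Chunk*) bytes, since a free chunk
stores the free list's next-pointer in-place):

    #include "MemoryPool.h"

    int main() {
      MemoryPool pool(16, 1024);   // 16-byte chunks, 1024 chunks per block
      Chunk* c = pool.allocate();  // pops the head of the free list
      // ... placement-new an object of at most 16 bytes into c ...
      pool.deallocate(c);          // pushes the chunk back onto the free list
      pool.print_used_memory();    // prints 0 after the deallocation
    }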
diff --git a/standalone/detectors/fasttrack/include/PoolAllocator.h b/standalone/detectors/fasttrack/include/PoolAllocator.h
new file mode 100644
index 0000000..d9c48c3
--- /dev/null
+++ b/standalone/detectors/fasttrack/include/PoolAllocator.h
@@ -0,0 +1,199 @@
+#ifndef POOL_ALLOCATOR_HEADER_H
+#define POOL_ALLOCATOR_HEADER_H 1
+#pragma once
+
+/*
+ * DRace, a dynamic data race detector
+ *
+ * Copyright 2020 Siemens AG
+ *
+ * Authors:
+ *   Mihai Robescu
+ *   Felix Moessbauer
+ *
+ * SPDX-License-Identifier: MIT
+ */
+
+#include <atomic>
+#include <cstddef>
+#include <limits>
+
+#include <memory>
+#include "MemoryPool.h"
+
+/**
+ *------------------------------------------------------------------------------
+ *
+ * Header File implementing a pool allocator. It can also be used
+ * in combination with the Segregator class, as in the DRaceAllocator
+ * alias below.
+ *
+ * There is also a thread-safe version; it is currently only
+ * experimental
+ *
+ *------------------------------------------------------------------------------
+ */
+
+template <typename T, size_t threshold, class SmallAllocator,
+          class LargeAllocator>
+class Segregator {
+ public:
+  using value_type = T;
+  using size_type = size_t;
+  using difference_type = ptrdiff_t;
+  using pointer = T*;
+  using const_pointer = const T*;
+  using void_pointer = void*;
+  using const_void_pointer = const void*;
+
+  T* allocate(size_t n) {
+    size_t size = n * sizeof(T);
+    if (size < threshold) {
+      // return new (reinterpret_cast<void*>(SmallAllocator::allocate())) T();
+      return reinterpret_cast<T*>(SmallAllocator::allocate());
+    } else {
+      return reinterpret_cast<T*>(LargeAllocator::allocate());
+    }
+  }
+  void deallocate(T* p, std::size_t n) noexcept {
+    size_t size = n * sizeof(T);
+    if (size < threshold) {
+      return SmallAllocator::deallocate(reinterpret_cast<void*>(p));
+    } else {
+      return LargeAllocator::deallocate(reinterpret_cast<void*>(p));
+    }
+  }
+  template <class U>
+  struct rebind {
+    using other = Segregator<U, threshold, SmallAllocator, LargeAllocator>;
+  };
+};
+
+template <typename T, size_t numChunks>
+class PoolAllocator {
+ public:
+  using value_type = T;
+  using size_type = size_t;
+  using difference_type = ptrdiff_t;
+  using pointer = T*;
+  using const_pointer = const T*;
+  using void_pointer = void*;
+  using const_void_pointer = const void*;
+
+  PoolAllocator() = default;
+  ~PoolAllocator() = default;
+
+  size_type max_size() const { return std::numeric_limits<size_type>::max(); }
+
+  static pointer allocate() {
+    return reinterpret_cast<pointer>(mem_pool.allocate());
+  }
+
+  static void deallocate(void* ptr) { mem_pool.deallocate(ptr); }
+
+  void usedMemory() { mem_pool.print_used_memory(); }
+
+  template <class U>
+  PoolAllocator(const PoolAllocator<U, numChunks>& other) {}
+
+  template <class U>
+  struct rebind {
+    using other = PoolAllocator<U, numChunks>;
+  };
+
+ private:
+  static MemoryPool mem_pool;
+};
+template <typename T, size_t numChunks>
+MemoryPool PoolAllocator<T, numChunks>::mem_pool(sizeof(T), numChunks);
+
+template <size_t size, size_t numChunks = 1024>  // default chunk count; any
+                                                 // reasonable value works here
+class SizePoolAllocator {
+ public:
+  SizePoolAllocator() = default;
+  ~SizePoolAllocator() = default;
+  using size_type = size_t;
+
+  size_type max_size() const { return std::numeric_limits<size_type>::max(); }
+
+  static void* allocate() {
+    return reinterpret_cast<void*>(mem_pool.allocate());
+  }
+
+  static void deallocate(void* ptr) { mem_pool.deallocate(ptr); }
+
+  static void usedMemory() { mem_pool.print_used_memory(); }
+
+ private:
+  static MemoryPool mem_pool;
+};
+template <size_t size, size_t numChunks>
+MemoryPool SizePoolAllocator<size, numChunks>::mem_pool(size, numChunks);
+
+template <typename T>
+using DRaceAllocator = Segregator<
+    T, 5, SizePoolAllocator<4>,
+    Segregator<
+        T, 9, SizePoolAllocator<8>,
+        Segregator<
+            T, 17, SizePoolAllocator<16>,
+            Segregator<T, 33, SizePoolAllocator<32>,
+                       Segregator<T, 65, SizePoolAllocator<64>,
+                                  Segregator<T, 129, SizePoolAllocator<128>,
+                                             std::allocator<T>>>>>>>;
+
+template <typename T, size_t numChunks>
+class ThreadSafePoolAllocator {
+ private:
+  std::atomic<Chunk*> free_pointer{nullptr};    // pointer to the first free
+  size_t num_chunks = numChunks;                // number of chunks in a block
+  size_t chunk_size = sizeof(T);                // chunk size equivalent
+  size_t block_size = num_chunks * chunk_size;  // block size
+  size_t chunks_allocated = 0;  // how many chunks were allocated until now
+
+ public:
+  using value_type = T;
+  using size_type = size_t;
+  using difference_type = ptrdiff_t;
+  using pointer = T*;
+  using const_pointer = const T*;
+  using void_pointer = void*;
+  using const_void_pointer = const void*;
+
+  ThreadSafePoolAllocator() = default;
+  ~ThreadSafePoolAllocator() = default;
+
+  size_type max_size() const { return std::numeric_limits<size_type>::max(); }
+
+  pointer allocate() {
+    if (free_pointer.load(std::memory_order_acquire) == nullptr) {
+      free_pointer.store(get_more_memory(), std::memory_order_release);
+    }
+    // the free list is now guaranteed to be non-empty. Note that this
+    // load/store pair is not one atomic update, which is why this class
+    // is still experimental (see header comment)
+    Chunk* allocated = free_pointer.load(std::memory_order_acquire);
+    free_pointer.store(allocated->next, std::memory_order_release);
+    chunks_allocated++;
+    return reinterpret_cast<pointer>(allocated);
+  }
+
+  Chunk* get_more_memory() {
+    Chunk* start = reinterpret_cast<Chunk*>(operator new(block_size));
+    Chunk* it = start;
+    for (size_t i = 0; i < num_chunks - 1; ++i) {
+      it->next =
+          reinterpret_cast<Chunk*>(reinterpret_cast<char*>(it) + chunk_size);
+      it = it->next;
+    }
+    it->next = nullptr;
+    return start;
+  }
+
+  void deallocate(void* ptr) {
+    Chunk* c = reinterpret_cast<Chunk*>(ptr);
+    c->next = free_pointer.load(std::memory_order_acquire);
+    free_pointer.store(c, std::memory_order_release);
+    chunks_allocated--;
+  }
+};
+
+#endif  //! POOL_ALLOCATOR_HEADER_H
\ No newline at end of file
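A usage sketch for the fixed-size pool allocator (the payload type and
chunk count are made up). allocate() only hands out raw memory, so the
caller constructs and destroys the object explicitly:

    #include <new>

    #include "PoolAllocator.h"

    struct Sample {  // hypothetical payload; >= 8 bytes, see the Chunk note
      double a, b;
    };

    int main() {
      // all PoolAllocator<Sample, 1024> instances share one static MemoryPool
      Sample* s = PoolAllocator<Sample, 1024>::allocate();
      new (s) Sample{1.0, 2.0};  // construct in place
      s->~Sample();
      PoolAllocator<Sample, 1024>::deallocate(s);
    }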
diff --git a/standalone/detectors/fasttrack/include/PrefixTreeDepot.h b/standalone/detectors/fasttrack/include/PrefixTreeDepot.h
new file mode 100644
index 0000000..cecff0c
--- /dev/null
+++ b/standalone/detectors/fasttrack/include/PrefixTreeDepot.h
@@ -0,0 +1,278 @@
+
+#ifndef TREEDEPOT_HEADER_H
+#define TREEDEPOT_HEADER_H 1
+#pragma once
+
+/*
+ * DRace, a dynamic data race detector
+ *
+ * Copyright 2020 Siemens AG
+ *
+ * Authors:
+ *   Mihai Robescu
+ *   Felix Moessbauer
+ *
+ * SPDX-License-Identifier: MIT
+ */
+
+#include <array>
+#include <deque>
+#include <mutex>
+#include <new>
+#include <stdexcept>
+#include <utility>
+
+#include "PoolAllocator.h"
+
+/**
+ *------------------------------------------------------------------------------
+ *
+ * Header File that implements a prefix tree data structure to
+ * store call stack elements. This one is optimized to be
+ * cache-efficient, making each node a multiple of the size of the
+ * cache line
+ *
+ *------------------------------------------------------------------------------
+ */
+
+class INode {
+ public:
+  size_t pc = -1;           // 8 bytes
+  INode* parent = nullptr;  // 8 bytes
+
+  virtual INode* fast_check(size_t pc) const {
+    throw std::runtime_error("Not implemented");
+    return nullptr;
+  }
+
+  virtual size_t size() const {
+    throw std::runtime_error("Not implemented");
+    return -1;
+  }
+
+  virtual ~INode() {}
+
+  virtual bool add_child_node(INode* next, size_t pc) {
+    throw std::runtime_error("Not implemented");
+    return true;
+  }
+
+  virtual void change_child_node(INode* tmp, INode* _curr_elem) {
+    throw std::runtime_error("Not implemented");
+  }
+
+  virtual void change_parent_node(INode* tmp) {
+    throw std::runtime_error("Not implemented");
+  }
+
+  virtual void copy_children_to(INode* dst) const {
+    throw std::runtime_error("Not implemented");
+  }
+};
+
+template <size_t N>
+class Node : public INode {
+ public:
+  std::array<size_t, N> child_values;  // N * 8 bytes
+  std::array<INode*, N> child_nodes;   // N * 8 bytes
+
+  ~Node() = default;
+  Node& operator=(const Node& other) = default;
+  Node(const Node& other) = delete;
+
+  explicit Node() {
+    pc = -1;
+    parent = nullptr;
+    for (size_t i = 0; i < N; ++i) {
+      child_values[i] = -1;
+    }
+    for (size_t i = 0; i < N; ++i) {
+      child_nodes[i] = nullptr;
+    }
+  }
+
+  size_t size() const final { return N; }
+
+  INode* fast_check(size_t pc) const final {
+    for (size_t i = 0; i < N; ++i) {
+      if (child_values[i] == pc) {
+        return child_nodes[i];
+      }
+    }
+    return nullptr;
+  }
+
+  bool add_child_node(INode* next, size_t pc) final {
+    for (size_t i = 0; i < N; ++i) {
+      if (child_values[i] == -1) {
+        child_values[i] = pc;
+        child_nodes[i] = next;
+        return true;
+      }
+    }
+    return false;
+  }
+
+  void change_child_node(INode* tmp, INode* _curr_elem) final {
+    // replace the new pointer in the parent's child list
+    for (size_t i = 0; i < N; ++i) {
+      if (child_nodes[i] == _curr_elem) {
+        child_nodes[i] = tmp;
+        return;
+      }
+    }
+  }
+  void change_parent_node(INode* tmp) final {
+    for (size_t i = 0; i < N; ++i) {
+      if (child_nodes[i]) {  // guard against empty slots
+        child_nodes[i]->parent = tmp;
+      }
+    }
+  }
+
+  void copy_children_to(INode* dst) const final {
+    // re-register all children with the replacement node; the base-class
+    // assignment operator cannot copy arrays of a different size N
+    for (size_t i = 0; i < N; ++i) {
+      if (child_nodes[i]) {
+        dst->add_child_node(child_nodes[i], child_values[i]);
+      }
+    }
+  }
+};
+
+template <typename T>
+class SelectAllocator {
+ public:
+  static constexpr int threshold1 = 2;
+  using Allocator1 = PoolAllocator<Node<threshold1>, 8192>;
+  static constexpr int threshold2 = 6;
+  using Allocator2 = PoolAllocator<Node<threshold2>, 4096>;
+  static constexpr int threshold3 = 10;
+  using Allocator3 = PoolAllocator<Node<threshold3>, 64>;
+  static constexpr int threshold4 = 38;
+  using Allocator4 = PoolAllocator<Node<threshold4>, 64>;
+  static constexpr int threshold5 = 198;
+  using Allocator5 = PoolAllocator<Node<threshold5>, 32>;
+  static constexpr int threshold6 = 1000;
+  using LargeAllocator = std::allocator<Node<threshold6>>;
+
+  Allocator1 al1;
+  Allocator2 al2;
+  Allocator3 al3;
+  Allocator4 al4;
+  Allocator5 al5;
+  LargeAllocator alL;
+
+  T* allocate(size_t size) {
+    if (size < threshold1) {
+      return new (reinterpret_cast<void*>(al1.allocate())) Node<threshold1>();
+    } else if (size < threshold2) {
+      return new (reinterpret_cast<void*>(al2.allocate())) Node<threshold2>();
+    } else if (size < threshold3) {
+      return new (reinterpret_cast<void*>(al3.allocate())) Node<threshold3>();
+    } else if (size < threshold4) {
+      return new (reinterpret_cast<void*>(al4.allocate())) Node<threshold4>();
+    } else if (size < threshold5) {
+      return new (reinterpret_cast<void*>(al5.allocate())) Node<threshold5>();
+    } else {  // allocate just 1;
+      Node<threshold6>* new_t =
+          std::allocator_traits<LargeAllocator>::allocate(alL, 1);
+      std::allocator_traits<LargeAllocator>::construct(alL, new_t);
+      return new_t;
+    }
+  }
+
+  void deallocate(INode* ptr, size_t size) {
+    if (size < threshold1) {
+      // Node<threshold1>* tmp = dynamic_cast<Node<threshold1>*>(ptr);
+      // tmp->~Node();  // doesn't work when calling the destructor directly
+      al1.deallocate(ptr);
+    } else if (size < threshold2) {
+      al2.deallocate(ptr);
+    } else if (size < threshold3) {
+      al3.deallocate(ptr);
+    } else if (size < threshold4) {
+      al4.deallocate(ptr);
+    } else if (size < threshold5) {
+      al5.deallocate(ptr);
+    } else {  // deallocate just 1;
+      Node<threshold6>* tmp = dynamic_cast<Node<threshold6>*>(ptr);
+      std::allocator_traits<LargeAllocator>::destroy(alL, tmp);
+      std::allocator_traits<LargeAllocator>::deallocate(alL, tmp, 1);
+    }
+  }
+};
+
+using Allocator = SelectAllocator<INode>;
+extern ipc::spinlock read_write_lock;
+
+class TreeDepot {
+  INode* _curr_elem = nullptr;
+  Allocator al;
+
+ public:
+  INode* get_current_element() {
+    // std::lock_guard<ipc::spinlock> lg(read_write_lock);
+    return _curr_elem;
+  }
+
+  void insert_function_element(size_t pc) {
+    std::lock_guard<ipc::spinlock> lg(read_write_lock);
+
+    if (_curr_elem == nullptr) {
+      // allocate the root node with a bigger capacity, as the root
+      // typically has many children
+      _curr_elem = al.allocate(5);
+      _curr_elem->parent = nullptr;
+      _curr_elem->pc = pc;
+      return;
+    }
+
+    if (pc == _curr_elem->pc) return;  // done for recursive functions;
+    // no need to use more nodes for same function
+
+    INode* next = _curr_elem->fast_check(pc);
+    if (next) {
+      _curr_elem = next;
+      return;
+    } else {  // it is not the current node or any of the child nodes
+      next = al.allocate(1);
+      next->pc = pc;
+      next->parent = _curr_elem;
+      if (_curr_elem->add_child_node(next, pc)) {
+        _curr_elem = next;
+        return;
+      }
+    }
+    // If we got here, the current node is full and has to be replaced
+    // by a node of the next bigger size;
+    INode* tmp = al.allocate(_curr_elem->size());
+    *tmp = *_curr_elem;  // copies pc and parent; the base-class assignment
+                         // cannot copy the child arrays of a differently
+                         // sized Node, so transfer the children explicitly
+    _curr_elem->copy_children_to(tmp);
+    INode* parent = _curr_elem->parent;
+    if (parent) {
+      parent->change_child_node(tmp, _curr_elem);
+    }
+    // replace so that children of current node point to the new value
+    _curr_elem->change_parent_node(tmp);
+
+    // TODO: MUST replace it too in ThreadState::read_write
+    // Allocator::deallocate(_curr_elem, _curr_elem->size() - 1);
+
+    _curr_elem = tmp;
+    next->parent = _curr_elem;
+
+    // we already know that here we can go to the end.
+    _curr_elem->add_child_node(next, pc);
+    _curr_elem = next;
+  }
+
+  void remove_function_element() {
+    std::lock_guard<ipc::spinlock> lg(read_write_lock);
+
+    if (_curr_elem == nullptr) return;  // func_exit before func_enter
+
+    if (_curr_elem->parent == nullptr) {  // exiting the root function
+      // Allocator::deallocate(_curr_elem, _curr_elem->size() - 1);
+      // not deallocating anymore because the node might still be in use
+      _curr_elem = nullptr;
+      return;
+    }
+    _curr_elem = _curr_elem->parent;
+  }
+
+  std::deque<size_t> make_trace(const std::pair<size_t, INode*>& data) const {
+    std::lock_guard<ipc::spinlock> lg(read_write_lock);
+
+    std::deque<size_t> this_stack;
+    this_stack.emplace_front(data.first);
+
+    INode* iter = data.second;
+    while (iter != nullptr) {
+      this_stack.emplace_front(iter->pc);
+      iter = iter->parent;
+    }
+    return this_stack;
+  }
+};
+
+#endif
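A sketch of how TreeDepot is driven by function-entry/exit events and later
queried for a trace (the program counters are made up; the embedding
program must define the read_write_lock that the header declares extern):

    #include "PrefixTreeDepot.h"

    ipc::spinlock read_write_lock;  // definition required by TreeDepot

    int main() {
      TreeDepot depot;
      depot.insert_function_element(0x100);  // func_enter: main()
      depot.insert_function_element(0x200);  // func_enter: main() -> foo()
      INode* at_access = depot.get_current_element();  // stored per access
      depot.remove_function_element();       // func_exit: foo() returns

      // reconstructs {0x100, 0x200, 0xdead} for a race report
      auto trace = depot.make_trace({0xdead, at_access});
    }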
diff --git a/standalone/detectors/fasttrack/include/PrefixTree_StackDepot.h b/standalone/detectors/fasttrack/include/PrefixTree_StackDepot.h
new file mode 100644
index 0000000..65bc66d
--- /dev/null
+++ b/standalone/detectors/fasttrack/include/PrefixTree_StackDepot.h
@@ -0,0 +1,95 @@
+#ifndef PREFIXTREE_STACKDEPOT_HEADER_H
+#define PREFIXTREE_STACKDEPOT_HEADER_H 1
+#pragma once
+
+/*
+ * DRace, a dynamic data race detector
+ *
+ * Copyright 2020 Siemens AG
+ *
+ * Authors:
+ *   Mihai Robescu
+ *   Felix Moessbauer
+ *
+ * SPDX-License-Identifier: MIT
+ */
+
+#include <cstddef>
+#include <deque>
+#include <utility>
+
+#include "PoolAllocator.h"
+#include "parallel_hashmap/phmap.h"
+
+/**
+ *------------------------------------------------------------------------------
+ *
+ * Header File that provides the initial implementation of a Prefix
+ * Tree for storing call stacks. Check "PrefixTreeDepot.h" for a
+ * cache-efficient implementation
+ *
+ *------------------------------------------------------------------------------
+ */
+
+struct TrieNode {
+  size_t pc = -1;
+  TrieNode* parent = nullptr;
+  phmap::node_hash_map<size_t, TrieNode> _childNodes;
+
+  TrieNode() = default;
+};
+
+class TrieStackDepot {
+  TrieNode* _curr_elem = nullptr;
+
+ public:
+  TrieNode* get_current_element() { return _curr_elem; }
+
+  void InsertFunction(size_t pc) {
+    if (_curr_elem == nullptr) {
+      // using new is slow => PoolAllocator in "PrefixTreeDepot.h"
+      _curr_elem = new TrieNode();
+      _curr_elem->parent = nullptr;
+      _curr_elem->pc = pc;
+    }
+    if (pc == _curr_elem->pc) return;  // done for recursive functions;
+    // no need to use more nodes for same function
+
+    auto it = _curr_elem->_childNodes.find((size_t)pc);
+    if (it == _curr_elem->_childNodes.end()) {
+      it = _curr_elem->_childNodes.emplace_hint(it, pc, TrieNode());
+    }
+
+    TrieNode* next = &(it->second);
+    next->parent = _curr_elem;
+    _curr_elem = next;
+    _curr_elem->pc = pc;
+  }
+
+  void ExitFunction() {
+    if (_curr_elem == nullptr) return;  // func_exit before func_enter
+
+    if (_curr_elem->parent == nullptr) {  // exiting the root function
+      // delete _curr_elem; !! MEMORY IS NEVER FREED DOING THIS
+      // as there can still be elements pointing to it;
+      // TODO: remove all elements pointing to it as well
+      _curr_elem = nullptr;
+      return;
+    }
+    _curr_elem = _curr_elem->parent;
+  }
+
+  std::deque<size_t> make_trace(
+      const std::pair<size_t, TrieNode*>& data) const {
+    std::deque<size_t> this_stack;
+    this_stack.emplace_front(data.first);
+
+    TrieNode* iter = data.second;
+    while (iter != nullptr) {
+      this_stack.emplace_front(iter->pc);
+      iter = iter->parent;
+    }
+    return this_stack;
+  }
+};
+#endif
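The hash-map-based depot is driven the same way; note that its method names
(InsertFunction/ExitFunction) do not yet match the four-function naming
used in the commit message. A sketch with made-up program counters:

    #include "PrefixTree_StackDepot.h"

    int main() {
      TrieStackDepot depot;
      depot.InsertFunction(0x100);  // func_enter: main()
      depot.InsertFunction(0x200);  // func_enter: main() -> foo()
      TrieNode* at_access = depot.get_current_element();
      depot.ExitFunction();         // func_exit: foo() returns

      // reconstructs {0x100, 0x200, 0xdead}
      auto trace = depot.make_trace({0xdead, at_access});
    }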
diff --git a/standalone/detectors/fasttrack/include/fasttrack.h b/standalone/detectors/fasttrack/include/fasttrack.h
index 9957f54..db5e7fc 100644
--- a/standalone/detectors/fasttrack/include/fasttrack.h
+++ b/standalone/detectors/fasttrack/include/fasttrack.h
@@ -800,7 +800,8 @@ class Fasttrack : public Detector {
       if (VectorClock<>::make_thread_num(it->second.get_read_epoch()) ==
               th_num &&
           VectorClock<>::make_thread_num(it->second.get_read_epoch()) ==
-              th_num) {  // the memory address was accessed only by this thread
+              th_num) {  // the memory address was accessed only by this
+                         // thread
         auto tmp = it;
         it++;
         vars.erase(tmp);
@@ -925,7 +926,8 @@ class Fasttrack : public Detector {
     }
   }
 
-  /// removes memory addresses by choosing every 3 the one with the lowest clock
+  /// removes memory addresses by choosing every 3 the one with the lowest
+  /// clock
   void remove_memory_addresses_by_lowest_clock() {
     if (log_flag) {
      log_count.remove_memory_addresses_by_lowest_clock_calls++;
@@ -937,7 +939,8 @@ class Fasttrack : public Detector {
     auto remove_it = it;
 
     while (it != vars.end()) {
-      // gather data from 3 variables and remove the one with the lowest clock.
+      // gather data from 3 variables and remove the one with the lowest
+      // clock.
       if (it->second.get_write_clock() < min_clock) {
         remove_it = it;
         min_clock = it->second.get_write_clock();
diff --git a/standalone/detectors/fasttrack/include/stacktrace.h b/standalone/detectors/fasttrack/include/stacktrace.h
index 58a9ff4..342213f 100644
--- a/standalone/detectors/fasttrack/include/stacktrace.h
+++ b/standalone/detectors/fasttrack/include/stacktrace.h
@@ -48,14 +48,6 @@ class StackTrace {
   uint16_t pop_count = 0;
 
   mutable ipc::spinlock lock;
-
-  /**
-   * \note locking was moved to ThreadState
-   * \note Locking is necessary if and only if elements are removed from the
-   * tree. As long as no elements are removed locking is not necessary: mutable
-   * ipc::spinlock lock;
-   */
-
   /**
    * \brief cleanup unreferenced nodes in callstack tree
    * \warning very expensive
diff --git a/standalone/detectors/fasttrack/include/threadstate.h b/standalone/detectors/fasttrack/include/threadstate.h
index 8a7eda1..04d271d 100644
--- a/standalone/detectors/fasttrack/include/threadstate.h
+++ b/standalone/detectors/fasttrack/include/threadstate.h
@@ -16,6 +16,8 @@
 #include <atomic>
 #include <memory>
 
+#include "PrefixTreeDepot.h"
+#include "PrefixTree_StackDepot.h"
 #include "stacktrace.h"
 #include "vectorclock.h"
 #include "xvector.h"
@@ -114,7 +116,7 @@ class ThreadState : public VectorClock<> {
 
   /**
    * \brief returns a stack trace of a memory location for handing it over to
-   * drace \note theadsafe
+   * drace \note threadsafe
    */
   std::deque<size_t> return_stack_trace(std::size_t address) const;
 };
diff --git a/standalone/detectors/fasttrack/include/vectorclock.h b/standalone/detectors/fasttrack/include/vectorclock.h
index e43c998..a7d9f73 100644
--- a/standalone/detectors/fasttrack/include/vectorclock.h
+++ b/standalone/detectors/fasttrack/include/vectorclock.h
@@ -12,6 +12,7 @@
  *
  * SPDX-License-Identifier: MIT
  */
+
 #include "parallel_hashmap/phmap.h"
 
 /**
@@ -168,6 +169,16 @@ class VectorClock {
     }
     return id;
   }
+
+ private:
+  static bool _thread_no_initialization;
+  static bool ThreadNoInitialization() {
+    for (int i = MAX_TH_NUM; i >= 1;
+         --i) {  //!! thread numbers start from 1 because of is_rw_sh_race
+      thread_nums.emplace(i);
+    }
+    return true;  // the value only signals that the set was populated
+  }
 };
 VectorClock<>::ThreadNum VectorClock<>::thread_no = 1;
 phmap::flat_hash_map<VectorClock<>::ThreadNum, VectorClock<>::TID>
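The _thread_no_initialization flag suggests the usual run-once idiom: bind
the result of the init function to a static so it executes before main().
A self-contained sketch of that idiom (this wiring is an assumption, it is
not part of the patch; the bound 8 stands in for MAX_TH_NUM):

    #include <set>

    static std::set<int> thread_nums;

    static bool init_thread_nums(int max_th_num) {
      // thread numbers start from 1, mirroring ThreadNoInitialization()
      for (int i = max_th_num; i >= 1; --i) thread_nums.emplace(i);
      return true;  // the value only marks that initialization ran
    }

    // evaluated exactly once, during static initialization:
    static bool thread_nums_ready = init_thread_nums(8);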
diff --git a/standalone/detectors/fasttrack/src/MemoryPool.cpp b/standalone/detectors/fasttrack/src/MemoryPool.cpp
new file mode 100644
index 0000000..43e3ad2
--- /dev/null
+++ b/standalone/detectors/fasttrack/src/MemoryPool.cpp
@@ -0,0 +1,31 @@
+#include "MemoryPool.h"
+
+Chunk* MemoryPool::do_allocation() {
+  if (free_pointer == nullptr) {
+    free_pointer = get_more_memory();
+  }
+  // the free list is now guaranteed to be non-empty
+  Chunk* allocated = free_pointer;
+  free_pointer = free_pointer->next;
+  chunks_allocated++;
+  return allocated;
+}
+
+Chunk* MemoryPool::get_more_memory() {
+  Chunk* start = reinterpret_cast<Chunk*>(operator new(block_size));
+  Chunk* it = start;
+  for (size_t i = 0; i < num_chunks - 1; ++i) {
+    it->next =
+        reinterpret_cast<Chunk*>(reinterpret_cast<char*>(it) + chunk_size);
+    it = it->next;
+  }
+  it->next = nullptr;
+  return start;
+}
+
+void MemoryPool::do_deallocation(void* ptr) {
+  Chunk* c = reinterpret_cast<Chunk*>(ptr);
+  c->next = free_pointer;
+  free_pointer = c;
+  chunks_allocated--;
+}
\ No newline at end of file
diff --git a/standalone/detectors/fasttrack/test/fasttrack_test.cpp b/standalone/detectors/fasttrack/test/fasttrack_test.cpp
index 59b0ad2..3cac688 100644
--- a/standalone/detectors/fasttrack/test/fasttrack_test.cpp
+++ b/standalone/detectors/fasttrack/test/fasttrack_test.cpp
@@ -367,7 +367,7 @@ TEST(FasttrackTest, Drop_State_Indicate_Shared_Read_Write_Race) {
   const char* argv_mock[] = {"ft_test", "--size", "2"};
   void* tls[3];  // storage for TLS data
 
-  ft->init(1, argv_mock, rc_clb, nullptr);
+  ft->init(3, argv_mock, rc_clb, nullptr);
   ft->fork(0, 1, &tls[0]);  // t0
   ft->fork(0, 2, &tls[1]);  // t1
   ft->fork(0, 3, &tls[2]);  // t2
@@ -404,7 +404,7 @@ TEST(FasttrackTest, Write_Write_Race) {
   void* tls[3];  // storage for TLS data
   void* mtx[2] = {(void*)0x123ull, (void*)0x1234ull};
 
-  ft->init(1, argv_mock, rc_clb, nullptr);
+  ft->init(3, argv_mock, rc_clb, nullptr);
   ft->fork(0, 1, &tls[0]);  // t0
   ft->fork(0, 2, &tls[1]);  // t1
   ft->fork(0, 3, &tls[2]);  // t2
@@ -548,4 +548,4 @@ TEST(FasttrackTest, Fasttrack_Race_And_StackTrace) {
   ft->func_enter(tls[1], (void*)0x70ull);
   // here, we expect the race. Handled in callback
   ft->finalize();
-}
\ No newline at end of file
+}
diff --git a/standalone/ft_benchmark/ft_benchmark.cpp b/standalone/ft_benchmark/ft_benchmark.cpp
index dbb9033..a78b81e 100644
--- a/standalone/ft_benchmark/ft_benchmark.cpp
+++ b/standalone/ft_benchmark/ft_benchmark.cpp
@@ -1,9 +1,10 @@
 /*
  * DRace, a dynamic data race detector
  *
- * Copyright 2018 Siemens AG
+ * Copyright 2020 Siemens AG
  *
  * Authors:
+ *   Mihai Robescu
  *   Felix Moessbauer
  *
  * SPDX-License-Identifier: MIT
@@ -19,11 +20,12 @@
 #include <random>
 #include <set>
+
 std::mutex mx;
 static std::set<size_t> random_reads;
 static std::set<size_t> random_writes;
 static std::random_device rd{};
-std::mt19937 gen{ 0 };
+std::mt19937 gen{0};
 
 void generate_block(int i, std::vector<std::vector<size_t>>* blocks) {
@@ -52,7 +54,7 @@ void read_from_block(std::vector<std::vector<size_t>>* blocks) {
   } catch (const std::exception& e) {
     std::cout << e.what() << std::endl;
   } catch (...) {
-    std::cout << "Something!" << std::endl;
+    std::cout << "Failure!" << std::endl;
   }
 }
 
@@ -73,15 +75,15 @@ void write_to_block(std::vector<std::vector<size_t>>* blocks) {
   } catch (const std::exception& e) {
     std::cout << e.what() << std::endl;
   } catch (...) {
-    std::cout << "Something!" << std::endl;
+    std::cout << "Failure!" << std::endl;
   }
 }
 
 int CountPossibleDataRaces();
 
 /**
- * Test tool to check for memory corruption and layout.
- * To also check the race reporting, we try to enforce data-races
+ * \brief benchmarking program used to test the performance of the FastTrack2
+ * algorithm implementation
  */
 int main(int argc, char** argv) {
   std::vector<std::vector<size_t>> blocks;
@@ -120,7 +122,7 @@ int main(int argc, char** argv) {
   std::cout << "No. of possible data races: " << std::setw(3)
             << no_of_data_races << std::endl;
 
-  //std::cin.get();
+  // std::cin.get();
 }
 
 int CountPossibleDataRaces() {
diff --git a/standalone/helper/CMakeLists.txt b/standalone/helper/CMakeLists.txt
deleted file mode 100644
index a0ffd06..0000000
--- a/standalone/helper/CMakeLists.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-
-set(HELPER_SOURCES "src/main.cpp")
-
-add_executable("helper" ${HELPER_SOURCES})
-
-set_target_properties("helper" PROPERTIES CXX_STANDARD 17 CXX_STANDARD_REQUIRED ON)
-
-
-if(NOT TARGET Threads::Threads)
-    find_package(Threads REQUIRED)
-endif()
-
-target_include_directories("helper" PUBLIC "include")
-
-
-target_link_libraries("helper" PUBLIC Threads::Threads)
diff --git a/standalone/helper/src/main.cpp b/standalone/helper/src/main.cpp
deleted file mode 100644
index 8b7c63d..0000000
--- a/standalone/helper/src/main.cpp
+++ /dev/null
@@ -1,46 +0,0 @@
-#include <iostream>
-#include <mutex>
-#include <thread>
-#include <vector>
-
-std::mutex mtx;
-
-#define STR(x) #x
-
-template <typename T>
-void increment(T& ptr, int NUM) {
-  for (int j = 0; j < NUM; j++) {
-    //mtx.lock();
-    ptr++;
-    //mtx.unlock();
-  }
-}
-
-int main() {
-  std::vector<std::thread> threads;
-  size_t size = 2;
-  threads.reserve(size);
-
-  int NUM = 5000;
-  int i = 0;
-
-  for (size_t j = 0; j < size; ++j) {
-    threads.emplace_back(std::thread(increment<int>, std::ref(i), NUM));
-  }
-
-  for (int j = 0; j < NUM; j++) {
-    mtx.lock();
-    i++;
-    mtx.unlock();
-  }
-
-  for (size_t i = 0; i < size; ++i) {
-    threads[i].join();
-  }
-
-  std::cout << STR(i) << " = " << i << std::endl;
-
-  std::cin >> i;
-
-  return 0;
-}