perf: add implementation for storing call stacks as a prefix tree
- Implementations for storing call stack information only have to
  implement 4 functions: make_trace(), get_current_element(),
  insert_function_element() and remove_function_element() (see the
  interface sketch below);
- PrefixTree_StackDepot is the first version that adapts a prefix tree
  for storing call stack information. It uses hash maps instead of
  constant-size arrays;
- PrefixTreeDepot.h contains the cache-efficiency-optimized version of
  the prefix tree implementation.
taranbis committed Jan 15, 2021
1 parent d4f6b5c commit 556b612
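
A minimal sketch of the four-function interface named in the commit message; the function names come from the message itself, while the signatures and the Trace type are assumptions, since the stack-depot headers are not among the hunks reproduced below:

#include <cstddef>
#include <vector>

// HypotheticalStackDepot: illustration only. The four member functions are the
// ones named in the commit message; parameter and return types are assumed.
class HypotheticalStackDepot {
 public:
  using Trace = std::vector<size_t>;        // assumed: program counters, callee last
  Trace make_trace(size_t addr) const;      // rebuild the call stack recorded for an address
  size_t get_current_element() const;       // element at the top of the current call stack
  void insert_function_element(size_t pc);  // record a function entry
  void remove_function_element(size_t pc);  // record a function exit
};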
Showing 16 changed files with 705 additions and 88 deletions.
1 change: 0 additions & 1 deletion standalone/CMakeLists.txt
@@ -21,5 +21,4 @@ endif()

add_subdirectory("detectors/fasttrack")
add_subdirectory("binarydecoder")
add_subdirectory("helper")
add_subdirectory("ft_benchmark")
10 changes: 7 additions & 3 deletions standalone/binarydecoder/BinaryDecoder.cpp
@@ -19,7 +19,6 @@
#include "DetectorOutput.h"

int main(int argc, char** argv) {
// std::string detec = "drace.detector.tsan.dll";
std::string detec = "drace.detector.fasttrack.standalone.dll";
std::string file = "trace.bin";

@@ -46,14 +45,19 @@ int main(int argc, char** argv) {
std::vector<ipc::event::BufferEntry> buffer(
(size_t)(size / sizeof(ipc::event::BufferEntry)));

__debugbreak();
/**
 * \note debug breaks are placed to measure only the performance of the
 * detection algorithm, on a single thread. They are placed right before
 * the first operation of the detector and right after the last one.
*/
// __debugbreak();
DetectorOutput output(detec.c_str());
if (in_file.read((char*)(buffer.data()), size).good()) {
for (auto it = buffer.begin(); it != buffer.end(); ++it) {
ipc::event::BufferEntry tmp = *it;
output.makeOutput(&tmp);
}
__debugbreak();
// __debugbreak();
}
} catch (const std::exception& e) {
std::cerr << "Could not load detector: " << e.what() << std::endl;
2 changes: 2 additions & 0 deletions standalone/detectors/fasttrack/CMakeLists.txt
@@ -11,12 +11,14 @@
set(FT_SOURCES
"src/stacktrace"
"src/varstate"
"src/MemoryPool"
"src/threadstate")

set(FT_TEST_SOURCES
"test/fasttrack_test"
"src/stacktrace"
"src/varstate"
"src/MemoryPool"
"src/threadstate")

include(GenerateExportHeader)
61 changes: 61 additions & 0 deletions standalone/detectors/fasttrack/include/MemoryPool.h
@@ -0,0 +1,61 @@
#ifndef MEMORY_POOL_HEADER_H
#define MEMORY_POOL_HEADER_H 1
#pragma once

/*
* DRace, a dynamic data race detector
*
* Copyright 2020 Siemens AG
*
* Authors:
* Mihai Robescu <mihai-gabriel.robescu@siemens.com>
*
* SPDX-License-Identifier: MIT
*/

#include <assert.h>
#include <iostream>

struct Chunk {
Chunk *next; // pointer to the next Chunk, when chunk is free
};

// For each allocator there will be a separate instantiation of the memory pool
class MemoryPool {
private:
/// pointer to the first free address
Chunk *free_pointer = nullptr;

  /// number of chunks in a block
  size_t num_chunks = 0;

  /// chunk size, equivalent to sizeof(T) from the template specialization
  size_t chunk_size = 0;

  /// block size (chunk_size * num_chunks)
  size_t block_size = 0;

/// holds how many chunks were allocated until now
size_t chunks_allocated = 0;

public:
MemoryPool() = default;
MemoryPool(size_t chunkSize) : chunk_size(chunkSize) {}
MemoryPool(size_t chunkSize, size_t numChunks)
: chunk_size(chunkSize), num_chunks(numChunks) {
block_size = chunk_size * num_chunks;
free_pointer = get_more_memory();
}
Chunk *allocate() { return do_allocation(); }
void deallocate(void *ptr) { do_deallocation(ptr); }
void print_used_memory() {
std::cout << "Memory Allocated: " << chunks_allocated * chunk_size
<< std::endl;
}

private:
Chunk *do_allocation();
Chunk *get_more_memory(); // allocate 1 block of chunks
void do_deallocation(void *ptr);
};
#endif // !MEMORY_POOL_HEADER_H
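
The definitions of do_allocation(), get_more_memory() and do_deallocation() live in src/MemoryPool, which this commit adds to the CMake sources but whose hunks are not shown in this excerpt. A minimal sketch of how such an intrusive free list is typically implemented, mirroring the block/chunk layout used by ThreadSafePoolAllocator further below (the bodies are assumptions, not the committed sources):

// Sketch only: typical definitions for the three private members declared above.
Chunk *MemoryPool::do_allocation() {
  if (free_pointer == nullptr) {
    free_pointer = get_more_memory();  // free list exhausted: allocate a new block
  }
  Chunk *allocated = free_pointer;     // pop the head of the free list
  free_pointer = free_pointer->next;
  ++chunks_allocated;
  return allocated;
}

Chunk *MemoryPool::get_more_memory() {
  // allocate one block of raw memory and thread the free list through its chunks
  Chunk *start = reinterpret_cast<Chunk *>(operator new(block_size));
  Chunk *it = start;
  for (size_t i = 0; i < num_chunks - 1; ++i) {
    it->next = reinterpret_cast<Chunk *>(reinterpret_cast<char *>(it) + chunk_size);
    it = it->next;
  }
  it->next = nullptr;
  return start;
}

void MemoryPool::do_deallocation(void *ptr) {
  Chunk *c = reinterpret_cast<Chunk *>(ptr);
  c->next = free_pointer;  // push the chunk back onto the free list
  free_pointer = c;
  --chunks_allocated;
}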
199 changes: 199 additions & 0 deletions standalone/detectors/fasttrack/include/PoolAllocator.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
#ifndef POOL_ALLOCATOR_HEADER_H
#define POOL_ALLOCATOR_HEADER_H 1
#pragma once

/*
* DRace, a dynamic data race detector
*
* Copyright 2020 Siemens AG
*
* Authors:
* Mihai Robescu <mihai-gabriel.robescu@siemens.com>
* Felix Moessbauer <felix.moessbauer@siemens.com>
*
* SPDX-License-Identifier: MIT
*/

#include <atomic>
#include <iostream>
#include <limits>
#include <memory>

#include <ipc/spinlock.h>
#include "MemoryPool.h"

/**
 *------------------------------------------------------------------------------
 *
 * Header file implementing a pool allocator. It can also be used
 * in combination with the Segregator class, as in the DRaceAllocator
 * alias further down in this file.
 *
 * There is also a thread-safe version (ThreadSafePoolAllocator), which is
 * currently only experimental.
 *
 *------------------------------------------------------------------------------
 */

template <class T, int threshold, class SmallAllocator, class LargeAllocator>
class Segregator {
public:
using value_type = T;
using size_type = size_t;
using difference_type = ptrdiff_t;
using pointer = T*;
using const_pointer = const T*;
using void_pointer = void*;
using const_void_pointer = const void*;

  T* allocate(size_t n) {
    size_t size = n * sizeof(T);
    if (size < threshold) {
      // return new (reinterpret_cast<void*>(SmallAllocator::allocate())) T();
      return reinterpret_cast<pointer>(SmallAllocator::allocate());
    } else {
      return reinterpret_cast<pointer>(LargeAllocator::allocate());
    }
  }
void deallocate(T* p, std::size_t n) noexcept {
size_t size = n * sizeof(T);
if (size < threshold) {
return SmallAllocator::deallocate(reinterpret_cast<void*>(p));
} else {
return LargeAllocator::deallocate(reinterpret_cast<void*>(p));
}
}
template <typename U>
struct rebind {
using other = Segregator<U, threshold, SmallAllocator, LargeAllocator>;
};
};

template <typename T, size_t numChunks = 512>
class PoolAllocator {
public:
using value_type = T;
using size_type = size_t;
using difference_type = ptrdiff_t;
using pointer = T*;
using const_pointer = const T*;
using void_pointer = void*;
using const_void_pointer = const void*;

  PoolAllocator() = default;
  ~PoolAllocator() = default;

size_type max_size() const { return std::numeric_limits<size_type>::max(); }

static pointer allocate() {
return reinterpret_cast<pointer>(mem_pool.allocate());
}

static void deallocate(void* ptr) {
mem_pool.deallocate(ptr);
}

void usedMemory() { mem_pool.print_used_memory(); }

template <class U>
PoolAllocator(const PoolAllocator<U, numChunks>& other) {}

template <typename U>
struct rebind {
using other = PoolAllocator<U, numChunks>;
};

private:
static MemoryPool mem_pool;
};
template <typename T, size_t numChunks>
MemoryPool PoolAllocator<T, numChunks>::mem_pool(sizeof(T), numChunks);
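
Because both the pool and the allocate()/deallocate() functions are static, all users of a given PoolAllocator<T, numChunks> instantiation share one pool and need no allocator instance. A minimal usage sketch (MyNode is a hypothetical type introduced only for illustration):

#include <new>  // placement new

struct MyNode {  // hypothetical payload type, not part of the commit
  int value;
  MyNode *next;
};

void pool_allocator_example() {
  // take one raw chunk from the static pool and construct a MyNode in it
  MyNode *n = new (PoolAllocator<MyNode>::allocate()) MyNode{42, nullptr};
  // ... use n ...
  n->~MyNode();                          // destroy the object
  PoolAllocator<MyNode>::deallocate(n);  // return the chunk to the free list
}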

template <size_t size, size_t numChunks = 512>
class SizePoolAllocator {
public:
  SizePoolAllocator() = default;
  ~SizePoolAllocator() = default;
using size_type = size_t;

size_type max_size() const { return std::numeric_limits<size_type>::max(); }

static void* allocate() {
return reinterpret_cast<void*>(mem_pool.allocate());
}

static void deallocate(void* ptr) { mem_pool.deallocate(ptr); }

static void usedMemory() { mem_pool.print_used_memory(); }

private:
static MemoryPool mem_pool;
};
template <size_t size, size_t numChunks>
MemoryPool SizePoolAllocator<size, numChunks>::mem_pool(size, numChunks);

template <class T>
using DRaceAllocator = Segregator<
T, 5, SizePoolAllocator<4>,
Segregator<
T, 9, SizePoolAllocator<8>,
Segregator<
T, 17, SizePoolAllocator<16>,
Segregator<T, 65, SizePoolAllocator<64>,
Segregator<T, 257, SizePoolAllocator<256>,
Segregator<T, 1025, SizePoolAllocator<1024>,
std::allocator<T>>>>>>>;
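
Spelled out, the nested thresholds route each request by its total size in bytes (n * sizeof(T)); this mapping follows directly from the alias above:

// Size routing implied by DRaceAllocator:
//     1 -    4 bytes -> SizePoolAllocator<4>
//     5 -    8 bytes -> SizePoolAllocator<8>
//     9 -   16 bytes -> SizePoolAllocator<16>
//    17 -   64 bytes -> SizePoolAllocator<64>
//    65 -  256 bytes -> SizePoolAllocator<256>
//   257 - 1024 bytes -> SizePoolAllocator<1024>
//        1025+ bytes -> std::allocator<T>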

template <typename T, size_t numChunks = 512>
class ThreadSafePoolAllocator {
private:
  std::atomic<Chunk*> free_pointer{nullptr};    // pointer to the first free chunk
  size_t num_chunks = numChunks;                // number of chunks in a block
  size_t chunk_size = sizeof(T);                // chunk size in bytes
  size_t block_size = num_chunks * chunk_size;  // block size in bytes
  size_t chunks_allocated = 0;                  // number of chunks allocated so far

public:
using value_type = T;
using size_type = size_t;
using difference_type = ptrdiff_t;
using pointer = T*;
using const_pointer = const T*;
using void_pointer = void*;
using const_void_pointer = const void*;

  ThreadSafePoolAllocator() = default;
  ~ThreadSafePoolAllocator() = default;

size_type max_size() const { return std::numeric_limits<size_type>::max(); }

  pointer allocate() {
    if (free_pointer.load(std::memory_order_acquire) == nullptr) {
      free_pointer.store(get_more_memory(), std::memory_order_release);
    }
    // pop the head of the free list and advance it to the next free chunk;
    // note: this load/store pair is not atomic as a whole, so concurrent
    // callers would need a compare-and-swap loop (hence "experimental").
    Chunk* allocated = free_pointer.load(std::memory_order_acquire);
    free_pointer.store(allocated->next, std::memory_order_release);
    chunks_allocated++;
    return reinterpret_cast<pointer>(allocated);
  }

Chunk* get_more_memory() {
Chunk* start = reinterpret_cast<Chunk*>(operator new(block_size));
Chunk* it = start;
for (size_t i = 0; i < num_chunks - 1; ++i) {
it->next =
reinterpret_cast<Chunk*>(reinterpret_cast<char*>(it) + chunk_size);
it = it->next;
}
it->next = nullptr;
return start;
}

  void deallocate(void* ptr) {
    Chunk* c = reinterpret_cast<Chunk*>(ptr);
    c->next = free_pointer.load(std::memory_order_acquire);
    free_pointer.store(c, std::memory_order_release);
    chunks_allocated--;
  }
};

#endif //! POOL_ALLOCATOR_HEADER_H
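
Since the class above is explicitly experimental and its allocate()/deallocate() are not yet safe against concurrent callers, a compare-and-swap loop would be the usual next step. A minimal sketch of a lock-free pop of the free list (a hypothetical helper, not part of this commit, and still subject to the classic ABA problem):

// Sketch: lock-free pop of an intrusive free list.
inline Chunk *lock_free_pop(std::atomic<Chunk *> &head) {
  Chunk *old_head = head.load(std::memory_order_acquire);
  while (old_head != nullptr &&
         !head.compare_exchange_weak(old_head, old_head->next,
                                     std::memory_order_acq_rel,
                                     std::memory_order_acquire)) {
    // on failure, compare_exchange_weak reloads old_head with the current head; retry
  }
  return old_head;  // nullptr means a new block must be requested first
}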