perf: add implementation for storing call stacks as a prefix tree
- Implementations for storing call stack information only have to
  implement 4 functions: make_trace(), get_current_element(),
  insert_function_element() and remove_function_element() (see the
  interface sketch below);
- PrefixTree_StackDepot is the first version that adapts a prefix tree
  for storing call stack information. It uses hash maps instead of
  constant-size arrays;
- PrefixTreeDepot.h contains the cache-efficiency-optimized version of
  the prefix tree implementation.
taranbis committed Jan 15, 2021
1 parent d4f6b5c commit 556b612
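
A minimal sketch of the four-function interface named in the commit message; the function names come from the message itself, while the signatures and the Trace type are assumptions, since the stack-depot headers are not among the hunks reproduced below:

#include <cstddef>
#include <vector>

// HypotheticalStackDepot: illustration only. The four member functions are the
// ones named in the commit message; parameter and return types are assumed.
class HypotheticalStackDepot {
 public:
  using Trace = std::vector<size_t>;        // assumed: program counters, callee last
  Trace make_trace(size_t addr) const;      // rebuild the call stack recorded for an address
  size_t get_current_element() const;       // element at the top of the current call stack
  void insert_function_element(size_t pc);  // record a function entry
  void remove_function_element(size_t pc);  // record a function exit
};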
Showing 16 changed files with 705 additions and 88 deletions.
1 change: 0 additions & 1 deletion standalone/CMakeLists.txt
@@ -21,5 +21,4 @@ endif()

add_subdirectory("detectors/fasttrack")
add_subdirectory("binarydecoder")
add_subdirectory("helper")
add_subdirectory("ft_benchmark")
10 changes: 7 additions & 3 deletions standalone/binarydecoder/BinaryDecoder.cpp
@@ -19,7 +19,6 @@
#include "DetectorOutput.h"

int main(int argc, char** argv) {
// std::string detec = "drace.detector.tsan.dll";
std::string detec = "drace.detector.fasttrack.standalone.dll";
std::string file = "trace.bin";

@@ -46,14 +45,19 @@ int main(int argc, char** argv) {
std::vector<ipc::event::BufferEntry> buffer(
(size_t)(size / sizeof(ipc::event::BufferEntry)));

__debugbreak();
/**
 * \note debug breaks are placed to measure only the performance of the
 * detection algorithm, on a single thread. They are placed right before
 * the first operation of the detector and right after the last one.
*/
// __debugbreak();
DetectorOutput output(detec.c_str());
if (in_file.read((char*)(buffer.data()), size).good()) {
for (auto it = buffer.begin(); it != buffer.end(); ++it) {
ipc::event::BufferEntry tmp = *it;
output.makeOutput(&tmp);
}
__debugbreak();
// __debugbreak();
}
} catch (const std::exception& e) {
std::cerr << "Could not load detector: " << e.what() << std::endl;
2 changes: 2 additions & 0 deletions standalone/detectors/fasttrack/CMakeLists.txt
@@ -11,12 +11,14 @@
set(FT_SOURCES
"src/stacktrace"
"src/varstate"
"src/MemoryPool"
"src/threadstate")

set(FT_TEST_SOURCES
"test/fasttrack_test"
"src/stacktrace"
"src/varstate"
"src/MemoryPool"
"src/threadstate")

include(GenerateExportHeader)
61 changes: 61 additions & 0 deletions standalone/detectors/fasttrack/include/MemoryPool.h
@@ -0,0 +1,61 @@
#ifndef MEMORY_POOL_HEADER_H
#define MEMORY_POOL_HEADER_H 1
#pragma once

/*
* DRace, a dynamic data race detector
*
* Copyright 2020 Siemens AG
*
* Authors:
* Mihai Robescu <mihai-gabriel.robescu@siemens.com>
*
* SPDX-License-Identifier: MIT
*/

#include <assert.h>
#include <iostream>

struct Chunk {
Chunk *next; // pointer to the next Chunk, when chunk is free
};

// For each allocator there will be a separate instantiation of the memory pool
class MemoryPool {
private:
/// pointer to the first free address
Chunk *free_pointer = nullptr;

  /// number of chunks in a block
  size_t num_chunks = 0;

  /// chunk size, equivalent to sizeof(T) from the template specialization
  size_t chunk_size = 0;

  /// block size (chunk_size * num_chunks)
  size_t block_size = 0;

/// holds how many chunks were allocated until now
size_t chunks_allocated = 0;

public:
MemoryPool() = default;
MemoryPool(size_t chunkSize) : chunk_size(chunkSize) {}
MemoryPool(size_t chunkSize, size_t numChunks)
: chunk_size(chunkSize), num_chunks(numChunks) {
block_size = chunk_size * num_chunks;
free_pointer = get_more_memory();
}
Chunk *allocate() { return do_allocation(); }
void deallocate(void *ptr) { do_deallocation(ptr); }
void print_used_memory() {
std::cout << "Memory Allocated: " << chunks_allocated * chunk_size
<< std::endl;
}

private:
Chunk *do_allocation();
Chunk *get_more_memory(); // allocate 1 block of chunks
void do_deallocation(void *ptr);
};
#endif // !MEMORY_POOL_HEADER_H
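
The definitions of do_allocation(), get_more_memory() and do_deallocation() live in src/MemoryPool, which this commit adds to the CMake sources but whose hunks are not shown in this excerpt. A minimal sketch of how such an intrusive free list is typically implemented, mirroring the block/chunk layout used by ThreadSafePoolAllocator further below (the bodies are assumptions, not the committed sources):

// Sketch only: typical definitions for the three private members declared above.
Chunk *MemoryPool::do_allocation() {
  if (free_pointer == nullptr) {
    free_pointer = get_more_memory();  // free list exhausted: allocate a new block
  }
  Chunk *allocated = free_pointer;     // pop the head of the free list
  free_pointer = free_pointer->next;
  ++chunks_allocated;
  return allocated;
}

Chunk *MemoryPool::get_more_memory() {
  // allocate one block of raw memory and thread the free list through its chunks
  Chunk *start = reinterpret_cast<Chunk *>(operator new(block_size));
  Chunk *it = start;
  for (size_t i = 0; i < num_chunks - 1; ++i) {
    it->next = reinterpret_cast<Chunk *>(reinterpret_cast<char *>(it) + chunk_size);
    it = it->next;
  }
  it->next = nullptr;
  return start;
}

void MemoryPool::do_deallocation(void *ptr) {
  Chunk *c = reinterpret_cast<Chunk *>(ptr);
  c->next = free_pointer;  // push the chunk back onto the free list
  free_pointer = c;
  --chunks_allocated;
}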
199 changes: 199 additions & 0 deletions standalone/detectors/fasttrack/include/PoolAllocator.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
#ifndef POOL_ALLOCATOR_HEADER_H
#define POOL_ALLOCATOR_HEADER_H 1
#pragma once

/*
* DRace, a dynamic data race detector
*
* Copyright 2020 Siemens AG
*
* Authors:
* Mihai Robescu <mihai-gabriel.robescu@siemens.com>
* Felix Moessbauer <felix.moessbauer@siemens.com>
*
* SPDX-License-Identifier: MIT
*/

#include <atomic>
#include <iostream>
#include <limits>
#include <memory>

#include <ipc/spinlock.h>
#include "MemoryPool.h"

/**
 *------------------------------------------------------------------------------
 *
 * Header file implementing a pool allocator. It can also be used
 * in combination with the Segregator class, as in the DRaceAllocator
 * alias further down in this file.
 *
 * There is also a thread-safe version (ThreadSafePoolAllocator), which is
 * currently only experimental.
 *
 *------------------------------------------------------------------------------
 */

template <class T, int threshold, class SmallAllocator, class LargeAllocator>
class Segregator {
public:
using value_type = T;
using size_type = size_t;
using difference_type = ptrdiff_t;
using pointer = T*;
using const_pointer = const T*;
using void_pointer = void*;
using const_void_pointer = const void*;

  T* allocate(size_t n) {
    size_t size = n * sizeof(T);
    if (size < threshold) {
      // return new (reinterpret_cast<void*>(SmallAllocator::allocate())) T();
      return reinterpret_cast<pointer>(SmallAllocator::allocate());
    } else {
      return reinterpret_cast<pointer>(LargeAllocator::allocate());
    }
  }
void deallocate(T* p, std::size_t n) noexcept {
size_t size = n * sizeof(T);
if (size < threshold) {
return SmallAllocator::deallocate(reinterpret_cast<void*>(p));
} else {
return LargeAllocator::deallocate(reinterpret_cast<void*>(p));
}
}
template <typename U>
struct rebind {
using other = Segregator<U, threshold, SmallAllocator, LargeAllocator>;
};
};

template <typename T, size_t numChunks = 512>
class PoolAllocator {
public:
using value_type = T;
using size_type = size_t;
using difference_type = ptrdiff_t;
using pointer = T*;
using const_pointer = const T*;
using void_pointer = void*;
using const_void_pointer = const void*;

  PoolAllocator() = default;
  ~PoolAllocator() = default;

size_type max_size() const { return std::numeric_limits<size_type>::max(); }

static pointer allocate() {
return reinterpret_cast<pointer>(mem_pool.allocate());
}

static void deallocate(void* ptr) {
mem_pool.deallocate(ptr);
}

void usedMemory() { mem_pool.print_used_memory(); }

template <class U>
PoolAllocator(const PoolAllocator<U, numChunks>& other) {}

template <typename U>
struct rebind {
using other = PoolAllocator<U, numChunks>;
};

private:
static MemoryPool mem_pool;
};
template <typename T, size_t numChunks>
MemoryPool PoolAllocator<T, numChunks>::mem_pool(sizeof(T), numChunks);
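
Because both the pool and the allocate()/deallocate() functions are static, all users of a given PoolAllocator<T, numChunks> instantiation share one pool and need no allocator instance. A minimal usage sketch (MyNode is a hypothetical type introduced only for illustration):

#include <new>  // placement new

struct MyNode {  // hypothetical payload type, not part of the commit
  int value;
  MyNode *next;
};

void pool_allocator_example() {
  // take one raw chunk from the static pool and construct a MyNode in it
  MyNode *n = new (PoolAllocator<MyNode>::allocate()) MyNode{42, nullptr};
  // ... use n ...
  n->~MyNode();                          // destroy the object
  PoolAllocator<MyNode>::deallocate(n);  // return the chunk to the free list
}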

template <size_t size, size_t numChunks = 512>
class SizePoolAllocator {
public:
  SizePoolAllocator() = default;
  ~SizePoolAllocator() = default;
using size_type = size_t;

size_type max_size() const { return std::numeric_limits<size_type>::max(); }

static void* allocate() {
return reinterpret_cast<void*>(mem_pool.allocate());
}

static void deallocate(void* ptr) { mem_pool.deallocate(ptr); }

static void usedMemory() { mem_pool.print_used_memory(); }

private:
static MemoryPool mem_pool;
};
template <size_t size, size_t numChunks>
MemoryPool SizePoolAllocator<size, numChunks>::mem_pool(size, numChunks);

template <class T>
using DRaceAllocator = Segregator<
T, 5, SizePoolAllocator<4>,
Segregator<
T, 9, SizePoolAllocator<8>,
Segregator<
T, 17, SizePoolAllocator<16>,
Segregator<T, 65, SizePoolAllocator<64>,
Segregator<T, 257, SizePoolAllocator<256>,
Segregator<T, 1025, SizePoolAllocator<1024>,
std::allocator<T>>>>>>>;
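
Spelled out, the nested thresholds route each request by its total size in bytes (n * sizeof(T)); this mapping follows directly from the alias above:

// Size routing implied by DRaceAllocator:
//     1 -    4 bytes -> SizePoolAllocator<4>
//     5 -    8 bytes -> SizePoolAllocator<8>
//     9 -   16 bytes -> SizePoolAllocator<16>
//    17 -   64 bytes -> SizePoolAllocator<64>
//    65 -  256 bytes -> SizePoolAllocator<256>
//   257 - 1024 bytes -> SizePoolAllocator<1024>
//        1025+ bytes -> std::allocator<T>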

template <typename T, size_t numChunks = 512>
class ThreadSafePoolAllocator {
private:
  std::atomic<Chunk*> free_pointer{nullptr};    // pointer to the first free chunk
  size_t num_chunks = numChunks;                // number of chunks in a block
  size_t chunk_size = sizeof(T);                // chunk size in bytes
  size_t block_size = num_chunks * chunk_size;  // block size in bytes
  size_t chunks_allocated = 0;                  // number of chunks allocated so far

public:
using value_type = T;
using size_type = size_t;
using difference_type = ptrdiff_t;
using pointer = T*;
using const_pointer = const T*;
using void_pointer = void*;
using const_void_pointer = const void*;

  ThreadSafePoolAllocator() = default;
  ~ThreadSafePoolAllocator() = default;

size_type max_size() const { return std::numeric_limits<size_type>::max(); }

  pointer allocate() {
    if (free_pointer.load(std::memory_order_acquire) == nullptr) {
      free_pointer.store(get_more_memory(), std::memory_order_release);
    }
    // pop the head of the free list and advance it to the next free chunk;
    // note: this load/store pair is not atomic as a whole, so concurrent
    // callers would need a compare-and-swap loop (hence "experimental").
    Chunk* allocated = free_pointer.load(std::memory_order_acquire);
    free_pointer.store(allocated->next, std::memory_order_release);
    chunks_allocated++;
    return reinterpret_cast<pointer>(allocated);
  }

Chunk* get_more_memory() {
Chunk* start = reinterpret_cast<Chunk*>(operator new(block_size));
Chunk* it = start;
for (size_t i = 0; i < num_chunks - 1; ++i) {
it->next =
reinterpret_cast<Chunk*>(reinterpret_cast<char*>(it) + chunk_size);
it = it->next;
}
it->next = nullptr;
return start;
}

  void deallocate(void* ptr) {
    Chunk* c = reinterpret_cast<Chunk*>(ptr);
    c->next = free_pointer.load(std::memory_order_acquire);
    free_pointer.store(c, std::memory_order_release);
    chunks_allocated--;
  }
};

#endif //! POOL_ALLOCATOR_HEADER_H
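
Since the class above is explicitly experimental and its allocate()/deallocate() are not yet safe against concurrent callers, a compare-and-swap loop would be the usual next step. A minimal sketch of a lock-free pop of the free list (a hypothetical helper, not part of this commit, and still subject to the classic ABA problem):

// Sketch: lock-free pop of an intrusive free list.
inline Chunk *lock_free_pop(std::atomic<Chunk *> &head) {
  Chunk *old_head = head.load(std::memory_order_acquire);
  while (old_head != nullptr &&
         !head.compare_exchange_weak(old_head, old_head->next,
                                     std::memory_order_acq_rel,
                                     std::memory_order_acquire)) {
    // on failure, compare_exchange_weak reloads old_head with the current head; retry
  }
  return old_head;  // nullptr means a new block must be requested first
}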