Defensive code for alloc/dealloc during TLS teardown #161

Merged Apr 7, 2020 (35 commits)
Changes from 24 commits

Commits
69cb531
Defensive code for alloc/dealloc during TLS teardown
mjp41 Apr 1, 2020
9c5b674
Added some printing first operation to track progress
mjp41 Apr 1, 2020
c24d6be
Improve error messages on posix
mjp41 Apr 2, 2020
3fe006d
Detect incorrect use of pool.
mjp41 Apr 2, 2020
da66265
Clang format.
mjp41 Apr 2, 2020
372beed
Replace broken LL/SC implementation
mjp41 Apr 2, 2020
9d06cc7
Clang format.
mjp41 Apr 3, 2020
ff281d7
Improve TLS teardown.
mjp41 Apr 4, 2020
37a6e41
Make std::function fully inlined.
mjp41 Apr 4, 2020
992d9b9
Factor out PALLinux stack trace.
mjp41 Apr 6, 2020
7a150a8
Add checks for leaking allocators.
mjp41 Apr 6, 2020
e58f56c
Add release build of Windows Clang
mjp41 Apr 6, 2020
c6e7193
Fixup: TLS register_cleanup
mjp41 Apr 6, 2020
d86e662
Test for lazy init
mjp41 Apr 6, 2020
1715760
Use Clang-CL on Windows.
mjp41 Apr 6, 2020
91c1c49
Fix Clang-cl build
mjp41 Apr 6, 2020
af660e3
Format
mjp41 Apr 6, 2020
3cadd07
Fix initialisation on fast path to tls placeholder
mjp41 Apr 6, 2020
2d05dd3
Clang format
mjp41 Apr 6, 2020
a34dfb6
Clang-cl CI.
mjp41 Apr 6, 2020
cb39309
Add LLVM to path.
mjp41 Apr 6, 2020
660f144
Specify llvm-rc
mjp41 Apr 6, 2020
32ca0e0
Add VS Command prompt
mjp41 Apr 6, 2020
968a6f9
Add VS Command prompt
mjp41 Apr 6, 2020
1d63525
CMake clang-cl another attempt.
mjp41 Apr 6, 2020
db33f92
CMake clang-cl another attempt.
mjp41 Apr 6, 2020
da52280
Code review changes
mjp41 Apr 6, 2020
84e0414
More CI
mjp41 Apr 6, 2020
9a35d2c
Remove incorrectly committed line.
mjp41 Apr 6, 2020
6a23466
Ifdef fix
mjp41 Apr 6, 2020
3f93de5
vs command prompt setting
mjp41 Apr 6, 2020
11e7a36
Prevent inlining of error code.
mjp41 Apr 7, 2020
31ddb58
CR: Make Pool more generic.
mjp41 Apr 7, 2020
86cc1f5
Locally disable unreachable warning.
mjp41 Apr 7, 2020
5bcffbb
Code review and clang format.
mjp41 Apr 7, 2020
4 changes: 4 additions & 0 deletions CMakeLists.txt
@@ -147,6 +147,10 @@ if(NOT DEFINED SNMALLOC_ONLY_HEADER_LIBRARY)
set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /DEBUG")
else()
add_compile_options(-fno-exceptions -fno-rtti -g -ftls-model=initial-exec -fomit-frame-pointer)
if(SNMALLOC_CI_BUILD OR (${CMAKE_BUILD_TYPE} MATCHES "Debug"))
# Get better stack traces in CI and Debug.
target_link_libraries(snmalloc_lib INTERFACE "-rdynamic")
endif()

if((${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86_64") OR
(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86") OR
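Why -rdynamic helps here: it asks the linker to export the executable's own symbols in the dynamic symbol table, which is what lets backtrace_symbols resolve frame addresses to function names at runtime. A minimal sketch of the kind of trace printing this improves, assuming glibc's execinfo.h (this is illustrative, not the PALLinux code touched by this PR):

#include <cstdio>
#include <cstdlib>
#include <execinfo.h>

// Print a symbolised stack trace. Without -rdynamic, frames from the
// executable itself typically show as raw addresses instead of names.
void print_stack_trace()
{
  void* frames[64];
  int depth = backtrace(frames, 64);
  char** names = backtrace_symbols(frames, depth);
  if (names != nullptr)
  {
    for (int i = 0; i < depth; i++)
      fprintf(stderr, "  %s\n", names[i]);
    free(names); // backtrace_symbols allocates the whole array with malloc
  }
}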
7 changes: 6 additions & 1 deletion azure-pipelines.yml
@@ -224,7 +224,11 @@ jobs:
matrix:
64-bit Debug Clang:
BuildType: Debug
CMakeArgs: '-GNinja -DCMAKE_C_COMPILER="C:/Program Files/LLVM/bin/clang.exe" -DCMAKE_CXX_COMPILER="C:/Program Files/LLVM/bin/clang++.exe"'
CMakeArgs: '-GNinja -DCMAKE_C_COMPILER="C:/Program Files/LLVM/bin/clang-cl.exe" -DCMAKE_CXX_COMPILER="C:/Program Files/LLVM/bin/clang-cl.exe" -DCMAKE_RC_COMPILER="C:/Program Files/LLVM/bin/llvm-rc"'
JFlag: -j 4
64-bit Release Clang:
BuildType: Release
CMakeArgs: '-GNinja -DCMAKE_C_COMPILER="C:/Program Files/LLVM/bin/clang-cl.exe" -DCMAKE_CXX_COMPILER="C:/Program Files/LLVM/bin/clang-cl.exe" -DCMAKE_RC_COMPILER="C:/Program Files/LLVM/bin/llvm-rc"'
JFlag: -j 4

steps:
@@ -235,6 +239,7 @@ jobs:

- script: |
choco install --accept-license -y Ninja llvm
call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\VsDevCmd.bat"
displayName: 'Dependencies'

- task: CMake@1
142 changes: 91 additions & 51 deletions src/ds/aba.h
@@ -2,13 +2,24 @@

#include "bits.h"

/**
* This file contains an abstraction of ABA protection. This API should be
* implementable with double-word compare and exchange or with load-link
* store conditional.
*
* We provide a lock based implementation.
*/
namespace snmalloc
{
#ifdef PLATFORM_IS_X86
// LL/SC typically can only perform one operation at a time
// check this on other platforms using a thread_local.
inline thread_local bool operation_in_flight = false;

template<typename T, Construction c = RequiresInit>
class ABA
{
public:
#ifdef PLATFORM_IS_X86
struct alignas(2 * sizeof(std::size_t)) Linked
{
T* ptr;
@@ -28,21 +39,12 @@
sizeof(Linked) == (2 * sizeof(std::size_t)),
"Expecting ABA to be the size of two pointers");

using Cmp = Linked;
#else
using Cmp = T*;
#endif

private:
#ifdef PLATFORM_IS_X86
union
{
alignas(2 * sizeof(std::size_t)) std::atomic<Linked> linked;
Independent independent;
};
#else
std::atomic<T*> raw;
#endif

public:
ABA()
@@ -53,66 +55,104 @@

void init(T* x)
{
#ifdef PLATFORM_IS_X86
independent.ptr.store(x, std::memory_order_relaxed);
independent.aba.store(0, std::memory_order_relaxed);
#else
raw.store(x, std::memory_order_relaxed);
#endif
}

T* peek()
{
return
#ifdef PLATFORM_IS_X86
independent.ptr.load(std::memory_order_relaxed);
#else
raw.load(std::memory_order_relaxed);
#endif
}
struct Cmp;

Cmp read()
{
return
#ifdef PLATFORM_IS_X86
Cmp{independent.ptr.load(std::memory_order_relaxed),
independent.aba.load(std::memory_order_relaxed)};
#else
raw.load(std::memory_order_relaxed);
#endif
}
if (operation_in_flight)
error("Only one inflight ABA operation at a time is allowed.");
operation_in_flight = true;

static T* ptr(Cmp& from)
{
#ifdef PLATFORM_IS_X86
return from.ptr;
#else
return from;
#endif
return Cmp{{independent.ptr.load(std::memory_order_relaxed),
independent.aba.load(std::memory_order_relaxed)},
this};
}

bool compare_exchange(Cmp& expect, T* value)
struct Cmp
{
#ifdef PLATFORM_IS_X86
Linked old;
ABA* parent;

T* ptr()
{
return old.ptr;
}

bool store_conditional(T* value)
{
# if defined(_MSC_VER) && defined(SNMALLOC_VA_BITS_64)
return _InterlockedCompareExchange128(
(volatile __int64*)&linked,
(__int64)(expect.aba + (uintptr_t)1),
(__int64)value,
(__int64*)&expect);
auto result = _InterlockedCompareExchange128(
(volatile __int64*)parent,
(__int64)(old.aba + (uintptr_t)1),
(__int64)value,
(__int64*)&old);
# else
# if defined(__GNUC__) && !defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_16)
#  error You must compile with -mcx16 to enable 16-byte atomic compare and swap.
# endif
Cmp xchg{value, expect.aba + 1};
Linked xchg{value, old.aba + 1};
std::atomic<Linked>& addr = parent->linked;

return linked.compare_exchange_weak(
expect, xchg, std::memory_order_relaxed, std::memory_order_relaxed);
auto result = addr.compare_exchange_weak(
old, xchg, std::memory_order_relaxed, std::memory_order_relaxed);
# endif
return result;
}

~Cmp()
{
operation_in_flight = false;
}

Cmp(const Cmp&) = delete;
};
};
#else
return raw.compare_exchange_weak(
expect, value, std::memory_order_relaxed, std::memory_order_relaxed);
#endif
/**
* Naive implementation of ABA protection using a spin lock.
*/
template<typename T, Construction c = RequiresInit>
class ABA
{
std::atomic<T*> ptr = nullptr;
std::atomic_flag lock = ATOMIC_FLAG_INIT;

public:
struct Cmp;

Cmp read()
{
while (lock.test_and_set(std::memory_order_acquire))
Aal::pause();

return Cmp{this};
}

struct Cmp
{
ABA* parent;

public:
T* ptr()
{
return parent->ptr;
}

bool store_conditional(T* t)
{
parent->ptr = t;
return true;
}

~Cmp()
{
parent->lock.clear(std::memory_order_release);
}
};
};
#endif
} // namespace snmalloc
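The reworked API replaces the old read/ptr/compare_exchange trio with an LL/SC-shaped one: read() hands back an RAII Cmp that holds the protection (the thread_local in-flight flag on x86, the spin lock otherwise) until it is destroyed, and store_conditional attempts the publish. A minimal usage sketch with a hypothetical Node type; the real call sites are in mpmcstack.h below:

#include <atomic>

#include "ds/aba.h" // include path assumed; aba.h as changed in this PR

// Hypothetical node type; only the atomic next field matters here.
struct Node
{
  std::atomic<Node*> next{nullptr};
};

snmalloc::ABA<Node> head; // (initialisation of head elided)

void push(Node* n)
{
  // read() acquires ABA protection; ~Cmp releases it at end of scope.
  auto cmp = head.read();
  do
  {
    n->next.store(cmp.ptr(), std::memory_order_release);
  } while (!cmp.store_conditional(n));
}

Note that on x86 a second read() while a Cmp is live trips the operation_in_flight check above, and in the lock-based fallback store_conditional always succeeds on the first try because the lock is held for the whole read/store window.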
6 changes: 5 additions & 1 deletion src/ds/defines.h
@@ -36,6 +36,9 @@ namespace snmalloc
void error(const char* const str);
} // namespace snmalloc

#define TOSTRING(expr) TOSTRING2(expr)
#define TOSTRING2(expr) #expr

#ifdef NDEBUG
# define SNMALLOC_ASSERT(expr) \
{}
@@ -44,7 +47,8 @@
{ \
if (!(expr)) \
{ \
snmalloc::error("assert fail"); \
snmalloc::error("assert fail: " #expr " in " __FILE__ \
" on " TOSTRING(__LINE__)); \
} \
}
#endif
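The two-step TOSTRING/TOSTRING2 pair exists so that __LINE__ is macro-expanded to its numeric value before the # operator stringises it; a single-step macro would stringise the token __LINE__ itself. A quick self-contained illustration of the difference:

#include <cstdio>

#define TOSTRING(expr) TOSTRING2(expr)
#define TOSTRING2(expr) #expr

int main()
{
  printf("%s\n", TOSTRING(__LINE__));  // expands first: prints e.g. "9"
  printf("%s\n", TOSTRING2(__LINE__)); // stringises directly: prints "__LINE__"
  return 0;
}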
12 changes: 6 additions & 6 deletions src/ds/mpmcstack.h
@@ -29,9 +29,9 @@ namespace snmalloc

do
{
T* top = ABAT::ptr(cmp);
T* top = cmp.ptr();
last->next.store(top, std::memory_order_release);
} while (!stack.compare_exchange(cmp, first));
} while (!cmp.store_conditional(first));
}

T* pop()
@@ -44,13 +44,13 @@

do
{
top = ABAT::ptr(cmp);
top = cmp.ptr();

if (top == nullptr)
break;

next = top->next.load(std::memory_order_acquire);
} while (!stack.compare_exchange(cmp, next));
} while (!cmp.store_conditional(next));

return top;
}
@@ -63,11 +63,11 @@

do
{
top = ABAT::ptr(cmp);
top = cmp.ptr();

if (top == nullptr)
break;
} while (!stack.compare_exchange(cmp, nullptr));
} while (!cmp.store_conditional(nullptr));

return top;
}
49 changes: 28 additions & 21 deletions src/mem/alloc.h
@@ -17,6 +17,8 @@
#include "sizeclasstable.h"
#include "slab.h"

#include <functional>

namespace snmalloc
{
enum Boundary
Expand Down Expand Up @@ -78,7 +80,7 @@ namespace snmalloc
*/
template<
bool (*NeedsInitialisation)(void*),
void* (*InitThreadAllocator)(),
void* (*InitThreadAllocator)(std::function<void*(void*)>&),
class MemoryProvider = GlobalVirtual,
class ChunkMap = SNMALLOC_DEFAULT_CHUNKMAP,
bool IsQueueInline = true>
Expand Down Expand Up @@ -1095,20 +1097,20 @@ namespace snmalloc
stats().sizeclass_alloc(sizeclass);
return small_alloc_new_free_list<zero_mem, allow_reserve>(sizeclass);
}
return small_alloc_first_alloc<zero_mem, allow_reserve>(sizeclass, size);
return small_alloc_first_alloc<zero_mem, allow_reserve>(size);
}

/**
* Called on first allocation to set up the thread local allocator,
* then directs the allocation request to the newly created allocator.
*/
template<ZeroMem zero_mem, AllowReserve allow_reserve>
SNMALLOC_SLOW_PATH void*
small_alloc_first_alloc(sizeclass_t sizeclass, size_t size)
SNMALLOC_SLOW_PATH void* small_alloc_first_alloc(size_t size)
{
auto replacement = InitThreadAllocator();
return reinterpret_cast<Allocator*>(replacement)
->template small_alloc_inner<zero_mem, allow_reserve>(sizeclass, size);
std::function<void*(void*)> f = [size](void* alloc) {
return reinterpret_cast<Allocator*>(alloc)->alloc(size);
};
return InitThreadAllocator(f);
}

/**
Expand Down Expand Up @@ -1285,10 +1287,10 @@ namespace snmalloc
{
if (NeedsInitialisation(this))
{
void* replacement = InitThreadAllocator();
return reinterpret_cast<Allocator*>(replacement)
->template medium_alloc<zero_mem, allow_reserve>(
sizeclass, rsize, size);
std::function<void*(void*)> f = [size](void* alloc) {
return reinterpret_cast<Allocator*>(alloc)->alloc(size);
};
return InitThreadAllocator(f);
}
slab = reinterpret_cast<Mediumslab*>(
large_allocator.template alloc<NoZero, allow_reserve>(
Expand Down Expand Up @@ -1359,9 +1361,10 @@ namespace snmalloc

if (NeedsInitialisation(this))
{
void* replacement = InitThreadAllocator();
return reinterpret_cast<Allocator*>(replacement)
->template large_alloc<zero_mem, allow_reserve>(size);
std::function<void*(void*)> f = [size](void* alloc) {
return reinterpret_cast<Allocator*>(alloc)->alloc(size);
};
return InitThreadAllocator(f);
}

size_t size_bits = bits::next_pow2_bits(size);
@@ -1386,9 +1389,12 @@

if (NeedsInitialisation(this))
{
void* replacement = InitThreadAllocator();
return reinterpret_cast<Allocator*>(replacement)
->large_dealloc(p, size);
std::function<void*(void*)> f = [p](void* alloc) {
reinterpret_cast<Allocator*>(alloc)->dealloc(p);
return nullptr;
};
InitThreadAllocator(f);
return;
}

size_t size_bits = bits::next_pow2_bits(size);
Expand Down Expand Up @@ -1439,10 +1445,11 @@ namespace snmalloc
// a real allocator and construct one if we aren't.
if (NeedsInitialisation(this))
{
void* replacement = InitThreadAllocator();
// We have to do a dealloc, not a remote_dealloc here because this may
// have been allocated with the allocator that we've just had returned.
reinterpret_cast<Allocator*>(replacement)->dealloc(p);
std::function<void*(void*)> f = [p](void* alloc) {
reinterpret_cast<Allocator*>(alloc)->dealloc(p);
return nullptr;
};
InitThreadAllocator(f);
return;
}

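All of the slow paths above now package the pending request into a std::function and hand it to InitThreadAllocator, so the operation is replayed exactly once on the allocator that initialisation produces, rather than each caller re-dispatching to a specific internal entry point. A sketch of the shape an implementation with the new signature could take; get_thread_allocator_slow is a hypothetical helper, not part of this diff:

#include <functional>

// Hypothetical: construct or fetch the calling thread's allocator,
// then replay the deferred alloc/dealloc on it and return its result.
void* init_thread_allocator(std::function<void*(void*)>& deferred)
{
  void* alloc = get_thread_allocator_slow(); // assumed to set up TLS
  return deferred(alloc);
}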