Skip to content

Commit

Permalink
Defensive code for alloc/dealloc during TLS teardown (#161)
Browse files Browse the repository at this point in the history
* Defensive code for alloc/dealloc during TLS teardown

If an allocation or deallocation occurs during TLS teardown, then it is
possible for a new allocator to be created and then this is leaked. On
the mimalloc-bench mstressN benchmark this was observed leading to a
large memory leak.

This fix, detects if we are in the TLS teardown phase, and if so,
the calls to alloc or dealloc must return the allocator once they have
perform the specific operation.

Uses a separate variable to represent if a thread_local's destructor has
run already.  This is used to detect thread teardown to put the
allocator into a special slow path to avoid leaks.

* Added some printing first operation to track progress

* Improve error messages on posix

Flush errors, print assert details, and present stack traces.

* Detect incorrect use of pool.

* Clang format.

* Replace broken LL/SC implementation

LL/SC implementation was broken, this replaces it with
a locking implementation. Changes the API to support LL/SC
for future implementation on ARM.

* Improve TLS teardown.

* Make std::function fully inlined.

* Factor out PALLinux stack trace.

* Add checks for leaking allocators.

* Add release build of Windows Clang
  • Loading branch information
mjp41 authored Apr 7, 2020
1 parent d878880 commit 74657d9
Show file tree
Hide file tree
Showing 13 changed files with 357 additions and 145 deletions.
4 changes: 4 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,10 @@ if(NOT DEFINED SNMALLOC_ONLY_HEADER_LIBRARY)
set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /DEBUG")
else()
add_compile_options(-fno-exceptions -fno-rtti -g -ftls-model=initial-exec -fomit-frame-pointer)
if(SNMALLOC_CI_BUILD OR (${CMAKE_BUILD_TYPE} MATCHES "Debug"))
# Get better stack traces in CI and Debug.
target_link_libraries(snmalloc_lib INTERFACE "-rdynamic")
endif()

if((${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86_64") OR
(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86") OR
Expand Down
22 changes: 12 additions & 10 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,11 @@ jobs:
matrix:
64-bit Debug Clang:
BuildType: Debug
CMakeArgs: '-GNinja -DCMAKE_C_COMPILER="C:/Program Files/LLVM/bin/clang.exe" -DCMAKE_CXX_COMPILER="C:/Program Files/LLVM/bin/clang++.exe"'
CMakeArgs: '-GNinja -DCMAKE_C_COMPILER="C:/Program Files/LLVM/bin/clang-cl.exe" -DCMAKE_CXX_COMPILER="C:/Program Files/LLVM/bin/clang-cl.exe" -DCMAKE_RC_COMPILER="C:/Program Files/LLVM/bin/llvm-rc"'
JFlag: -j 4
64-bit Release Clang:
BuildType: Release
CMakeArgs: '-GNinja -DCMAKE_C_COMPILER="C:/Program Files/LLVM/bin/clang-cl.exe" -DCMAKE_CXX_COMPILER="C:/Program Files/LLVM/bin/clang-cl.exe" -DCMAKE_RC_COMPILER="C:/Program Files/LLVM/bin/llvm-rc"'
JFlag: -j 4

steps:
Expand All @@ -237,15 +241,13 @@ jobs:
choco install --accept-license -y Ninja llvm
displayName: 'Dependencies'
- task: CMake@1
displayName: 'CMake .. $(CMakeArgs) -DCMAKE_BUILD_TYPE=$(BuildType) -DSNMALLOC_CI_BUILD=On -DSNMALLOC_RUST_SUPPORT=On'
inputs:
cmakeArgs: '.. $(CMakeArgs) -DCMAKE_BUILD_TYPE=$(BuildType) -DSNMALLOC_CI_BUILD=On -DSNMALLOC_RUST_SUPPORT=On'

- task: CMake@1
displayName: 'CMake --build . --config ${BuildType}'
inputs:
cmakeArgs: '--build . --config ${BuildType}'
- script: |
call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
mkdir build
cd build
cmake .. $(CMakeArgs) -DCMAKE_BUILD_TYPE=$(BuildType) -DSNMALLOC_CI_BUILD=On -DSNMALLOC_RUST_SUPPORT=On
cmake --build . --config ${BuildType}
displayName: 'build'
- script: |
set PATH=%PATH%;"C:\Program Files\LLVM\bin\"
Expand Down
155 changes: 104 additions & 51 deletions src/ds/aba.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,25 @@

#include "bits.h"

/**
* This file contains an abstraction of ABA protection. This API should be
* implementable with double-word compare and exchange or with load-link
* store conditional.
*
* We provide a lock based implementation.
*/
namespace snmalloc
{
#ifndef NDEBUG
// LL/SC typically can only perform one operation at a time
// check this on other platforms using a thread_local.
inline thread_local bool operation_in_flight = false;
#endif
#ifdef PLATFORM_IS_X86
template<typename T, Construction c = RequiresInit>
class ABA
{
public:
#ifdef PLATFORM_IS_X86
struct alignas(2 * sizeof(std::size_t)) Linked
{
T* ptr;
Expand All @@ -28,21 +40,12 @@ namespace snmalloc
sizeof(Linked) == (2 * sizeof(std::size_t)),
"Expecting ABA to be the size of two pointers");

using Cmp = Linked;
#else
using Cmp = T*;
#endif

private:
#ifdef PLATFORM_IS_X86
union
{
alignas(2 * sizeof(std::size_t)) std::atomic<Linked> linked;
Independent independent;
};
#else
std::atomic<T*> raw;
#endif

public:
ABA()
Expand All @@ -53,66 +56,116 @@ namespace snmalloc

void init(T* x)
{
#ifdef PLATFORM_IS_X86
independent.ptr.store(x, std::memory_order_relaxed);
independent.aba.store(0, std::memory_order_relaxed);
#else
raw.store(x, std::memory_order_relaxed);
#endif
}

T* peek()
{
return
#ifdef PLATFORM_IS_X86
independent.ptr.load(std::memory_order_relaxed);
#else
raw.load(std::memory_order_relaxed);
#endif
}
struct Cmp;

Cmp read()
{
return
#ifdef PLATFORM_IS_X86
Cmp{independent.ptr.load(std::memory_order_relaxed),
independent.aba.load(std::memory_order_relaxed)};
#else
raw.load(std::memory_order_relaxed);
#endif
# ifndef NDEBUG
if (operation_in_flight)
error("Only one inflight ABA operation at a time is allowed.");
operation_in_flight = true;
# endif
return Cmp{{independent.ptr.load(std::memory_order_relaxed),
independent.aba.load(std::memory_order_relaxed)},
this};
}

static T* ptr(Cmp& from)
struct Cmp
{
#ifdef PLATFORM_IS_X86
return from.ptr;
#else
return from;
#endif
}
Linked old;
ABA* parent;

bool compare_exchange(Cmp& expect, T* value)
{
#ifdef PLATFORM_IS_X86
T* ptr()
{
return old.ptr;
}

bool store_conditional(T* value)
{
# if defined(_MSC_VER) && defined(SNMALLOC_VA_BITS_64)
return _InterlockedCompareExchange128(
(volatile __int64*)&linked,
(__int64)(expect.aba + (uintptr_t)1),
(__int64)value,
(__int64*)&expect);
auto result = _InterlockedCompareExchange128(
(volatile __int64*)parent,
(__int64)(old.aba + (uintptr_t)1),
(__int64)value,
(__int64*)&old);
# else
# if defined(__GNUC__) && !defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_16)
#error You must compile with -mcx16 to enable 16-byte atomic compare and swap.
# endif
Cmp xchg{value, expect.aba + 1};
Linked xchg{value, old.aba + 1};
std::atomic<Linked>& addr = parent->linked;

auto result = addr.compare_exchange_weak(
old, xchg, std::memory_order_relaxed, std::memory_order_relaxed);
# endif
return result;
}

return linked.compare_exchange_weak(
expect, xchg, std::memory_order_relaxed, std::memory_order_relaxed);
~Cmp()
{
# ifndef NDEBUG
operation_in_flight = false;
# endif
}

Cmp(const Cmp&) = delete;
};
};
#else
return raw.compare_exchange_weak(
expect, value, std::memory_order_relaxed, std::memory_order_relaxed);
#endif
/**
* Naive implementation of ABA protection using a spin lock.
*/
template<typename T, Construction c = RequiresInit>
class ABA
{
std::atomic<T*> ptr = nullptr;
std::atomic_flag lock = ATOMIC_FLAG_INIT;

public:
struct Cmp;

Cmp read()
{
while (lock.test_and_set(std::memory_order_acquire))
Aal::pause();

# ifndef NDEBUG
if (operation_in_flight)
error("Only one inflight ABA operation at a time is allowed.");
operation_in_flight = true;
# endif

return Cmp{this};
}

struct Cmp
{
ABA* parent;

public:
T* ptr()
{
return parent->ptr;
}

bool store_conditional(T* t)
{
parent->ptr = t;
return true;
}

~Cmp()
{
parent->lock.clear(std::memory_order_release);
# ifndef NDEBUG
operation_in_flight = false;
# endif
}
};
};
#endif
} // namespace snmalloc
6 changes: 5 additions & 1 deletion src/ds/defines.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ namespace snmalloc
void error(const char* const str);
} // namespace snmalloc

#define TOSTRING(expr) TOSTRING2(expr)
#define TOSTRING2(expr) #expr

#ifdef NDEBUG
# define SNMALLOC_ASSERT(expr) \
{}
Expand All @@ -44,7 +47,8 @@ namespace snmalloc
{ \
if (!(expr)) \
{ \
snmalloc::error("assert fail"); \
snmalloc::error("assert fail: " #expr " in " __FILE__ \
" on " TOSTRING(__LINE__)); \
} \
}
#endif
Expand Down
12 changes: 6 additions & 6 deletions src/ds/mpmcstack.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@ namespace snmalloc

do
{
T* top = ABAT::ptr(cmp);
T* top = cmp.ptr();
last->next.store(top, std::memory_order_release);
} while (!stack.compare_exchange(cmp, first));
} while (!cmp.store_conditional(first));
}

T* pop()
Expand All @@ -44,13 +44,13 @@ namespace snmalloc

do
{
top = ABAT::ptr(cmp);
top = cmp.ptr();

if (top == nullptr)
break;

next = top->next.load(std::memory_order_acquire);
} while (!stack.compare_exchange(cmp, next));
} while (!cmp.store_conditional(next));

return top;
}
Expand All @@ -63,11 +63,11 @@ namespace snmalloc

do
{
top = ABAT::ptr(cmp);
top = cmp.ptr();

if (top == nullptr)
break;
} while (!stack.compare_exchange(cmp, nullptr));
} while (!cmp.store_conditional(nullptr));

return top;
}
Expand Down
Loading

0 comments on commit 74657d9

Please sign in to comment.