
Lazy tls #64

Merged: 11 commits, Jul 8, 2019
2 changes: 1 addition & 1 deletion CMakeLists.txt
@@ -105,7 +105,7 @@ if(NOT DEFINED SNMALLOC_ONLY_HEADER_LIBRARY)
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Zi")
set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /DEBUG")
else()
add_compile_options(-march=native -fno-exceptions -fno-rtti -g -ftls-model=initial-exec)
add_compile_options(-march=native -fno-exceptions -fno-rtti -g -ftls-model=initial-exec -fomit-frame-pointer)
endif()

macro(subdirlist result curdir)
113 changes: 96 additions & 17 deletions src/mem/alloc.h
@@ -234,6 +234,11 @@ namespace snmalloc
FastFreeLists() : small_fast_free_lists() {}
};

SNMALLOC_FAST_PATH void* no_replacement(void*)
{
return nullptr;
}

/**
* Allocator. This class is parameterised on three template parameters. The
* `MemoryProvider` defines the source of memory for this allocator.
@@ -245,18 +250,27 @@
* to associate metadata with large (16MiB, by default) regions, allowing an
* allocator to find the allocator responsible for that region.
*
* The final template parameter, `IsQueueInline`, defines whether the
* The next template parameter, `IsQueueInline`, defines whether the
* message queue for this allocator should be stored as a field of the
* allocator (`true`) or provided externally, allowing it to be anywhere else
* in the address space (`false`).
*
* The final template parameter provides a hook to allow the allocator in use
* to be dynamically modified. This is used to implement a trick from
* mimalloc that avoids a conditional branch on the fast path. We initialise
* the thread-local allocator pointer with the address of a global allocator,
* which never owns any memory. When an allocation reaches a slow path, we
* call the replacement function, which can construct a real allocator and
* redirect the call to it.
*/
template<
class MemoryProvider = GlobalVirtual,
class PageMap = SNMALLOC_DEFAULT_PAGEMAP,
bool IsQueueInline = true>
bool IsQueueInline = true,
void* (*Replacement)(void*) = no_replacement>
class Allocator
: public FastFreeLists,
public Pooled<Allocator<MemoryProvider, PageMap, IsQueueInline>>
public Pooled<
Allocator<MemoryProvider, PageMap, IsQueueInline, Replacement>>
{
LargeAlloc<MemoryProvider> large_allocator;
PageMap page_map;
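
The trick described in the comment above can be illustrated with a small stand-alone sketch. Everything below uses made-up names (ToyAlloc, thread_alloc, real_allocs) rather than the snmalloc API; it only shows the shape of the mechanism: the thread-local pointer starts out aimed at a global fake allocator that owns no memory, so the allocation entry point needs no initialisation branch, and the fake allocator's slow path uses the replacement hook to build a real allocator and repoint the thread-local pointer.

```cpp
// Minimal sketch of the lazy-initialisation trick (hypothetical names, not
// the snmalloc API).
#include <cstddef>
#include <cstdlib>

struct ToyAlloc
{
  void* (*replacement)(void*); // plays the role of the Replacement parameter

  void* alloc(std::size_t size)
  {
    // Fast path elided. On the slow path, ask the hook whether a different
    // allocator should handle this request.
    if (void* real = replacement(this))
      return static_cast<ToyAlloc*>(real)->alloc(size);
    return std::malloc(size); // stand-in for the real slow path
  }
};

void* no_replacement(void*) { return nullptr; } // hook for real allocators
void* lazy_replacement(void* self);             // hook for the fake one

ToyAlloc global_fake{lazy_replacement};         // never owns any memory
thread_local ToyAlloc* thread_alloc = &global_fake;

ToyAlloc real_allocs[64]; // stand-in allocator pool
int next_alloc = 0;       // illustration only: not thread-safe

void* lazy_replacement(void* self)
{
  if (self != &global_fake)
    return nullptr; // already a real allocator
  ToyAlloc* real = &real_allocs[next_alloc++];
  real->replacement = no_replacement;
  thread_alloc = real; // later calls skip the fake allocator
  return real;
}

void* my_malloc(std::size_t size)
{
  return thread_alloc->alloc(size); // no "is this thread initialised?" branch
}
```

Under this scheme, `no_replacement` (added at the top of this file) is the default hook, so allocators instantiated directly behave exactly as before.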
@@ -274,7 +288,7 @@ namespace snmalloc
size_t size,
ZeroMem zero_mem = NoZero,
AllowReserve allow_reserve = YesReserve>
ALLOCATOR void* alloc()
SNMALLOC_FAST_PATH ALLOCATOR void* alloc()
{
static_assert(size != 0, "Size must not be zero.");
#ifdef USE_MALLOC
@@ -310,7 +324,7 @@
}

template<ZeroMem zero_mem = NoZero, AllowReserve allow_reserve = YesReserve>
inline ALLOCATOR void* alloc(size_t size)
SNMALLOC_FAST_PATH ALLOCATOR void* alloc(size_t size)
{
#ifdef USE_MALLOC
static_assert(
@@ -637,25 +651,33 @@ namespace snmalloc

struct RemoteCache
{
size_t size = 0;
/**
* The total amount of memory stored awaiting dispatch to other
* allocators. This is initialised to the maximum size that we use
* before caching so that, when we hit the slow path and need to dispatch
* everything, we can check if we are a real allocator and lazily provide
* a real allocator.
*/
size_t size = REMOTE_CACHE;
RemoteList list[REMOTE_SLOTS];

/// Used to find the index into the array of queues for remote
/// deallocation. The parameter r indicates which round of sending
/// this is.
inline size_t get_slot(size_t id, size_t r)
{
constexpr size_t allocator_size =
sizeof(Allocator<MemoryProvider, PageMap, IsQueueInline>);
constexpr size_t allocator_size = sizeof(
Allocator<MemoryProvider, PageMap, IsQueueInline, Replacement>);
constexpr size_t initial_shift =
bits::next_pow2_bits_const(allocator_size);
assert((initial_shift - (r * REMOTE_SLOT_BITS)) < 64);
return (id >> (initial_shift + (r * REMOTE_SLOT_BITS))) & REMOTE_MASK;
}
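
As a concrete illustration of how `get_slot` spreads remote deallocations over the queue array, the sketch below plugs example numbers into the same shift-and-mask computation. The constant values (REMOTE_SLOT_BITS, the allocator size, and hence initial_shift) are assumptions chosen for the example, not the values from the snmalloc headers; the point is that the low bits of the id below the allocator's size granularity carry no information (which is presumably why initial_shift is derived from sizeof(Allocator)), so each sending round r consumes the next REMOTE_SLOT_BITS bits.

```cpp
// Worked example of the slot calculation, using assumed constants.
#include <cstddef>
#include <cstdio>

int main()
{
  constexpr std::size_t REMOTE_SLOT_BITS = 6; // assumed
  constexpr std::size_t REMOTE_MASK = (std::size_t(1) << REMOTE_SLOT_BITS) - 1;
  constexpr std::size_t allocator_size = 8192; // assumed
  constexpr std::size_t initial_shift = 13;    // next_pow2_bits(8192)

  std::size_t id = 0x9c402000; // example allocator id
  for (std::size_t r = 0; r < 3; r++)
  {
    std::size_t slot =
      (id >> (initial_shift + (r * REMOTE_SLOT_BITS))) & REMOTE_MASK;
    std::printf("round %zu -> slot %zu\n", r, slot);
  }
}
```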

SNMALLOC_FAST_PATH void
dealloc(alloc_id_t target_id, void* p, sizeclass_t sizeclass)
dealloc_sized(alloc_id_t target_id, void* p, size_t objectsize)
{
this->size += sizeclass_to_size(sizeclass);
this->size += objectsize;

Remote* r = static_cast<Remote*>(p);
r->set_target_id(target_id);
Expand All @@ -666,6 +688,12 @@ namespace snmalloc
l->last = r;
}

SNMALLOC_FAST_PATH void
dealloc(alloc_id_t target_id, void* p, sizeclass_t sizeclass)
{
dealloc_sized(target_id, p, sizeclass_to_size(sizeclass));
}

void post(alloc_id_t id)
{
// When the cache gets big, post lists to their target allocators.
@@ -780,7 +808,10 @@

public:
Allocator(
MemoryProvider& m, PageMap&& p = PageMap(), RemoteAllocator* r = nullptr)
MemoryProvider& m,
PageMap&& p = PageMap(),
RemoteAllocator* r = nullptr,
bool isFake = false)
: large_allocator(m), page_map(p)
{
if constexpr (IsQueueInline)
@@ -796,6 +827,11 @@
if (id() >= static_cast<alloc_id_t>(-1))
error("Id should not be -1");

// If this is fake, don't do any of the bits of initialisation that may
// allocate memory.
if (isFake)
return;

init_message_queue();
message_queue().invariant();

@@ -1036,7 +1072,7 @@
assert(sizeclass < NUM_SMALL_CLASSES);
auto& fl = small_fast_free_lists[sizeclass];
auto head = fl.value;
if (likely((reinterpret_cast<size_t>(head) & 1) == 0))
if (likely(head != nullptr))
{
void* p = head;
// Read the next slot from the memory that's about to be allocated.
@@ -1055,6 +1091,11 @@
template<ZeroMem zero_mem, AllowReserve allow_reserve>
SNMALLOC_SLOW_PATH void* small_alloc_slow(sizeclass_t sizeclass)
{
if (void* replacement = Replacement(this))
{
return reinterpret_cast<Allocator*>(replacement)
->template small_alloc_slow<zero_mem, allow_reserve>(sizeclass);
}
handle_message_queue();
size_t rsize = sizeclass_to_size(sizeclass);
auto& sl = small_classes[sizeclass];
@@ -1205,6 +1246,12 @@
}
else
{
if (void* replacement = Replacement(this))
{
return reinterpret_cast<Allocator*>(replacement)
->template medium_alloc<zero_mem, allow_reserve>(
sizeclass, rsize, size);
}
slab = reinterpret_cast<Mediumslab*>(
large_allocator.template alloc<NoZero, allow_reserve>(
0, SUPERSLAB_SIZE));
@@ -1277,6 +1324,12 @@
zero_mem == YesZero ? "zeromem" : "nozeromem",
allow_reserve == NoReserve ? "noreserve" : "reserve"));

if (void* replacement = Replacement(this))
{
return reinterpret_cast<Allocator*>(replacement)
->template large_alloc<zero_mem, allow_reserve>(size);
}

size_t size_bits = bits::next_pow2_bits(size);
size_t large_class = size_bits - SUPERSLAB_BITS;
assert(large_class < NUM_LARGE_CLASSES);
@@ -1313,21 +1366,47 @@
large_allocator.dealloc(slab, large_class);
}

SNMALLOC_FAST_PATH void
remote_dealloc(RemoteAllocator* target, void* p, sizeclass_t sizeclass)
#if defined(__GNUC__) && !defined(__clang__) && !defined(__OPTIMIZE__)
// Don't force this to be always inlined in debug builds with GCC, because
// the inlining can fail there and GCC then raises an error for the
// always_inline attribute.
inline
#else
SNMALLOC_FAST_PATH
#endif
void
remote_dealloc(RemoteAllocator* target, void* p, sizeclass_t sizeclass)
{
MEASURE_TIME(remote_dealloc, 4, 16);
assert(target->id() != id());

handle_message_queue();

void* offseted = apply_cache_friendly_offset(p, sizeclass);

// Check whether this will overflow the cache first. If we are a fake
// allocator, then our cache will always be full and so we will never hit
// this path.
size_t sz = sizeclass_to_size(sizeclass);
if ((remote.size + sz) < REMOTE_CACHE)
{
stats().remote_free(sizeclass);
Review comment (Member): Add assert that we are not fake at this point!

Review comment (Member): Perhaps put something similar in other places where we don't expect to see the fake allocator.

remote.dealloc_sized(target->id(), offseted, sz);
return;
}
// Now that we've established that we're in the slow path (if we're a
// real allocator, we will have to empty our cache now), check if we are
// a real allocator and construct one if we aren't.
if (void* replacement = Replacement(this))
{
// We have to do a dealloc, not a remote_dealloc here because this may
// have been allocated with the allocator that we've just had returned.
reinterpret_cast<Allocator*>(replacement)->dealloc(p);
return;
}

stats().remote_free(sizeclass);
remote.dealloc(target->id(), offseted, sizeclass);

if (remote.size < REMOTE_CACHE)
return;

stats().remote_post();
remote.post(id());
}
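
The branch structure of remote_dealloc above is where the RemoteCache::size initialisation earlier in this file pays off. Below is a minimal sketch of the capacity check, with an assumed value for REMOTE_CACHE: a real allocator starts from an empty cache and stays on the fast path, while the fake allocator is constructed with size == REMOTE_CACHE, so its very first remote deallocation fails the check and drops into the slow path where Replacement can supply a real allocator.

```cpp
#include <cstddef>

constexpr std::size_t REMOTE_CACHE = 1 << 20; // assumed value, for illustration

// The test remote_dealloc performs before caching a remote deallocation.
bool stays_on_fast_path(std::size_t cached, std::size_t object_size)
{
  return (cached + object_size) < REMOTE_CACHE;
}

// A real allocator starts with an empty cache:
//   stays_on_fast_path(0, 64)            -> true  (cache locally)
// The fake allocator starts with size == REMOTE_CACHE:
//   stays_on_fast_path(REMOTE_CACHE, 64) -> false (slow path: post the cache,
//                                                  or lazily build a real allocator)
```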
21 changes: 17 additions & 4 deletions src/mem/globalalloc.h
@@ -6,11 +6,25 @@

namespace snmalloc
{
inline void* lazy_replacement(void*);
using Alloc =
Allocator<GlobalVirtual, SNMALLOC_DEFAULT_PAGEMAP, true, lazy_replacement>;

template<class MemoryProvider>
class AllocPool : Pool<Allocator<MemoryProvider>, MemoryProvider>
class AllocPool : Pool<
Allocator<
MemoryProvider,
SNMALLOC_DEFAULT_PAGEMAP,
true,
lazy_replacement>,
MemoryProvider>
{
using Alloc = Allocator<MemoryProvider>;
using Parent = Pool<Allocator<MemoryProvider>, MemoryProvider>;
using Alloc = Allocator<
MemoryProvider,
SNMALLOC_DEFAULT_PAGEMAP,
true,
lazy_replacement>;
using Parent = Pool<Alloc, MemoryProvider>;

public:
static AllocPool* make(MemoryProvider& mp)
@@ -175,5 +189,4 @@ namespace snmalloc
return AllocPool<MemoryProvider>::make(mp);
}

using Alloc = Allocator<GlobalVirtual>;
} // namespace snmalloc
10 changes: 5 additions & 5 deletions src/mem/metaslab.h
@@ -146,10 +146,10 @@ namespace snmalloc
{
#ifndef NDEBUG
size_t length = 0;
void* curr = pointer_offset(slab, head);
void* curr_slow = pointer_offset(slab, head);
void* curr = (head == 1) ? nullptr : pointer_offset(slab, head);
void* curr_slow = (head == 1) ? nullptr : pointer_offset(slab, head);
bool both = false;
while ((reinterpret_cast<size_t>(curr) & 1) == 0)
while (curr != nullptr)
{
curr = follow_next(curr);
if (both)
@@ -200,8 +200,8 @@ namespace snmalloc
UNUSED(length);

// Walk bump-free-list-segment accounting for unused space
void* curr = pointer_offset(slab, head);
while ((address_cast(curr) & 1) == 0)
void* curr = (head == 1) ? nullptr : pointer_offset(slab, head);
while (curr != nullptr)
{
// Check we are looking at a correctly aligned block
void* start = curr;
10 changes: 6 additions & 4 deletions src/mem/slab.h
@@ -7,7 +7,7 @@ namespace snmalloc
struct FreeListHead
{
// Use a value with bottom bit set for empty list.
void* value = pointer_offset<void*>(nullptr, 1);
void* value = nullptr;
};

class Slab
@@ -98,15 +98,16 @@ namespace snmalloc
}
else
{
Metaslab::store_next(curr, pointer_offset(this, bumpptr));
Metaslab::store_next(
curr, (bumpptr == 1) ? nullptr : pointer_offset(this, bumpptr));
}
curr = pointer_offset(this, bumpptr);
bumpptr = newbumpptr;
meta.allocated = meta.allocated + 1;
}

assert(curr != nullptr);
Metaslab::store_next(curr, pointer_offset<void*>(nullptr, 1));
Metaslab::store_next(curr, nullptr);
}
}

@@ -177,7 +178,8 @@
assert(meta.valid_head(is_short()));

// Set the next pointer to the previous head.
Metaslab::store_next(p, pointer_offset(this, head));
Metaslab::store_next(
p, (head == 1) ? nullptr : pointer_offset(this, head));
meta.debug_slab_invariant(is_short(), this);
return true;
}
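
One pattern recurs throughout the metaslab.h and slab.h hunks above: the free-list terminator changes from a pointer with the bottom bit set to plain nullptr, while head and bumpptr remain slab-relative offsets that still use 1 as the empty sentinel. The conversion sprinkled through the diff amounts to the following sketch (the helper name is invented for illustration):

```cpp
#include <cstdint>

// Map a slab-relative offset to a free-list pointer: an offset of 1 marks the
// empty list and becomes the new nullptr terminator; any other offset is added
// to the slab base, as pointer_offset(slab, head) does in the diff.
void* offset_to_free_list_entry(void* slab, std::uintptr_t head)
{
  if (head == 1)
    return nullptr;
  return reinterpret_cast<char*>(slab) + head;
}
```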