From 1e454cc8383f76780abb544ca3377f1ea79ca823 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Mon, 9 Mar 2020 18:37:59 +0000 Subject: [PATCH 01/37] Remote dealloc refactor. --- src/mem/alloc.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/mem/alloc.h b/src/mem/alloc.h index 9d14e0a85..42a80fa00 100644 --- a/src/mem/alloc.h +++ b/src/mem/alloc.h @@ -1277,12 +1277,13 @@ namespace snmalloc return; } - remote_dealloc_slow(target, p, sizeclass); + remote_dealloc_slow(target, p , sizeclass); } - SNMALLOC_SLOW_PATH void - remote_dealloc_slow(RemoteAllocator* target, void* p, sizeclass_t sizeclass) + SNMALLOC_SLOW_PATH + void remote_dealloc_slow(RemoteAllocator* target, void* offseted, sizeclass_t sizeclass) { + MEASURE_TIME(remote_dealloc, 4, 16); SNMALLOC_ASSERT(target->id() != id()); // Now that we've established that we're in the slow path (if we're a @@ -1290,6 +1291,7 @@ namespace snmalloc // a real allocator and construct one if we aren't. if (void* replacement = Replacement(this)) { + void* p = remove_cache_friendly_offset(offseted, sizeclass); // We have to do a dealloc, not a remote_dealloc here because this may // have been allocated with the allocator that we've just had returned. reinterpret_cast(replacement)->dealloc(p); From 91b7e08635c6310e061367dd50c4231069d05eab Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Mon, 9 Mar 2020 19:00:14 +0000 Subject: [PATCH 02/37] Clang format --- src/mem/alloc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mem/alloc.h b/src/mem/alloc.h index 42a80fa00..217e63fdc 100644 --- a/src/mem/alloc.h +++ b/src/mem/alloc.h @@ -1281,7 +1281,8 @@ namespace snmalloc } SNMALLOC_SLOW_PATH - void remote_dealloc_slow(RemoteAllocator* target, void* offseted, sizeclass_t sizeclass) + void remote_dealloc_slow( + RemoteAllocator* target, void* offseted, sizeclass_t sizeclass) { MEASURE_TIME(remote_dealloc, 4, 16); SNMALLOC_ASSERT(target->id() != id()); From ffb7b82643a8671e6f6f517e24901350d8da7266 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Mon, 16 Mar 2020 11:11:15 +0000 Subject: [PATCH 03/37] Clang format again. --- src/mem/alloc.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/mem/alloc.h b/src/mem/alloc.h index 217e63fdc..87ef9c672 100644 --- a/src/mem/alloc.h +++ b/src/mem/alloc.h @@ -1277,16 +1277,15 @@ namespace snmalloc return; } - remote_dealloc_slow(target, p , sizeclass); + remote_dealloc_slow(target, p, sizeclass); } SNMALLOC_SLOW_PATH void remote_dealloc_slow( RemoteAllocator* target, void* offseted, sizeclass_t sizeclass) { - MEASURE_TIME(remote_dealloc, 4, 16); SNMALLOC_ASSERT(target->id() != id()); - + // Now that we've established that we're in the slow path (if we're a // real allocator, we will have to empty our cache now), check if we are // a real allocator and construct one if we aren't. From 78f40d46098e42d99c0105583c65f929d8dc972d Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Mon, 9 Mar 2020 20:13:21 +0000 Subject: [PATCH 04/37] Improve remote dealloc Change remote to count down 0, so fast path does not need a constant. Use signed value so that branch does not depend on addition. 
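The diff attached to this commit only shows a whitespace cleanup, so the counting change described above is easiest to see in a standalone sketch. The snippet below is illustrative only, with an assumed `REMOTE_CACHE` capacity and invented field names rather than the real snmalloc members: counting a signed capacity down towards zero removes the need for a limit constant on the fast path, and the branch can be derived from the sign of the counter rather than a fresh comparison against that constant.

```cpp
#include <cstddef>
#include <cstdint>

// Assumed capacity for illustration; the real value comes from snmalloc's
// configuration, not from this constant.
static constexpr size_t REMOTE_CACHE = 1 << 20;

// Count up and compare: the fast-path branch needs the constant at every
// call site.
struct CountUp
{
  size_t used = 0;
  bool need_post(size_t object_size)
  {
    used += object_size;
    return used > REMOTE_CACHE;
  }
};

// Count down with a signed value: the branch only tests the sign of the
// running counter, so no limit constant appears on the fast path.
struct CountDown
{
  int64_t capacity = static_cast<int64_t>(REMOTE_CACHE);
  bool need_post(size_t object_size)
  {
    capacity -= static_cast<int64_t>(object_size);
    return capacity < 0;
  }
};
```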
--- src/mem/alloc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mem/alloc.h b/src/mem/alloc.h index 87ef9c672..2fde08cc1 100644 --- a/src/mem/alloc.h +++ b/src/mem/alloc.h @@ -1285,7 +1285,7 @@ namespace snmalloc RemoteAllocator* target, void* offseted, sizeclass_t sizeclass) { SNMALLOC_ASSERT(target->id() != id()); - + // Now that we've established that we're in the slow path (if we're a // real allocator, we will have to empty our cache now), check if we are // a real allocator and construct one if we aren't. From a22e4385a3e1ea1c7eddbb91cb39f881b53e10c4 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Tue, 10 Mar 2020 07:58:17 +0000 Subject: [PATCH 05/37] CR feedback. --- src/mem/alloc.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/mem/alloc.h b/src/mem/alloc.h index 2fde08cc1..29f683f3f 100644 --- a/src/mem/alloc.h +++ b/src/mem/alloc.h @@ -1282,7 +1282,7 @@ namespace snmalloc SNMALLOC_SLOW_PATH void remote_dealloc_slow( - RemoteAllocator* target, void* offseted, sizeclass_t sizeclass) + RemoteAllocator* target, void* p, sizeclass_t sizeclass) { SNMALLOC_ASSERT(target->id() != id()); @@ -1291,7 +1291,6 @@ namespace snmalloc // a real allocator and construct one if we aren't. if (void* replacement = Replacement(this)) { - void* p = remove_cache_friendly_offset(offseted, sizeclass); // We have to do a dealloc, not a remote_dealloc here because this may // have been allocated with the allocator that we've just had returned. reinterpret_cast(replacement)->dealloc(p); From 0274b71501bfd76c393ad7f45b7f221e5a626ab7 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Tue, 10 Mar 2020 08:03:52 +0000 Subject: [PATCH 06/37] Clang format. --- src/mem/alloc.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/mem/alloc.h b/src/mem/alloc.h index 29f683f3f..9d14e0a85 100644 --- a/src/mem/alloc.h +++ b/src/mem/alloc.h @@ -1280,9 +1280,8 @@ namespace snmalloc remote_dealloc_slow(target, p, sizeclass); } - SNMALLOC_SLOW_PATH - void remote_dealloc_slow( - RemoteAllocator* target, void* p, sizeclass_t sizeclass) + SNMALLOC_SLOW_PATH void + remote_dealloc_slow(RemoteAllocator* target, void* p, sizeclass_t sizeclass) { SNMALLOC_ASSERT(target->id() != id()); From d0806545b81da3d458370406fcd4070c04ce3648 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Tue, 10 Mar 2020 13:38:22 +0000 Subject: [PATCH 07/37] Inline remote_dealloc The fast path of remote_dealloc is sufficiently compact that it can be inlined. --- src/mem/alloc.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/mem/alloc.h b/src/mem/alloc.h index 9d14e0a85..1b9b0bcd0 100644 --- a/src/mem/alloc.h +++ b/src/mem/alloc.h @@ -1255,11 +1255,10 @@ namespace snmalloc large_allocator.dealloc(slab, large_class); } - // Note that this is on the slow path as it lead to better code. - // As it is tail, not inlining means that it is jumped to, so has no perf - // impact on the producer consumer scenarios, and doesn't require register - // spills in the fast path for local deallocation. - SNMALLOC_SLOW_PATH + // This is still considered the fast path as all the complex code is tail + // called in its slow path. This leads to one fewer unconditional jump in + // Clang. 
+ SNMALLOC_FAST_PATH void remote_dealloc(RemoteAllocator* target, void* p, sizeclass_t sizeclass) { MEASURE_TIME(remote_dealloc, 4, 16); From bac633610a4939023d9115c62601f93ed163ef5e Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Tue, 10 Mar 2020 17:52:16 +0000 Subject: [PATCH 08/37] Improve fast path in Slab::alloc Turn the internal structure into tail calls, to improve fast path. Should be no algorithmic changes. --- src/mem/slab.h | 150 ++++++++++++++++++++++++++++--------------------- 1 file changed, 85 insertions(+), 65 deletions(-) diff --git a/src/mem/slab.h b/src/mem/slab.h index 3274b0561..e1a2cb9fa 100644 --- a/src/mem/slab.h +++ b/src/mem/slab.h @@ -32,7 +32,7 @@ namespace snmalloc } template - inline void* alloc( + SNMALLOC_FAST_PATH void* alloc( SlabList& sl, FreeListHead& fast_free_list, size_t rsize, @@ -48,83 +48,103 @@ namespace snmalloc SNMALLOC_ASSERT(!meta.is_full()); meta.debug_slab_invariant(this); - void* p = nullptr; - bool p_has_value = false; + if (unlikely(head == nullptr)) + { + return alloc_refill(meta, sl, fast_free_list, rsize, memory_provider); + } - if (head == nullptr) + return alloc_pull_from_list(meta, fast_free_list, rsize, memory_provider); + } + + template + SNMALLOC_FAST_PATH void* alloc_pull_from_list( + Metaslab& meta, + FreeListHead& fast_free_list, + size_t rsize, + MemoryProvider& memory_provider) + { + void* p = meta.head; + + // Read the next slot from the memory that's about to be allocated. + void* next = Metaslab::follow_next(p); + // Put everything in allocators small_class free list. + meta.head = nullptr; + fast_free_list.value = next; + // Treat stealing the free list as allocating it all. + // Link is not in use, i.e. - 1 is required. + meta.needed = meta.allocated - 1; + + p = remove_cache_friendly_offset(p, meta.sizeclass); + + return alloc_finish(meta, p, rsize, memory_provider); + } + + template + SNMALLOC_SLOW_PATH void* alloc_refill( + Metaslab& meta, + SlabList& sl, + FreeListHead& fast_free_list, + size_t rsize, + MemoryProvider& memory_provider) + { + size_t bumpptr = get_initial_offset(meta.sizeclass, is_short()); + bumpptr += meta.allocated * rsize; + if (bumpptr == SLAB_SIZE) + { + // Everything is in use, so we need all entries to be + // return before we can reclaim this slab. + meta.needed = meta.allocated; + + void* link = pointer_offset(this, meta.link); + void* p = remove_cache_friendly_offset(link, meta.sizeclass); + + meta.set_full(); + sl.pop(); + return alloc_finish(meta, p, rsize, memory_provider); + } + // Allocate the last object on the current page if there is one, + // and then thread the next free list worth of allocations. + bool crossed_page_boundary = false; + void* curr = nullptr; + while (true) { - size_t bumpptr = get_initial_offset(meta.sizeclass, is_short()); - bumpptr += meta.allocated * rsize; - if (bumpptr == SLAB_SIZE) + size_t newbumpptr = bumpptr + rsize; + auto alignedbumpptr = bits::align_up(bumpptr - 1, OS_PAGE_SIZE); + auto alignednewbumpptr = bits::align_up(newbumpptr, OS_PAGE_SIZE); + + if (alignedbumpptr != alignednewbumpptr) { - // Everything is in use, so we need all entries to be - // return before we can reclaim this slab. - meta.needed = meta.allocated; + // We have crossed a page boundary already, so + // lets stop building our free list. 
+ if (crossed_page_boundary) + break; - void* link = pointer_offset(this, meta.link); - p = remove_cache_friendly_offset(link, meta.sizeclass); + crossed_page_boundary = true; + } - meta.set_full(); - sl.pop(); - p_has_value = true; + if (curr == nullptr) + { + meta.head = pointer_offset(this, bumpptr); } else { - // Allocate the last object on the current page if there is one, - // and then thread the next free list worth of allocations. - bool crossed_page_boundary = false; - void* curr = nullptr; - while (true) - { - size_t newbumpptr = bumpptr + rsize; - auto alignedbumpptr = bits::align_up(bumpptr - 1, OS_PAGE_SIZE); - auto alignednewbumpptr = bits::align_up(newbumpptr, OS_PAGE_SIZE); - - if (alignedbumpptr != alignednewbumpptr) - { - // We have crossed a page boundary already, so - // lets stop building our free list. - if (crossed_page_boundary) - break; - - crossed_page_boundary = true; - } - - if (curr == nullptr) - { - meta.head = pointer_offset(this, bumpptr); - } - else - { - Metaslab::store_next( - curr, (bumpptr == 1) ? nullptr : pointer_offset(this, bumpptr)); - } - curr = pointer_offset(this, bumpptr); - bumpptr = newbumpptr; - meta.allocated = meta.allocated + 1; - } - - SNMALLOC_ASSERT(curr != nullptr); - Metaslab::store_next(curr, nullptr); + Metaslab::store_next( + curr, (bumpptr == 1) ? nullptr : pointer_offset(this, bumpptr)); } + curr = pointer_offset(this, bumpptr); + bumpptr = newbumpptr; + meta.allocated = meta.allocated + 1; } - if (!p_has_value) - { - p = meta.head; - - // Read the next slot from the memory that's about to be allocated. - void* next = Metaslab::follow_next(p); - // Put everything in allocators small_class free list. - meta.head = nullptr; - fast_free_list.value = next; - // Treat stealing the free list as allocating it all. - // Link is not in use, i.e. - 1 is required. - meta.needed = meta.allocated - 1; + SNMALLOC_ASSERT(curr != nullptr); + Metaslab::store_next(curr, nullptr); - p = remove_cache_friendly_offset(p, meta.sizeclass); - } + return alloc_pull_from_list(meta, fast_free_list, rsize, memory_provider); + } + template + SNMALLOC_FAST_PATH void* alloc_finish(Metaslab& meta, void* p, size_t rsize, MemoryProvider& memory_provider) + { SNMALLOC_ASSERT(is_start_of_object(Superslab::get(p), p)); meta.debug_slab_invariant(this); From a52aca673381f0796d770896d7cfbf2dc1703c49 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Tue, 10 Mar 2020 17:55:18 +0000 Subject: [PATCH 09/37] Refactor initialisation to help fast path. Break lazy initialisation into two functions, so it is easier to codegen fast paths. --- src/mem/alloc.h | 24 ++++++++++++------------ src/mem/globalalloc.h | 16 +++++++++------- src/mem/threadalloc.h | 27 +++++++++++++++------------ 3 files changed, 36 insertions(+), 31 deletions(-) diff --git a/src/mem/alloc.h b/src/mem/alloc.h index 1b9b0bcd0..67765bb12 100644 --- a/src/mem/alloc.h +++ b/src/mem/alloc.h @@ -49,11 +49,6 @@ namespace snmalloc FastFreeLists() : small_fast_free_lists() {} }; - SNMALLOC_FAST_PATH void* no_replacement(void*) - { - return nullptr; - } - /** * Allocator. This class is parameterised on three template parameters. The * `MemoryProvider` defines the source of memory for this allocator. @@ -78,14 +73,16 @@ namespace snmalloc * replacement function. 
*/ template< + bool (*IsFirstAllocation)(void*), + void* (*InitThreadAllocator)(), class MemoryProvider = GlobalVirtual, class ChunkMap = SNMALLOC_DEFAULT_CHUNKMAP, - bool IsQueueInline = true, - void* (*Replacement)(void*) = no_replacement> + bool IsQueueInline = true + > class Allocator : public FastFreeLists, public Pooled< - Allocator> + Allocator> { LargeAlloc large_allocator; ChunkMap chunk_map; @@ -506,7 +503,7 @@ namespace snmalloc inline size_t get_slot(size_t id, size_t r) { constexpr size_t allocator_size = sizeof( - Allocator); + Allocator); constexpr size_t initial_shift = bits::next_pow2_bits_const(allocator_size); SNMALLOC_ASSERT((initial_shift + (r * REMOTE_SLOT_BITS)) < 64); @@ -1146,8 +1143,9 @@ namespace snmalloc } else { - if (void* replacement = Replacement(this)) + if (IsFirstAllocation(this)) { + void* replacement = InitThreadAllocator(); return reinterpret_cast(replacement) ->template medium_alloc( sizeclass, rsize, size); @@ -1218,8 +1216,9 @@ namespace snmalloc zero_mem == YesZero ? "zeromem" : "nozeromem", allow_reserve == NoReserve ? "noreserve" : "reserve")); - if (void* replacement = Replacement(this)) + if (IsFirstAllocation(this)) { + void* replacement = InitThreadAllocator(); return reinterpret_cast(replacement) ->template large_alloc(size); } @@ -1287,8 +1286,9 @@ namespace snmalloc // Now that we've established that we're in the slow path (if we're a // real allocator, we will have to empty our cache now), check if we are // a real allocator and construct one if we aren't. - if (void* replacement = Replacement(this)) + if (IsFirstAllocation(this)) { + void* replacement = InitThreadAllocator(); // We have to do a dealloc, not a remote_dealloc here because this may // have been allocated with the allocator that we've just had returned. reinterpret_cast(replacement)->dealloc(p); diff --git a/src/mem/globalalloc.h b/src/mem/globalalloc.h index c790026fa..a344f3b99 100644 --- a/src/mem/globalalloc.h +++ b/src/mem/globalalloc.h @@ -6,24 +6,26 @@ namespace snmalloc { - inline void* lazy_replacement(void*); + inline bool first_allocation(void*); + void* init_thread_allocator(); + using Alloc = - Allocator; + Allocator; template class AllocPool : Pool< Allocator< + first_allocation, init_thread_allocator, MemoryProvider, SNMALLOC_DEFAULT_CHUNKMAP, - true, - lazy_replacement>, + true>, MemoryProvider> { using Alloc = Allocator< - MemoryProvider, + first_allocation, init_thread_allocator, MemoryProvider, SNMALLOC_DEFAULT_CHUNKMAP, - true, - lazy_replacement>; + true + >; using Parent = Pool; public: diff --git a/src/mem/threadalloc.h b/src/mem/threadalloc.h index 79f8e4db8..cdea38b52 100644 --- a/src/mem/threadalloc.h +++ b/src/mem/threadalloc.h @@ -44,12 +44,18 @@ namespace snmalloc * alloc is performing initialization, so this is not required, and just * always returns nullptr to specify no new allocator is required. */ - SNMALLOC_FAST_PATH void* lazy_replacement(void* existing) + SNMALLOC_FAST_PATH bool first_allocation(void* existing) { UNUSED(existing); + return false; + } + + SNMALLOC_FAST_PATH void* init_thread_allocator() + { return nullptr; } + using ThreadAlloc = ThreadAllocUntypedWrapper; #else /** @@ -147,10 +153,11 @@ namespace snmalloc return get_reference(); # else auto alloc = get_reference(); - auto new_alloc = lazy_replacement(alloc); - return (likely(new_alloc == nullptr)) ? 
- alloc : - reinterpret_cast(new_alloc); + if (unlikely(first_allocation(alloc))) + { + alloc = reinterpret_cast(init_thread_allocator()); + } + return alloc; # endif } }; @@ -219,7 +226,7 @@ namespace snmalloc * the global placeholder is inlined, the rest of it is only hit in a very * unusual case and so should go off the fast path. */ - SNMALLOC_SLOW_PATH inline void* lazy_replacement_slow() + SNMALLOC_SLOW_PATH inline void* init_thread_allocator() { auto*& local_alloc = ThreadAlloc::get_reference(); SNMALLOC_ASSERT(local_alloc == &GlobalPlaceHolder); @@ -237,13 +244,9 @@ namespace snmalloc * so. If we have not allocated a per-thread allocator yet, then this * function will allocate one. */ - SNMALLOC_FAST_PATH void* lazy_replacement(void* existing) + SNMALLOC_FAST_PATH bool first_allocation(void* existing) { - if (existing != &GlobalPlaceHolder) - { - return nullptr; - } - return lazy_replacement_slow(); + return existing != &GlobalPlaceHolder; } #endif } // namespace snmalloc From bd8c443e9f25ead315d5c6783a6fb6e53c43a32a Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Tue, 10 Mar 2020 17:57:44 +0000 Subject: [PATCH 10/37] Fixup --- src/mem/alloc.h | 3 ++- src/mem/threadalloc.h | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/mem/alloc.h b/src/mem/alloc.h index 67765bb12..0ca389c15 100644 --- a/src/mem/alloc.h +++ b/src/mem/alloc.h @@ -996,8 +996,9 @@ namespace snmalloc template SNMALLOC_SLOW_PATH void* small_alloc_slow(sizeclass_t sizeclass) { - if (void* replacement = Replacement(this)) + if (IsFirstAllocation(this)) { + void* replacement = InitThreadAllocator(); return reinterpret_cast(replacement) ->template small_alloc_inner(sizeclass); } diff --git a/src/mem/threadalloc.h b/src/mem/threadalloc.h index cdea38b52..9a80709a9 100644 --- a/src/mem/threadalloc.h +++ b/src/mem/threadalloc.h @@ -246,7 +246,7 @@ namespace snmalloc */ SNMALLOC_FAST_PATH bool first_allocation(void* existing) { - return existing != &GlobalPlaceHolder; + return existing == &GlobalPlaceHolder; } #endif } // namespace snmalloc From 267a72608fa52e88f70443f69166cffa8624e0ac Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Tue, 10 Mar 2020 18:01:14 +0000 Subject: [PATCH 11/37] Minor tidy to statically sized dealloc. --- src/mem/alloc.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/mem/alloc.h b/src/mem/alloc.h index 0ca389c15..653081882 100644 --- a/src/mem/alloc.h +++ b/src/mem/alloc.h @@ -199,17 +199,14 @@ namespace snmalloc UNUSED(size); return free(p); #else - constexpr sizeclass_t sizeclass = size_to_sizeclass_const(size); - handle_message_queue(); - if (sizeclass < NUM_SMALL_CLASSES) { Superslab* super = Superslab::get(p); RemoteAllocator* target = super->get_allocator(); - if (target == public_state()) + if (likely(target == public_state())) small_dealloc(super, p, sizeclass); else remote_dealloc(target, p, sizeclass); @@ -219,7 +216,7 @@ namespace snmalloc Mediumslab* slab = Mediumslab::get(p); RemoteAllocator* target = slab->get_allocator(); - if (target == public_state()) + if (likely(target == public_state())) medium_dealloc(slab, p, sizeclass); else remote_dealloc(target, p, sizeclass); From 52c0ff048036ae506415d755812ed38bee6d3861 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Tue, 10 Mar 2020 18:11:13 +0000 Subject: [PATCH 12/37] Refactor semi-slow path for alloc Make the backup path a bit faster. Only algorithmic change is to delay checking for first allocation. Otherwise, should be unchanged. 
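Before the diff, here is a rough skeleton of the control-flow shape this refactor aims for. The names mirror the functions introduced in the patch, but the bodies are hypothetical stubs, not the real snmalloc signatures: the inline fast path only touches the per-sizeclass free list, and the message-queue check and the first-allocation check are pushed into successively rarer tail-called functions.

```cpp
#include <cstddef>

// Hypothetical skeleton of the call layering; all helpers are stubs.
struct MiniAlloc
{
  void* free_list = nullptr;

  void* small_alloc(size_t size)
  {
    if (free_list != nullptr)
    {
      // Fast path: pop the next object from the thread-local free list.
      void* p = free_list;
      free_list = *static_cast<void**>(p);
      return p;
    }
    return small_alloc_slow(size);
  }

  void* small_alloc_slow(size_t size)
  {
    // Semi-slow path: drain the remote message queue only if it is non-empty.
    if (has_messages())
      handle_message_queue();
    return small_alloc_next_free_list(size);
  }

  void* small_alloc_next_free_list(size_t size)
  {
    // Take a free list from an existing slab if one is available; otherwise
    // fall through to the rare path.
    return small_alloc_rare(size);
  }

  void* small_alloc_rare(size_t size)
  {
    // The first-allocation check is delayed to this point, so it is not paid
    // on the more common refill paths above.
    if (needs_initialisation())
      init_thread_allocator();
    return refill_from_new_slab(size);
  }

  // Stubs standing in for the real machinery.
  bool has_messages() { return false; }
  void handle_message_queue() {}
  bool needs_initialisation() { return false; }
  void init_thread_allocator() {}
  void* refill_from_new_slab(size_t) { return nullptr; }
};
```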
--- src/mem/alloc.h | 61 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 45 insertions(+), 16 deletions(-) diff --git a/src/mem/alloc.h b/src/mem/alloc.h index 653081882..d16258f62 100644 --- a/src/mem/alloc.h +++ b/src/mem/alloc.h @@ -850,6 +850,11 @@ namespace snmalloc remote.post(id()); } + SNMALLOC_FAST_PATH bool has_messages() + { + return !(message_queue().is_empty()); + } + SNMALLOC_FAST_PATH void handle_message_queue() { // Inline the empty check, but not necessarily the full queue handling. @@ -915,7 +920,7 @@ namespace snmalloc } template - Slab* alloc_slab(sizeclass_t sizeclass) + SNMALLOC_SLOW_PATH Slab* alloc_slab(sizeclass_t sizeclass) { stats().sizeclass_alloc_slab(sizeclass); if (Superslab::is_short_sizeclass(sizeclass)) @@ -987,41 +992,65 @@ namespace snmalloc return p; } - return small_alloc_slow(sizeclass); + if (likely(!has_messages())) + return small_alloc_new_free_list(sizeclass); + + return small_alloc_mq_slow(sizeclass); } template - SNMALLOC_SLOW_PATH void* small_alloc_slow(sizeclass_t sizeclass) + SNMALLOC_SLOW_PATH void* small_alloc_mq_slow(sizeclass_t sizeclass) { - if (IsFirstAllocation(this)) - { - void* replacement = InitThreadAllocator(); - return reinterpret_cast(replacement) - ->template small_alloc_inner(sizeclass); - } + handle_message_queue(); - stats().sizeclass_alloc(sizeclass); + return small_alloc_new_free_list(sizeclass); + } - handle_message_queue(); + template + SNMALLOC_FAST_PATH void* small_alloc_new_free_list(sizeclass_t sizeclass) + { size_t rsize = sizeclass_to_size(sizeclass); auto& sl = small_classes[sizeclass]; Slab* slab; - if (!sl.is_empty()) + if (likely(!sl.is_empty())) { + stats().sizeclass_alloc(sizeclass); + SlabLink* link = sl.get_head(); slab = link->get_slab(); + auto& ffl = small_fast_free_lists[sizeclass]; + return slab->alloc( + sl, ffl, rsize, large_allocator.memory_provider); } - else + + if (likely(!IsFirstAllocation(this))) { - slab = alloc_slab(sizeclass); + stats().sizeclass_alloc(sizeclass); + return small_alloc_new_slab(sizeclass); + } + return small_alloc_first_alloc(sizeclass); + } + template + SNMALLOC_SLOW_PATH void* small_alloc_first_alloc(sizeclass_t sizeclass) + { + auto replacement = InitThreadAllocator(); + return reinterpret_cast(replacement) + ->template small_alloc_inner(sizeclass); + } + + template + SNMALLOC_SLOW_PATH + void* small_alloc_new_slab(sizeclass_t sizeclass) + { + size_t rsize = sizeclass_to_size(sizeclass); + auto& sl = small_classes[sizeclass]; + Slab* slab = alloc_slab(sizeclass); if ((allow_reserve == NoReserve) && (slab == nullptr)) return nullptr; - sl.insert_back(slab->get_link()); - } auto& ffl = small_fast_free_lists[sizeclass]; return slab->alloc( sl, ffl, rsize, large_allocator.memory_provider); From e563bfbd6c3fb3218959f389a2068775bacb2872 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Thu, 12 Mar 2020 10:34:30 +0000 Subject: [PATCH 13/37] Test initial operation of a thread The fisrt operation a new thread takes is special. It results in allocating an allocator, and swinging it into the TLS. This makes this a very special path, that is rarely tested. This test generates a lot of threads to cover the first alloc and dealloc operations. 
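The path being exercised is the lazy set-up of the thread-local allocator. A minimal sketch of that pattern, using invented names rather than the real `GlobalPlaceHolder`/`init_thread_allocator` machinery in threadalloc.h: every thread starts out pointing at a shared placeholder that owns no memory, and the first operation detects this and swaps in a freshly acquired allocator.

```cpp
#include <cstddef>
#include <thread>

// Invented stand-in for the real allocator type.
struct Alloc
{
  void* alloc(size_t) { return nullptr; }
  void dealloc(void*) {}
};

static Alloc global_placeholder;                     // owns no memory
static thread_local Alloc* tls_alloc = &global_placeholder;

inline bool needs_initialisation(Alloc* a)
{
  return a == &global_placeholder;
}

Alloc* init_thread_allocator()
{
  // The real code acquires an allocator from a global pool and registers a
  // thread-exit cleanup; a plain allocation is enough for this sketch.
  if (tls_alloc == &global_placeholder)
    tls_alloc = new Alloc();
  return tls_alloc;
}

int main()
{
  std::thread t([] {
    // First operation on this thread: pays the initialisation cost once.
    Alloc* a = tls_alloc;
    if (needs_initialisation(a))
      a = init_thread_allocator();
    a->dealloc(a->alloc(16));
  });
  t.join();
}
```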
--- .../func/first_operation/first_operation.cc | 113 ++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 src/test/func/first_operation/first_operation.cc diff --git a/src/test/func/first_operation/first_operation.cc b/src/test/func/first_operation/first_operation.cc new file mode 100644 index 000000000..108da39eb --- /dev/null +++ b/src/test/func/first_operation/first_operation.cc @@ -0,0 +1,113 @@ +/** + * The first operation a thread performs takes a different path to every subsequent operation + * as it must lazily initialise the thread local allocator. This tests performs all sizes of + * allocation, and deallocation as the first operation. + ***/ + +#include "test/setup.h" +#include +#include + +void alloc1(size_t size) +{ + void* r = snmalloc::ThreadAlloc::get_noncachable()->alloc(size); + snmalloc::ThreadAlloc::get_noncachable()->dealloc(r); +} + +void alloc2(size_t size) +{ + auto a = snmalloc::ThreadAlloc::get_noncachable(); + void* r = a->alloc(size); + a->dealloc(r); +} + +void alloc3(size_t size) +{ + auto a = snmalloc::ThreadAlloc::get_noncachable(); + void* r = a->alloc(size); + a->dealloc(r,size); +} + +void alloc4(size_t size) +{ + auto a = snmalloc::ThreadAlloc::get(); + void* r = a->alloc(size); + a->dealloc(r); +} + +void dealloc1(void* p, size_t) +{ + snmalloc::ThreadAlloc::get_noncachable()->dealloc(p); +} + +void dealloc2(void* p, size_t size) +{ + snmalloc::ThreadAlloc::get_noncachable()->dealloc(p,size); +} + +void dealloc3(void* p, size_t) +{ + snmalloc::ThreadAlloc::get()->dealloc(p); +} + +void dealloc4(void* p, size_t size) +{ + snmalloc::ThreadAlloc::get()->dealloc(p, size); +} + +void f(size_t size) +{ + auto t1 = std::thread(alloc1, size); + auto t2 = std::thread(alloc2, size); + auto t3 = std::thread(alloc3, size); + auto t4 = std::thread(alloc4, size); + + auto a = snmalloc::ThreadAlloc::get(); + auto p1 = a->alloc(size); + auto p2 = a->alloc(size); + auto p3 = a->alloc(size); + auto p4 = a->alloc(size); + + auto t5 = std::thread(dealloc1, p1, size); + auto t6 = std::thread(dealloc2, p2, size); + auto t7 = std::thread(dealloc3, p3, size); + auto t8 = std::thread(dealloc4, p4, size); + + t1.join(); + t2.join(); + t3.join(); + t4.join(); + t5.join(); + t6.join(); + t7.join(); + t8.join(); +} + +int main(int, char**) +{ + setup(); + + f(0); + f(1); + f(3); + f(5); + f(7); + for (size_t exp = 1; exp < snmalloc::SUPERSLAB_BITS; exp++) + { + f(1ULL << exp); + f(3ULL << exp); + f(5ULL << exp); + f(7ULL << exp); + f((1ULL << exp) + 1); + f((3ULL << exp) + 1); + f((5ULL << exp) + 1); + f((7ULL << exp) + 1); + f((1ULL << exp) - 1); + f((3ULL << exp) - 1); + f((5ULL << exp) - 1); + f((7ULL << exp) - 1); + } +} + + + From a1d139c1044baddb6940b3f41c2a065256c15b05 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Thu, 12 Mar 2020 10:37:01 +0000 Subject: [PATCH 14/37] Correctly handle reusing get_noncachable --- src/mem/threadalloc.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/mem/threadalloc.h b/src/mem/threadalloc.h index 9a80709a9..4b0a9b052 100644 --- a/src/mem/threadalloc.h +++ b/src/mem/threadalloc.h @@ -229,7 +229,13 @@ namespace snmalloc SNMALLOC_SLOW_PATH inline void* init_thread_allocator() { auto*& local_alloc = ThreadAlloc::get_reference(); - SNMALLOC_ASSERT(local_alloc == &GlobalPlaceHolder); + if (local_alloc != &GlobalPlaceHolder) + { + // If someone reuses a noncachable call, then we can end up here. + // The allocator has already been initialised. 
Could either error + // to say stop doing this, or just give them the initialised version. + return local_alloc; + } local_alloc = current_alloc_pool()->acquire(); SNMALLOC_ASSERT(local_alloc != &GlobalPlaceHolder); ThreadAlloc::register_cleanup(); From f9e0f64144a591e6c99cf467bb7acc5ac2b615ad Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Thu, 12 Mar 2020 10:38:34 +0000 Subject: [PATCH 15/37] Fix large alloc stats Large alloc stats aren't necessarily balanced on a thread, this changes to tracking individual pushs and pops, rather than the net effect (with an unsigned value). --- src/mem/allocstats.h | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/src/mem/allocstats.h b/src/mem/allocstats.h index 444fc1e7a..85bea986b 100644 --- a/src/mem/allocstats.h +++ b/src/mem/allocstats.h @@ -125,7 +125,9 @@ namespace snmalloc bits::one_at_bit(bits::ADDRESS_BITS - 1)); Stats sizeclass[N]; - Stats large[LARGE_N]; + + size_t large_pop_count[LARGE_N] = {0}; + size_t large_push_count[LARGE_N] = {0}; size_t remote_freed = 0; size_t remote_posted = 0; @@ -159,7 +161,7 @@ namespace snmalloc for (size_t i = 0; i < LARGE_N; i++) { - if (!large[i].is_empty()) + if (large_push_count[i] != large_pop_count[i]) return false; } @@ -194,7 +196,7 @@ namespace snmalloc UNUSED(sc); #ifdef USE_SNMALLOC_STATS - large[sc].count.inc(); + large_pop_count[sc]++; #endif } @@ -223,7 +225,7 @@ namespace snmalloc UNUSED(sc); #ifdef USE_SNMALLOC_STATS - large[sc].count.dec(); + large_push_count[sc]++; #endif } @@ -289,7 +291,10 @@ namespace snmalloc sizeclass[i].add(that.sizeclass[i]); for (size_t i = 0; i < LARGE_N; i++) - large[i].add(that.large[i]); + { + large_push_count[i] += that.large_push_count[i]; + large_pop_count[i] += that.large_pop_count[i]; + } for (size_t i = 0; i < TOTAL_BUCKETS; i++) bucketed_requests[i] += that.bucketed_requests[i]; @@ -343,6 +348,15 @@ namespace snmalloc << "Average Slab Usage" << "Average wasted space" << csv.endl; + csv << "LargeBucketedStats" + << "DumpID" + << "AllocatorID" + << "Size group" + << "Size" + << "Push count" + << "Pop count" + << csv.endl; + csv << "AllocSizes" << "DumpID" << "AllocatorID" @@ -367,13 +381,12 @@ namespace snmalloc for (uint8_t i = 0; i < LARGE_N; i++) { - if (large[i].count.is_unused()) + if ((large_push_count[i] == 0) && (large_pop_count[i] == 0)) continue; - csv << "BucketedStats" << dumpid << allocatorid << (i + N) - << large_sizeclass_to_size(i); - - large[i].print(csv, large_sizeclass_to_size(i)); + csv << "LargeBucketedStats" << dumpid << allocatorid << (i + N) + << large_sizeclass_to_size(i) << large_push_count[i] + << large_pop_count[i] << csv.endl; } size_t low = 0; From 37d7e15543444276d9f2c6debe1b1b2e8676c9d8 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Thu, 12 Mar 2020 10:39:19 +0000 Subject: [PATCH 16/37] Fix TLS init on large alloc path --- src/mem/alloc.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/mem/alloc.h b/src/mem/alloc.h index d16258f62..92fa0e47f 100644 --- a/src/mem/alloc.h +++ b/src/mem/alloc.h @@ -1267,6 +1267,13 @@ namespace snmalloc { MEASURE_TIME(large_dealloc, 4, 16); + if (IsFirstAllocation(this)) + { + void* replacement = InitThreadAllocator(); + return reinterpret_cast(replacement) + ->large_dealloc(p, size); + } + size_t size_bits = bits::next_pow2_bits(size); SNMALLOC_ASSERT(bits::one_at_bit(size_bits) >= SUPERSLAB_SIZE); size_t large_class = size_bits - SUPERSLAB_BITS; From 075874e2e62d0b7906ab7e9d2acadc971d77376c Mon Sep 17 00:00:00 2001 
From: Matthew Parkinson Date: Thu, 12 Mar 2020 10:50:00 +0000 Subject: [PATCH 17/37] Fixup slab refactor --- src/mem/slab.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/mem/slab.h b/src/mem/slab.h index e1a2cb9fa..2b317fcd7 100644 --- a/src/mem/slab.h +++ b/src/mem/slab.h @@ -156,6 +156,10 @@ namespace snmalloc else memory_provider.template zero(p, rsize); } + else + { + UNUSED(rsize); + } return p; } From 68b49dfb32b0bf52ecbe4378c72f733c4701c26d Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Thu, 12 Mar 2020 10:50:28 +0000 Subject: [PATCH 18/37] Minor refactor. --- src/mem/alloc.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/mem/alloc.h b/src/mem/alloc.h index 92fa0e47f..3be165996 100644 --- a/src/mem/alloc.h +++ b/src/mem/alloc.h @@ -858,7 +858,7 @@ namespace snmalloc SNMALLOC_FAST_PATH void handle_message_queue() { // Inline the empty check, but not necessarily the full queue handling. - if (likely(message_queue().is_empty())) + if (likely(!has_messages())) return; handle_message_queue_inner(); @@ -1004,7 +1004,7 @@ namespace snmalloc handle_message_queue(); return small_alloc_new_free_list(sizeclass); - } + } template SNMALLOC_FAST_PATH void* small_alloc_new_free_list(sizeclass_t sizeclass) @@ -1031,11 +1031,11 @@ namespace snmalloc return small_alloc_new_slab(sizeclass); } return small_alloc_first_alloc(sizeclass); - } + } template SNMALLOC_SLOW_PATH void* small_alloc_first_alloc(sizeclass_t sizeclass) - { + { auto replacement = InitThreadAllocator(); return reinterpret_cast(replacement) ->template small_alloc_inner(sizeclass); From f8b77a8c97da3a6b71be92735a59068d338cfc54 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Thu, 12 Mar 2020 10:51:28 +0000 Subject: [PATCH 19/37] Minor refactor --- src/mem/alloc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mem/alloc.h b/src/mem/alloc.h index 3be165996..de616be9f 100644 --- a/src/mem/alloc.h +++ b/src/mem/alloc.h @@ -1001,7 +1001,7 @@ namespace snmalloc template SNMALLOC_SLOW_PATH void* small_alloc_mq_slow(sizeclass_t sizeclass) { - handle_message_queue(); + handle_message_queue_inner(); return small_alloc_new_free_list(sizeclass); } From d6562320af2d06c23d64667a9a4b74005c3a7cce Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Thu, 12 Mar 2020 19:03:39 +0000 Subject: [PATCH 20/37] Add Bump ptrs to allocator Each allocator has a bump ptr for each size class. This is no longer slab local. Slabs that haven't been fully allocated no longer need to be in the DLL for this sizeclass. 
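A simplified stand-in for the refill step this commit introduces (the real `Slab::alloc_new_list` has a different signature and applies the cache-friendly offset and Metaslab encoding; the page size here is assumed, and `rsize` is assumed to be at least pointer-sized): the allocator keeps one bump pointer per size class, and when the fast free list is empty it threads roughly one page worth of objects into a singly linked list in a single pass, leaving the bump pointer ready for the next refill.

```cpp
#include <cstddef>
#include <cstdint>

// Assumed for illustration; snmalloc takes this from its platform config.
static constexpr uintptr_t OS_PAGE_SIZE = 4096;

// Store the "next" pointer in the first word of a free object.
static void store_next(void* obj, void* next)
{
  *static_cast<void**>(obj) = next;
}

// Carve the rest of the current page at bumpptr into a free list of
// rsize-byte objects, advancing bumpptr as it goes. (The real code also
// allows one page-boundary crossing and stops at the slab end.)
static void* alloc_new_list(char*& bumpptr, size_t rsize)
{
  uintptr_t addr = reinterpret_cast<uintptr_t>(bumpptr);
  char* page_end =
    reinterpret_cast<char*>((addr + OS_PAGE_SIZE) & ~(OS_PAGE_SIZE - 1));

  void* head = nullptr;
  void* prev = nullptr;
  while (bumpptr + rsize <= page_end)
  {
    if (prev == nullptr)
      head = bumpptr;
    else
      store_next(prev, bumpptr);
    prev = bumpptr;
    bumpptr += rsize;
  }
  if (prev != nullptr)
    store_next(prev, nullptr);

  // head becomes the thread-local fast free list for this size class.
  return head;
}
```

In the change itself the resulting list is stored into the per-sizeclass fast free list and the slab's metadata is set up as if the whole slab were already allocated, so partially bump-allocated slabs never need to sit on the per-sizeclass slab list.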
--- src/ds/dllist.h | 8 ++--- src/mem/alloc.h | 84 ++++++++++++++++++++++++++++++++++++++------- src/mem/metaslab.h | 4 +-- src/mem/slab.h | 76 +++++++++++----------------------------- src/mem/superslab.h | 12 ++++--- 5 files changed, 106 insertions(+), 78 deletions(-) diff --git a/src/ds/dllist.h b/src/ds/dllist.h index e4e70d2ef..4a7e9c726 100644 --- a/src/ds/dllist.h +++ b/src/ds/dllist.h @@ -94,12 +94,12 @@ namespace snmalloc return *this; } - bool is_empty() + SNMALLOC_FAST_PATH bool is_empty() { return head == Terminator(); } - T* get_head() + SNMALLOC_FAST_PATH T* get_head() { return head; } @@ -109,7 +109,7 @@ namespace snmalloc return tail; } - T* pop() + SNMALLOC_FAST_PATH T* pop() { T* item = head; @@ -169,7 +169,7 @@ namespace snmalloc #endif } - void remove(T* item) + SNMALLOC_FAST_PATH void remove(T* item) { #ifndef NDEBUG debug_check_contains(item); diff --git a/src/mem/alloc.h b/src/mem/alloc.h index de616be9f..05c65de6f 100644 --- a/src/mem/alloc.h +++ b/src/mem/alloc.h @@ -87,6 +87,13 @@ namespace snmalloc LargeAlloc large_allocator; ChunkMap chunk_map; + /** + * Per size class bumpptr for building new free lists + * If aligned to a SLAB start, then it is empty, and a new + * slab is required. + ***/ + void* bump_ptrs[NUM_SMALL_CLASSES] = {0}; + public: Stats& stats() { @@ -722,6 +729,26 @@ namespace snmalloc } } + // Dump bump allocators back into memory + for (size_t i = 0; i < NUM_SMALL_CLASSES; i++) + { + auto& bp = bump_ptrs[i]; + auto rsize = sizeclass_to_size(i); + FreeListHead ffl; + while (pointer_align_up(bp, SLAB_SIZE) != bp) + { + Slab::alloc_new_list(bp, ffl, rsize); + void* prev = ffl.value; + while (prev != nullptr) + { + auto n = Metaslab::follow_next(prev); + Superslab* super = Superslab::get(prev); + small_dealloc_offseted_inner(super, prev, i); + prev = n; + } + } + } + for (size_t i = 0; i < NUM_SMALL_CLASSES; i++) { auto prev = small_fast_free_lists[i].value; @@ -993,7 +1020,7 @@ namespace snmalloc } if (likely(!has_messages())) - return small_alloc_new_free_list(sizeclass); + return small_alloc_next_free_list(sizeclass); return small_alloc_mq_slow(sizeclass); } @@ -1003,11 +1030,11 @@ namespace snmalloc { handle_message_queue_inner(); - return small_alloc_new_free_list(sizeclass); + return small_alloc_next_free_list(sizeclass); } template - SNMALLOC_FAST_PATH void* small_alloc_new_free_list(sizeclass_t sizeclass) + SNMALLOC_FAST_PATH void* small_alloc_next_free_list(sizeclass_t sizeclass) { size_t rsize = sizeclass_to_size(sizeclass); auto& sl = small_classes[sizeclass]; @@ -1024,11 +1051,17 @@ namespace snmalloc return slab->alloc( sl, ffl, rsize, large_allocator.memory_provider); } - + return small_alloc_rare(sizeclass); + } + + + template + SNMALLOC_SLOW_PATH void* small_alloc_rare(sizeclass_t sizeclass) + { if (likely(!IsFirstAllocation(this))) { stats().sizeclass_alloc(sizeclass); - return small_alloc_new_slab(sizeclass); + return small_alloc_new_free_list(sizeclass); } return small_alloc_first_alloc(sizeclass); } @@ -1041,19 +1074,46 @@ namespace snmalloc ->template small_alloc_inner(sizeclass); } + template + SNMALLOC_FAST_PATH + void* small_alloc_new_free_list(sizeclass_t sizeclass) + { + auto& bp = bump_ptrs[sizeclass]; + if (unlikely(pointer_align_up(bp, SLAB_SIZE) == bp)) + { + // Fetch new slab + return small_alloc_new_slab(sizeclass); + } + auto rsize = sizeclass_to_size(sizeclass); + auto& ffl = small_fast_free_lists[sizeclass]; + assert(ffl.value == nullptr); + Slab::alloc_new_list(bp, ffl, rsize); + + void* p = 
remove_cache_friendly_offset(ffl.value, sizeclass); + ffl.value = Metaslab::follow_next(p); + + if constexpr (zero_mem == YesZero) + { + large_allocator.memory_provider.zero(p, sizeclass_to_size(sizeclass)); + } + return p; + } + template SNMALLOC_SLOW_PATH void* small_alloc_new_slab(sizeclass_t sizeclass) { - size_t rsize = sizeclass_to_size(sizeclass); - auto& sl = small_classes[sizeclass]; - Slab* slab = alloc_slab(sizeclass); + auto& bp = bump_ptrs[sizeclass]; + if (unlikely(pointer_align_up(bp, SLAB_SIZE) == bp)) + { + // Fetch new slab + Slab* slab = alloc_slab(sizeclass); if ((allow_reserve == NoReserve) && (slab == nullptr)) return nullptr; - sl.insert_back(slab->get_link()); - auto& ffl = small_fast_free_lists[sizeclass]; - return slab->alloc( - sl, ffl, rsize, large_allocator.memory_provider); + bp = pointer_offset(slab, get_initial_offset(sizeclass, slab->is_short())); + } + + return small_alloc_new_free_list(sizeclass); } SNMALLOC_FAST_PATH void diff --git a/src/mem/metaslab.h b/src/mem/metaslab.h index 837c265da..debe7510f 100644 --- a/src/mem/metaslab.h +++ b/src/mem/metaslab.h @@ -13,7 +13,7 @@ namespace snmalloc SlabLink* prev; SlabLink* next; - Slab* get_slab() + SNMALLOC_FAST_PATH Slab* get_slab() { return pointer_align_down(this); } @@ -86,7 +86,7 @@ namespace snmalloc return result; } - void set_full() + SNMALLOC_FAST_PATH void set_full() { SNMALLOC_ASSERT(head == nullptr); SNMALLOC_ASSERT(link != 1); diff --git a/src/mem/slab.h b/src/mem/slab.h index 2b317fcd7..4f3762d49 100644 --- a/src/mem/slab.h +++ b/src/mem/slab.h @@ -40,7 +40,7 @@ namespace snmalloc { // Read the head from the metadata stored in the superslab. Metaslab& meta = get_meta(); - void* head = meta.head; + SNMALLOC_ASSERT(meta.link != 1); SNMALLOC_ASSERT(rsize == sizeclass_to_size(meta.sizeclass)); SNMALLOC_ASSERT( @@ -48,69 +48,37 @@ namespace snmalloc SNMALLOC_ASSERT(!meta.is_full()); meta.debug_slab_invariant(this); - if (unlikely(head == nullptr)) - { - return alloc_refill(meta, sl, fast_free_list, rsize, memory_provider); - } - - return alloc_pull_from_list(meta, fast_free_list, rsize, memory_provider); - } - - template - SNMALLOC_FAST_PATH void* alloc_pull_from_list( - Metaslab& meta, - FreeListHead& fast_free_list, - size_t rsize, - MemoryProvider& memory_provider) - { - void* p = meta.head; - - // Read the next slot from the memory that's about to be allocated. - void* next = Metaslab::follow_next(p); // Put everything in allocators small_class free list. + fast_free_list.value = meta.head; meta.head = nullptr; - fast_free_list.value = next; - // Treat stealing the free list as allocating it all. - // Link is not in use, i.e. - 1 is required. - meta.needed = meta.allocated - 1; - p = remove_cache_friendly_offset(p, meta.sizeclass); + // Return the link as the node for this allocation. + void* link = pointer_offset(this, meta.link); + void* p = remove_cache_friendly_offset(link, meta.sizeclass); + + // Treat stealing the free list as allocating it all. 
+ meta.needed = meta.allocated; + meta.set_full(); + sl.pop(); return alloc_finish(meta, p, rsize, memory_provider); } - template - SNMALLOC_SLOW_PATH void* alloc_refill( - Metaslab& meta, - SlabList& sl, + static + SNMALLOC_SLOW_PATH void alloc_new_list( + void*& bumpptr, FreeListHead& fast_free_list, - size_t rsize, - MemoryProvider& memory_provider) + size_t rsize) { - size_t bumpptr = get_initial_offset(meta.sizeclass, is_short()); - bumpptr += meta.allocated * rsize; - if (bumpptr == SLAB_SIZE) - { - // Everything is in use, so we need all entries to be - // return before we can reclaim this slab. - meta.needed = meta.allocated; - - void* link = pointer_offset(this, meta.link); - void* p = remove_cache_friendly_offset(link, meta.sizeclass); - - meta.set_full(); - sl.pop(); - return alloc_finish(meta, p, rsize, memory_provider); - } // Allocate the last object on the current page if there is one, // and then thread the next free list worth of allocations. bool crossed_page_boundary = false; void* curr = nullptr; while (true) { - size_t newbumpptr = bumpptr + rsize; - auto alignedbumpptr = bits::align_up(bumpptr - 1, OS_PAGE_SIZE); - auto alignednewbumpptr = bits::align_up(newbumpptr, OS_PAGE_SIZE); + void* newbumpptr = pointer_offset(bumpptr, rsize); + auto alignedbumpptr = bits::align_up(address_cast(bumpptr) - 1, OS_PAGE_SIZE); + auto alignednewbumpptr = bits::align_up(address_cast(newbumpptr), OS_PAGE_SIZE); if (alignedbumpptr != alignednewbumpptr) { @@ -124,22 +92,18 @@ namespace snmalloc if (curr == nullptr) { - meta.head = pointer_offset(this, bumpptr); + fast_free_list.value = bumpptr; } else { - Metaslab::store_next( - curr, (bumpptr == 1) ? nullptr : pointer_offset(this, bumpptr)); + Metaslab::store_next(curr, bumpptr); } - curr = pointer_offset(this, bumpptr); + curr = bumpptr; bumpptr = newbumpptr; - meta.allocated = meta.allocated + 1; } SNMALLOC_ASSERT(curr != nullptr); Metaslab::store_next(curr, nullptr); - - return alloc_pull_from_list(meta, fast_free_list, rsize, memory_provider); } template diff --git a/src/mem/superslab.h b/src/mem/superslab.h index e236534db..64af1fcbf 100644 --- a/src/mem/superslab.h +++ b/src/mem/superslab.h @@ -160,10 +160,12 @@ namespace snmalloc if ((used & 1) == 1) return alloc_slab(sizeclass); - meta[0].allocated = 1; meta[0].head = nullptr; + // Set up meta data as if the entire slab has been turned into a free list. + meta[0].allocated = (uint16_t)((SLAB_SIZE - get_initial_offset(sizeclass, true)) / sizeclass_to_size(sizeclass)); + meta[0].link = 1; + meta[0].needed = 1; meta[0].sizeclass = static_cast(sizeclass); - meta[0].link = get_initial_offset(sizeclass, true); used++; return reinterpret_cast(this); @@ -178,9 +180,11 @@ namespace snmalloc uint8_t n = meta[h].next; meta[h].head = nullptr; - meta[h].allocated = 1; + // Set up meta data as if the entire slab has been turned into a free list. 
+ meta[h].allocated = (uint16_t)((SLAB_SIZE - get_initial_offset(sizeclass, false)) / sizeclass_to_size(sizeclass)); + meta[h].needed = 1; + meta[h].link = 1; meta[h].sizeclass = static_cast(sizeclass); - meta[h].link = get_initial_offset(sizeclass, false); head = h + n + 1; used += 2; From 54dcb20ee3f8eac11b69750e72398a6c9e5edb4f Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Fri, 13 Mar 2020 14:22:57 +0000 Subject: [PATCH 21/37] Bug fix --- src/mem/alloc.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/mem/alloc.h b/src/mem/alloc.h index 05c65de6f..18cf66ecc 100644 --- a/src/mem/alloc.h +++ b/src/mem/alloc.h @@ -1079,11 +1079,19 @@ namespace snmalloc void* small_alloc_new_free_list(sizeclass_t sizeclass) { auto& bp = bump_ptrs[sizeclass]; - if (unlikely(pointer_align_up(bp, SLAB_SIZE) == bp)) + if (likely(pointer_align_up(bp, SLAB_SIZE) != bp)) { + return small_alloc_build_free_list(sizeclass); + } // Fetch new slab return small_alloc_new_slab(sizeclass); } + + template + SNMALLOC_FAST_PATH + void* small_alloc_build_free_list(sizeclass_t sizeclass) + { + auto& bp = bump_ptrs[sizeclass]; auto rsize = sizeclass_to_size(sizeclass); auto& ffl = small_fast_free_lists[sizeclass]; assert(ffl.value == nullptr); @@ -1104,16 +1112,13 @@ namespace snmalloc void* small_alloc_new_slab(sizeclass_t sizeclass) { auto& bp = bump_ptrs[sizeclass]; - if (unlikely(pointer_align_up(bp, SLAB_SIZE) == bp)) - { // Fetch new slab Slab* slab = alloc_slab(sizeclass); if ((allow_reserve == NoReserve) && (slab == nullptr)) return nullptr; bp = pointer_offset(slab, get_initial_offset(sizeclass, slab->is_short())); - } - return small_alloc_new_free_list(sizeclass); + return small_alloc_build_free_list(sizeclass); } SNMALLOC_FAST_PATH void From bd194847f889188d11ec50c0fb062d61d347e578 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Fri, 13 Mar 2020 14:24:45 +0000 Subject: [PATCH 22/37] Change to a cycle non-empty list This change reduces the branching in the case of finding a new free list. Using a non-empty cyclic list enables branch free add, and a single branch in remove to detect the empty case. --- src/ds/cdllist.h | 91 ++++++++++++++++++++++++++++++++++++++++++++++ src/mem/alloc.h | 24 ++++++------ src/mem/metaslab.h | 17 ++++----- src/mem/slab.h | 10 ++--- 4 files changed, 115 insertions(+), 27 deletions(-) create mode 100644 src/ds/cdllist.h diff --git a/src/ds/cdllist.h b/src/ds/cdllist.h new file mode 100644 index 000000000..3572dd00a --- /dev/null +++ b/src/ds/cdllist.h @@ -0,0 +1,91 @@ +#pragma once + +#include +#include +#include "defines.h" + +namespace snmalloc +{ + /** + * Special class for cyclic doubly linked non-empty linked list + * + * This code assumes there is always one element in the list. The client + * must ensure there is a sentinal element. 
+ **/ + class CDLLNode + { + CDLLNode* next; + CDLLNode* prev; + + public: + CDLLNode() + { + next = this; + prev = this; + } + + SNMALLOC_FAST_PATH bool is_empty() + { + return next == this; + } + + SNMALLOC_FAST_PATH void remove() + { + SNMALLOC_ASSERT(!is_empty()); + debug_check(); + next->prev = prev; + prev->next = next; + next->debug_check(); + +#ifndef NDEBUG + next = nullptr; + prev = nullptr; +#endif + } + + SNMALLOC_FAST_PATH CDLLNode* get_next() + { + return next; + } + + SNMALLOC_FAST_PATH CDLLNode* get_prev() + { + return prev; + } + + SNMALLOC_FAST_PATH void insert_next(CDLLNode* item) + { + debug_check(); + item->next = next; + next->prev = item; + item->prev = this; + next = item; + debug_check(); + } + + SNMALLOC_FAST_PATH void insert_prev(CDLLNode* item) + { + debug_check(); + item->prev = prev; + prev->next = item; + item->next = this; + prev = item; + debug_check(); + } + + void debug_check() + { +#ifndef NDEBUG + CDLLNode* item = this->next; + CDLLNode* p = this; + + do + { + SNMALLOC_ASSERT(item->prev == p); + p = item; + item = item->next; + } while (item != this); +#endif + } + }; +} // namespace snmalloc diff --git a/src/mem/alloc.h b/src/mem/alloc.h index 18cf66ecc..a32d05b41 100644 --- a/src/mem/alloc.h +++ b/src/mem/alloc.h @@ -1034,7 +1034,7 @@ namespace snmalloc } template - SNMALLOC_FAST_PATH void* small_alloc_next_free_list(sizeclass_t sizeclass) + SNMALLOC_SLOW_PATH void* small_alloc_next_free_list(sizeclass_t sizeclass) { size_t rsize = sizeclass_to_size(sizeclass); auto& sl = small_classes[sizeclass]; @@ -1045,8 +1045,8 @@ namespace snmalloc { stats().sizeclass_alloc(sizeclass); - SlabLink* link = sl.get_head(); - slab = link->get_slab(); + SlabLink* link = sl.get_next(); + slab = get_slab(link); auto& ffl = small_fast_free_lists[sizeclass]; return slab->alloc( sl, ffl, rsize, large_allocator.memory_provider); @@ -1083,9 +1083,9 @@ namespace snmalloc { return small_alloc_build_free_list(sizeclass); } - // Fetch new slab - return small_alloc_new_slab(sizeclass); - } + // Fetch new slab + return small_alloc_new_slab(sizeclass); + } template SNMALLOC_FAST_PATH @@ -1112,12 +1112,12 @@ namespace snmalloc void* small_alloc_new_slab(sizeclass_t sizeclass) { auto& bp = bump_ptrs[sizeclass]; - // Fetch new slab - Slab* slab = alloc_slab(sizeclass); - if ((allow_reserve == NoReserve) && (slab == nullptr)) - return nullptr; - bp = pointer_offset(slab, get_initial_offset(sizeclass, slab->is_short())); - + // Fetch new slab + Slab* slab = alloc_slab(sizeclass); + if ((allow_reserve == NoReserve) && (slab == nullptr)) + return nullptr; + bp = pointer_offset(slab, get_initial_offset(sizeclass, slab->is_short())); + return small_alloc_build_free_list(sizeclass); } diff --git a/src/mem/metaslab.h b/src/mem/metaslab.h index debe7510f..af07ba625 100644 --- a/src/mem/metaslab.h +++ b/src/mem/metaslab.h @@ -1,6 +1,7 @@ #pragma once #include "../ds/dllist.h" +#include "../ds/cdllist.h" #include "../ds/helpers.h" #include "sizeclass.h" @@ -8,18 +9,14 @@ namespace snmalloc { class Slab; - struct SlabLink - { - SlabLink* prev; - SlabLink* next; + using SlabList = CDLLNode; + using SlabLink = CDLLNode; - SNMALLOC_FAST_PATH Slab* get_slab() - { - return pointer_align_down(this); - } - }; + SNMALLOC_FAST_PATH Slab* get_slab(SlabLink* sl) + { + return pointer_align_down(sl); + } - using SlabList = DLList; static_assert( sizeof(SlabLink) <= MIN_ALLOC_SIZE, diff --git a/src/mem/slab.h b/src/mem/slab.h index 4f3762d49..96f275b2d 100644 --- a/src/mem/slab.h +++ b/src/mem/slab.h @@ -44,7 
+44,7 @@ namespace snmalloc SNMALLOC_ASSERT(rsize == sizeclass_to_size(meta.sizeclass)); SNMALLOC_ASSERT( - sl.get_head() == (SlabLink*)pointer_offset(this, meta.link)); + sl.get_next() == (SlabLink*)pointer_offset(this, meta.link)); SNMALLOC_ASSERT(!meta.is_full()); meta.debug_slab_invariant(this); @@ -59,13 +59,13 @@ namespace snmalloc // Treat stealing the free list as allocating it all. meta.needed = meta.allocated; meta.set_full(); - sl.pop(); + sl.get_next()->remove(); return alloc_finish(meta, p, rsize, memory_provider); } static - SNMALLOC_SLOW_PATH void alloc_new_list( + SNMALLOC_FAST_PATH void alloc_new_list( void*& bumpptr, FreeListHead& fast_free_list, size_t rsize) @@ -192,13 +192,13 @@ namespace snmalloc meta.needed = meta.allocated - 1; // Push on the list of slabs for this sizeclass. - sl->insert_back(meta.get_link(this)); + sl->insert_prev(meta.get_link(this)); meta.debug_slab_invariant(this); return Superslab::NoSlabReturn; } // Remove from the sizeclass list and dealloc on the superslab. - sl->remove(meta.get_link(this)); + meta.get_link(this)->remove(); if (is_short()) return super->dealloc_short_slab(); From ed69bbbcbd0747514f6376755a25510b7b1d931b Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Mon, 16 Mar 2020 10:32:46 +0000 Subject: [PATCH 23/37] Comments. --- src/ds/cdllist.h | 13 ++++++++++ src/mem/alloc.h | 58 +++++++++++++++++++++++++++++++++++-------- src/mem/slab.h | 51 ++++++++++++++++++++----------------- src/mem/superslab.h | 12 +++++++-- src/mem/threadalloc.h | 31 +++++++++++++---------- 5 files changed, 116 insertions(+), 49 deletions(-) diff --git a/src/ds/cdllist.h b/src/ds/cdllist.h index 3572dd00a..3fec8e959 100644 --- a/src/ds/cdllist.h +++ b/src/ds/cdllist.h @@ -18,6 +18,9 @@ namespace snmalloc CDLLNode* prev; public: + /** + * Single element cyclic list. This is the empty case. + **/ CDLLNode() { next = this; @@ -29,12 +32,17 @@ namespace snmalloc return next == this; } + /** + * Removes this element from the cyclic list is it part of. + **/ SNMALLOC_FAST_PATH void remove() { SNMALLOC_ASSERT(!is_empty()); debug_check(); next->prev = prev; prev->next = next; + // As this is no longer in the list, check invariant for + // neighbouring element. next->debug_check(); #ifndef NDEBUG @@ -73,6 +81,11 @@ namespace snmalloc debug_check(); } + /** + * Checks the lists invariants + * x->next->prev = x + * for all x in the list. + **/ void debug_check() { #ifndef NDEBUG diff --git a/src/mem/alloc.h b/src/mem/alloc.h index a32d05b41..2b5847042 100644 --- a/src/mem/alloc.h +++ b/src/mem/alloc.h @@ -50,8 +50,19 @@ namespace snmalloc }; /** - * Allocator. This class is parameterised on three template parameters. The - * `MemoryProvider` defines the source of memory for this allocator. + * Allocator. This class is parameterised on five template parameters. + * + * The first two template parameter provides a hook to allow the allocator in + * use to be dynamically modified. This is used to implement a trick from + * mimalloc that avoids a conditional branch on the fast path. We + * initialise the thread-local allocator pointer with the address of a global + * allocator, which never owns any memory. The first returns true, if is + * passed the global allocator. The second initialises the thread-local + * allocator if it is has been been initialised already. Splitting into two + * functions allows for the code to be structured into tail calls to improve + * codegen. + * + * The `MemoryProvider` defines the source of memory for this allocator. 
* Allocators try to reuse address space by allocating from existing slabs or * reusing freed large allocations. When they need to allocate a new chunk * of memory they request space from the `MemoryProvider`. @@ -60,17 +71,10 @@ namespace snmalloc * to associate metadata with large (16MiB, by default) regions, allowing an * allocator to find the allocator responsible for that region. * - * The next template parameter, `IsQueueInline`, defines whether the + * The final template parameter, `IsQueueInline`, defines whether the * message queue for this allocator should be stored as a field of the * allocator (`true`) or provided externally, allowing it to be anywhere else * in the address space (`false`). - * - * The final template parameter provides a hook to allow the allocator in use - * to be dynamically modified. This is used to implement a trick from - * mimalloc that avoids a conditional branch on the fast path. We initialise - * the thread-local allocator pointer with the address of a global allocator, - * which never owns any memory. When we try to allocate memory, we call the - * replacement function. */ template< bool (*IsFirstAllocation)(void*), @@ -877,6 +881,10 @@ namespace snmalloc remote.post(id()); } + /** + * Check if this allocator has messages to deallocate blocks from another + * thread + **/ SNMALLOC_FAST_PATH bool has_messages() { return !(message_queue().is_empty()); @@ -1025,6 +1033,10 @@ namespace snmalloc return small_alloc_mq_slow(sizeclass); } + /** + * Slow path for handling message queue, before dealing with small + * allocation request. + **/ template SNMALLOC_SLOW_PATH void* small_alloc_mq_slow(sizeclass_t sizeclass) { @@ -1033,6 +1045,9 @@ namespace snmalloc return small_alloc_next_free_list(sizeclass); } + /** + * Attempt to find a new free list to allocate from + **/ template SNMALLOC_SLOW_PATH void* small_alloc_next_free_list(sizeclass_t sizeclass) { @@ -1054,7 +1069,11 @@ namespace snmalloc return small_alloc_rare(sizeclass); } - + /** + * Called when, there are no available free list to service this request + * Could be due to using the dummy allocator, or needing to bump allocate a + * new free list. + **/ template SNMALLOC_SLOW_PATH void* small_alloc_rare(sizeclass_t sizeclass) { @@ -1066,6 +1085,10 @@ namespace snmalloc return small_alloc_first_alloc(sizeclass); } + /** + * Called on first allocation to set up the thread local allocator, + * then directs the allocation request to the newly created allocator. + **/ template SNMALLOC_SLOW_PATH void* small_alloc_first_alloc(sizeclass_t sizeclass) { @@ -1074,6 +1097,10 @@ namespace snmalloc ->template small_alloc_inner(sizeclass); } + /** + * Called to create a new free list, and service the request from that new + * list. + **/ template SNMALLOC_FAST_PATH void* small_alloc_new_free_list(sizeclass_t sizeclass) @@ -1087,6 +1114,10 @@ namespace snmalloc return small_alloc_new_slab(sizeclass); } + /** + * Creates a new free list from the thread local bump allocator and service + * the request from that new list. + **/ template SNMALLOC_FAST_PATH void* small_alloc_build_free_list(sizeclass_t sizeclass) @@ -1107,6 +1138,11 @@ namespace snmalloc return p; } + /** + * Allocates a new slab to allocate from, set it to be the bump allocator + * for this size class, and then builds a new free list from the thread + * local bump allocator and service the request from that new list. 
+ **/ template SNMALLOC_SLOW_PATH void* small_alloc_new_slab(sizeclass_t sizeclass) diff --git a/src/mem/slab.h b/src/mem/slab.h index 96f275b2d..2a9a59780 100644 --- a/src/mem/slab.h +++ b/src/mem/slab.h @@ -31,6 +31,11 @@ namespace snmalloc return get_meta().get_link(this); } + /** + * Takes a free list out of a slabs meta data. + * Returns the link as the allocation, and places the free list into the + * `fast_free_list` for further allocations. + **/ template SNMALLOC_FAST_PATH void* alloc( SlabList& sl, @@ -61,9 +66,31 @@ namespace snmalloc meta.set_full(); sl.get_next()->remove(); - return alloc_finish(meta, p, rsize, memory_provider); + SNMALLOC_ASSERT(is_start_of_object(Superslab::get(p), p)); + + meta.debug_slab_invariant(this); + + if constexpr (zero_mem == YesZero) + { + if (rsize < PAGE_ALIGNED_SIZE) + memory_provider.zero(p, rsize); + else + memory_provider.template zero(p, rsize); + } + else + { + UNUSED(rsize); + } + + return p; } + /** + * Given a bumpptr and a fast_free_list head reference, builds a new free + * list, and stores it in the fast_free_list. It will only create a page + * worth of allocations, or one if the allocation size is larger than a + * page. + **/ static SNMALLOC_FAST_PATH void alloc_new_list( void*& bumpptr, @@ -106,28 +133,6 @@ namespace snmalloc Metaslab::store_next(curr, nullptr); } - template - SNMALLOC_FAST_PATH void* alloc_finish(Metaslab& meta, void* p, size_t rsize, MemoryProvider& memory_provider) - { - SNMALLOC_ASSERT(is_start_of_object(Superslab::get(p), p)); - - meta.debug_slab_invariant(this); - - if constexpr (zero_mem == YesZero) - { - if (rsize < PAGE_ALIGNED_SIZE) - memory_provider.zero(p, rsize); - else - memory_provider.template zero(p, rsize); - } - else - { - UNUSED(rsize); - } - - return p; - } - bool is_start_of_object(Superslab* super, void* p) { Metaslab& meta = super->get_meta(this); diff --git a/src/mem/superslab.h b/src/mem/superslab.h index 64af1fcbf..156e1c26f 100644 --- a/src/mem/superslab.h +++ b/src/mem/superslab.h @@ -161,7 +161,11 @@ namespace snmalloc return alloc_slab(sizeclass); meta[0].head = nullptr; - // Set up meta data as if the entire slab has been turned into a free list. + // Set up meta data as if the entire slab has been turned into a free + // list. This means we don't have to check for special cases where we have + // returned all the elements, but this is a slab that is still being bump + // allocated from. Hence, the bump allocator slab will never be returned + // for use in another size class. meta[0].allocated = (uint16_t)((SLAB_SIZE - get_initial_offset(sizeclass, true)) / sizeclass_to_size(sizeclass)); meta[0].link = 1; meta[0].needed = 1; @@ -180,7 +184,11 @@ namespace snmalloc uint8_t n = meta[h].next; meta[h].head = nullptr; - // Set up meta data as if the entire slab has been turned into a free list. + // Set up meta data as if the entire slab has been turned into a free + // list. This means we don't have to check for special cases where we have + // returned all the elements, but this is a slab that is still being bump + // allocated from. Hence, the bump allocator slab will never be returned + // for use in another size class. 
meta[h].allocated = (uint16_t)((SLAB_SIZE - get_initial_offset(sizeclass, false)) / sizeclass_to_size(sizeclass)); meta[h].needed = 1; meta[h].link = 1; diff --git a/src/mem/threadalloc.h b/src/mem/threadalloc.h index 4b0a9b052..cb2bc418d 100644 --- a/src/mem/threadalloc.h +++ b/src/mem/threadalloc.h @@ -40,16 +40,21 @@ namespace snmalloc /** * Function passed as a template parameter to `Allocator` to allow lazy - * replacement. In this case we are assuming the underlying external thread - * alloc is performing initialization, so this is not required, and just - * always returns nullptr to specify no new allocator is required. - */ + * replacement. This function returns true, if the allocated passed in, + * is the placeholder allocator. As the TLS state is managed externally, + * this will always return false. + **/ SNMALLOC_FAST_PATH bool first_allocation(void* existing) { UNUSED(existing); return false; } + /** + * Function passed as a tempalte parameter to `Allocator` to allow lazy + * replacement. There is nothing to initialise in this case, so we expect + * this to never be called. + **/ SNMALLOC_FAST_PATH void* init_thread_allocator() { return nullptr; @@ -222,9 +227,11 @@ namespace snmalloc # endif /** - * Slow path for the placeholder replacement. The simple check that this is - * the global placeholder is inlined, the rest of it is only hit in a very - * unusual case and so should go off the fast path. + * Slow path for the placeholder replacement. + * Function passed as a tempalte parameter to `Allocator` to allow lazy + * replacement. This function initialises the thread local state if requried. + * The simple check that this is the global placeholder is inlined, the rest + * of it is only hit in a very unusual case and so should go off the fast path. */ SNMALLOC_SLOW_PATH inline void* init_thread_allocator() { @@ -244,12 +251,10 @@ namespace snmalloc /** * Function passed as a template parameter to `Allocator` to allow lazy - * replacement. This is called on all of the slow paths in `Allocator`. If - * the caller is the global placeholder allocator then this function will - * check if we've already allocated a per-thread allocator, returning it if - * so. If we have not allocated a per-thread allocator yet, then this - * function will allocate one. - */ + * replacement. This function returns true, if the allocated passed in, + * is the placeholder allocator. If it returns true, then + * `init_thread_allocator` should be called. + **/ SNMALLOC_FAST_PATH bool first_allocation(void* existing) { return existing == &GlobalPlaceHolder; From 06032f23834203122263718bc78ccf1ba21cef70 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Mon, 16 Mar 2020 10:50:21 +0000 Subject: [PATCH 24/37] Update differences --- difference.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/difference.md b/difference.md index aa8bb1cdc..273a74ca6 100644 --- a/difference.md +++ b/difference.md @@ -33,7 +33,10 @@ This document outlines the changes that have diverged from 4. We now store a direct pointer to the next element in each slabs free list rather than a relative offset into the slab. This enables list calculation on the fast path. - + + 5. There is a single bump-ptr per size class that is part of the + allocator structure. The per size class slab list now only contains slabs + with free list, and not if it only has a bump ptr. [2-4] Are changes that are directly inspired by (mimalloc)[http://github.com/microsoft/mimalloc]. 
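[Editor's note] Item 5 in the list above is easier to picture as a data layout. Roughly, and with invented names rather than the actual snmalloc structures, each size class now has a bump pointer owned directly by the allocator plus a list that only ever contains slabs whose free list is non-empty:

    #include <list>

    struct Slab; // opaque for the purpose of this sketch

    // Per-size-class state as described in item 5; names are illustrative.
    struct SizeClassStateSketch
    {
      // Owned by the allocator itself, one per size class.  When it is
      // aligned to a slab start there is no bump space left and a fresh
      // slab has to be fetched.
      void* bump_ptr = nullptr;

      // Only slabs that currently have a free list live here; a slab that
      // merely has remaining bump space is not kept in this list.
      std::list<Slab*> slabs_with_free_list;
    };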
\ No newline at end of file From 941e28ae62755c54f15623743e930992f2555595 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Mon, 16 Mar 2020 10:50:59 +0000 Subject: [PATCH 25/37] Rename first allocation Use needs initialisation as makes more sense for other scenarios. --- src/mem/alloc.h | 16 ++++++++-------- src/mem/globalalloc.h | 8 ++++---- src/mem/threadalloc.h | 6 +++--- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/mem/alloc.h b/src/mem/alloc.h index 2b5847042..c228b0b59 100644 --- a/src/mem/alloc.h +++ b/src/mem/alloc.h @@ -77,7 +77,7 @@ namespace snmalloc * in the address space (`false`). */ template< - bool (*IsFirstAllocation)(void*), + bool (*NeedsInitialisation)(void*), void* (*InitThreadAllocator)(), class MemoryProvider = GlobalVirtual, class ChunkMap = SNMALLOC_DEFAULT_CHUNKMAP, @@ -86,7 +86,7 @@ namespace snmalloc class Allocator : public FastFreeLists, public Pooled< - Allocator> + Allocator> { LargeAlloc large_allocator; ChunkMap chunk_map; @@ -511,7 +511,7 @@ namespace snmalloc inline size_t get_slot(size_t id, size_t r) { constexpr size_t allocator_size = sizeof( - Allocator); + Allocator); constexpr size_t initial_shift = bits::next_pow2_bits_const(allocator_size); SNMALLOC_ASSERT((initial_shift + (r * REMOTE_SLOT_BITS)) < 64); @@ -1077,7 +1077,7 @@ namespace snmalloc template SNMALLOC_SLOW_PATH void* small_alloc_rare(sizeclass_t sizeclass) { - if (likely(!IsFirstAllocation(this))) + if (likely(!NeedsInitialisation(this))) { stats().sizeclass_alloc(sizeclass); return small_alloc_new_free_list(sizeclass); @@ -1271,7 +1271,7 @@ namespace snmalloc } else { - if (IsFirstAllocation(this)) + if (NeedsInitialisation(this)) { void* replacement = InitThreadAllocator(); return reinterpret_cast(replacement) @@ -1344,7 +1344,7 @@ namespace snmalloc zero_mem == YesZero ? "zeromem" : "nozeromem", allow_reserve == NoReserve ? "noreserve" : "reserve")); - if (IsFirstAllocation(this)) + if (NeedsInitialisation(this)) { void* replacement = InitThreadAllocator(); return reinterpret_cast(replacement) @@ -1368,7 +1368,7 @@ namespace snmalloc { MEASURE_TIME(large_dealloc, 4, 16); - if (IsFirstAllocation(this)) + if (NeedsInitialisation(this)) { void* replacement = InitThreadAllocator(); return reinterpret_cast(replacement) @@ -1421,7 +1421,7 @@ namespace snmalloc // Now that we've established that we're in the slow path (if we're a // real allocator, we will have to empty our cache now), check if we are // a real allocator and construct one if we aren't. 
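[Editor's note] The call sites touched by this rename all share the same two-step shape, which is worth seeing in isolation: check whether the caller is still the placeholder, and if so build the real allocator and re-dispatch the entire request to it. A stripped-down sketch of that pattern (the template-parameter names mirror the ones in the diff, but the class itself is invented):

    #include <cstddef>

    using NeedsInitFn = bool (*)(void*);
    using InitFn = void* (*)();

    template<NeedsInitFn NeedsInitialisation, InitFn InitThreadAllocator>
    struct LazyInitSketch
    {
      void* alloc_rare(size_t size)
      {
        if (!NeedsInitialisation(this))
          return service_locally(size); // we really are a usable allocator

        // Still the placeholder: construct the real thread-local allocator
        // and forward the whole request, touching no local state here.
        void* real = InitThreadAllocator();
        return static_cast<LazyInitSketch*>(real)->alloc_rare(size);
      }

      void* service_locally(size_t) { return nullptr; } // elided
    };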
- if (IsFirstAllocation(this)) + if (NeedsInitialisation(this)) { void* replacement = InitThreadAllocator(); // We have to do a dealloc, not a remote_dealloc here because this may diff --git a/src/mem/globalalloc.h b/src/mem/globalalloc.h index a344f3b99..fe13f2732 100644 --- a/src/mem/globalalloc.h +++ b/src/mem/globalalloc.h @@ -6,23 +6,23 @@ namespace snmalloc { - inline bool first_allocation(void*); + inline bool needs_initialisation(void*); void* init_thread_allocator(); using Alloc = - Allocator; + Allocator; template class AllocPool : Pool< Allocator< - first_allocation, init_thread_allocator, + needs_initialisation, init_thread_allocator, MemoryProvider, SNMALLOC_DEFAULT_CHUNKMAP, true>, MemoryProvider> { using Alloc = Allocator< - first_allocation, init_thread_allocator, MemoryProvider, + needs_initialisation, init_thread_allocator, MemoryProvider, SNMALLOC_DEFAULT_CHUNKMAP, true >; diff --git a/src/mem/threadalloc.h b/src/mem/threadalloc.h index cb2bc418d..68a67eb32 100644 --- a/src/mem/threadalloc.h +++ b/src/mem/threadalloc.h @@ -44,7 +44,7 @@ namespace snmalloc * is the placeholder allocator. As the TLS state is managed externally, * this will always return false. **/ - SNMALLOC_FAST_PATH bool first_allocation(void* existing) + SNMALLOC_FAST_PATH bool needs_initialisation(void* existing) { UNUSED(existing); return false; @@ -158,7 +158,7 @@ namespace snmalloc return get_reference(); # else auto alloc = get_reference(); - if (unlikely(first_allocation(alloc))) + if (unlikely(needs_initialisation(alloc))) { alloc = reinterpret_cast(init_thread_allocator()); } @@ -255,7 +255,7 @@ namespace snmalloc * is the placeholder allocator. If it returns true, then * `init_thread_allocator` should be called. **/ - SNMALLOC_FAST_PATH bool first_allocation(void* existing) + SNMALLOC_FAST_PATH bool needs_initialisation(void* existing) { return existing == &GlobalPlaceHolder; } From 8a8a2f6549ee6e518e6fc8bbfb6d981c8fd41ba0 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Mon, 16 Mar 2020 10:53:40 +0000 Subject: [PATCH 26/37] Fixup for thread alloc. --- src/mem/threadalloc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mem/threadalloc.h b/src/mem/threadalloc.h index 68a67eb32..416c6a3e9 100644 --- a/src/mem/threadalloc.h +++ b/src/mem/threadalloc.h @@ -80,7 +80,7 @@ namespace snmalloc */ class ThreadAllocCommon { - friend void* lazy_replacement_slow(); + friend void* init_thread_allocator(); protected: static inline void inner_release() From 2215815c90fd4ad9270f09d9f96aef63a501dab2 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Mon, 16 Mar 2020 13:01:36 +0000 Subject: [PATCH 27/37] Clangformat + CR feedback --- src/ds/cdllist.h | 5 +- src/mem/alloc.h | 52 ++++++++++--------- src/mem/allocstats.h | 5 +- src/mem/globalalloc.h | 18 ++++--- src/mem/metaslab.h | 3 +- src/mem/slab.h | 19 ++++--- src/mem/superslab.h | 8 ++- src/mem/threadalloc.h | 10 ++-- .../func/first_operation/first_operation.cc | 19 ++++--- 9 files changed, 75 insertions(+), 64 deletions(-) diff --git a/src/ds/cdllist.h b/src/ds/cdllist.h index 3fec8e959..fcf4e135e 100644 --- a/src/ds/cdllist.h +++ b/src/ds/cdllist.h @@ -1,8 +1,9 @@ #pragma once +#include "defines.h" + #include #include -#include "defines.h" namespace snmalloc { @@ -44,7 +45,7 @@ namespace snmalloc // As this is no longer in the list, check invariant for // neighbouring element. 
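[Editor's note] The check itself is what keeps this affordable on hot paths: it is a single comparison against the address of a statically allocated placeholder that owns no memory. A minimal sketch of the idea (the names here are invented for illustration):

    // A dummy allocator object that never owns memory; thread-local
    // allocator pointers start out pointing at it.
    struct PlaceholderSketch { /* deliberately empty */ };
    inline PlaceholderSketch global_placeholder_sketch;

    inline bool needs_initialisation_sketch(void* existing)
    {
      // True only while this thread is still using the shared placeholder.
      return existing == &global_placeholder_sketch;
    }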
next->debug_check(); - + #ifndef NDEBUG next = nullptr; prev = nullptr; diff --git a/src/mem/alloc.h b/src/mem/alloc.h index c228b0b59..e868a6e73 100644 --- a/src/mem/alloc.h +++ b/src/mem/alloc.h @@ -50,8 +50,8 @@ namespace snmalloc }; /** - * Allocator. This class is parameterised on five template parameters. - * + * Allocator. This class is parameterised on five template parameters. + * * The first two template parameter provides a hook to allow the allocator in * use to be dynamically modified. This is used to implement a trick from * mimalloc that avoids a conditional branch on the fast path. We @@ -61,7 +61,7 @@ namespace snmalloc * allocator if it is has been been initialised already. Splitting into two * functions allows for the code to be structured into tail calls to improve * codegen. - * + * * The `MemoryProvider` defines the source of memory for this allocator. * Allocators try to reuse address space by allocating from existing slabs or * reusing freed large allocations. When they need to allocate a new chunk @@ -81,12 +81,14 @@ namespace snmalloc void* (*InitThreadAllocator)(), class MemoryProvider = GlobalVirtual, class ChunkMap = SNMALLOC_DEFAULT_CHUNKMAP, - bool IsQueueInline = true - > - class Allocator - : public FastFreeLists, - public Pooled< - Allocator> + bool IsQueueInline = true> + class Allocator : public FastFreeLists, + public Pooled> { LargeAlloc large_allocator; ChunkMap chunk_map; @@ -95,7 +97,7 @@ namespace snmalloc * Per size class bumpptr for building new free lists * If aligned to a SLAB start, then it is empty, and a new * slab is required. - ***/ + */ void* bump_ptrs[NUM_SMALL_CLASSES] = {0}; public: @@ -510,8 +512,12 @@ namespace snmalloc /// r is used for which round of sending this is. inline size_t get_slot(size_t id, size_t r) { - constexpr size_t allocator_size = sizeof( - Allocator); + constexpr size_t allocator_size = sizeof(Allocator< + NeedsInitialisation, + InitThreadAllocator, + MemoryProvider, + ChunkMap, + IsQueueInline>); constexpr size_t initial_shift = bits::next_pow2_bits_const(allocator_size); SNMALLOC_ASSERT((initial_shift + (r * REMOTE_SLOT_BITS)) < 64); @@ -1028,14 +1034,14 @@ namespace snmalloc } if (likely(!has_messages())) - return small_alloc_next_free_list(sizeclass); + return small_alloc_next_free_list(sizeclass); return small_alloc_mq_slow(sizeclass); } /** * Slow path for handling message queue, before dealing with small - * allocation request. + * allocation request. **/ template SNMALLOC_SLOW_PATH void* small_alloc_mq_slow(sizeclass_t sizeclass) @@ -1070,7 +1076,7 @@ namespace snmalloc } /** - * Called when, there are no available free list to service this request + * Called when there are no available free list to service this request * Could be due to using the dummy allocator, or needing to bump allocate a * new free list. **/ @@ -1102,8 +1108,7 @@ namespace snmalloc * list. **/ template - SNMALLOC_FAST_PATH - void* small_alloc_new_free_list(sizeclass_t sizeclass) + SNMALLOC_FAST_PATH void* small_alloc_new_free_list(sizeclass_t sizeclass) { auto& bp = bump_ptrs[sizeclass]; if (likely(pointer_align_up(bp, SLAB_SIZE) != bp)) @@ -1119,8 +1124,7 @@ namespace snmalloc * the request from that new list. 
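[Editor's note] The `get_slot` hunk here is only reformatted, but the idea behind it is compact: the target allocator's id is essentially an address, so its low bits (below the allocator's own size) carry no information; skip those, then pick a different group of bits on each sending round so successive batching rounds hash into different buckets. A generic sketch of that bit-slicing (constants and the exact formula are illustrative, not copied from snmalloc):

    #include <cstddef>

    constexpr size_t SLOT_BITS_SKETCH = 6;                          // 64 buckets
    constexpr size_t SLOT_MASK_SKETCH = (size_t(1) << SLOT_BITS_SKETCH) - 1;

    inline size_t get_slot_sketch(size_t id, size_t round, size_t initial_shift)
    {
      // initial_shift skips the always-zero low bits of the id; each round
      // then consumes the next group of SLOT_BITS_SKETCH bits.
      return (id >> (initial_shift + round * SLOT_BITS_SKETCH)) & SLOT_MASK_SKETCH;
    }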
**/ template - SNMALLOC_FAST_PATH - void* small_alloc_build_free_list(sizeclass_t sizeclass) + SNMALLOC_FAST_PATH void* small_alloc_build_free_list(sizeclass_t sizeclass) { auto& bp = bump_ptrs[sizeclass]; auto rsize = sizeclass_to_size(sizeclass); @@ -1140,20 +1144,20 @@ namespace snmalloc /** * Allocates a new slab to allocate from, set it to be the bump allocator - * for this size class, and then builds a new free list from the thread + * for this size class, and then builds a new free list from the thread * local bump allocator and service the request from that new list. **/ template - SNMALLOC_SLOW_PATH - void* small_alloc_new_slab(sizeclass_t sizeclass) + SNMALLOC_SLOW_PATH void* small_alloc_new_slab(sizeclass_t sizeclass) { auto& bp = bump_ptrs[sizeclass]; // Fetch new slab Slab* slab = alloc_slab(sizeclass); if ((allow_reserve == NoReserve) && (slab == nullptr)) return nullptr; - bp = pointer_offset(slab, get_initial_offset(sizeclass, slab->is_short())); - + bp = + pointer_offset(slab, get_initial_offset(sizeclass, slab->is_short())); + return small_alloc_build_free_list(sizeclass); } diff --git a/src/mem/allocstats.h b/src/mem/allocstats.h index 85bea986b..1f6a4acd4 100644 --- a/src/mem/allocstats.h +++ b/src/mem/allocstats.h @@ -125,7 +125,7 @@ namespace snmalloc bits::one_at_bit(bits::ADDRESS_BITS - 1)); Stats sizeclass[N]; - + size_t large_pop_count[LARGE_N] = {0}; size_t large_push_count[LARGE_N] = {0}; @@ -354,8 +354,7 @@ namespace snmalloc << "Size group" << "Size" << "Push count" - << "Pop count" - << csv.endl; + << "Pop count" << csv.endl; csv << "AllocSizes" << "DumpID" diff --git a/src/mem/globalalloc.h b/src/mem/globalalloc.h index fe13f2732..7c33ed7db 100644 --- a/src/mem/globalalloc.h +++ b/src/mem/globalalloc.h @@ -9,23 +9,29 @@ namespace snmalloc inline bool needs_initialisation(void*); void* init_thread_allocator(); - using Alloc = - Allocator; + using Alloc = Allocator< + needs_initialisation, + init_thread_allocator, + GlobalVirtual, + SNMALLOC_DEFAULT_CHUNKMAP, + true>; template class AllocPool : Pool< Allocator< - needs_initialisation, init_thread_allocator, + needs_initialisation, + init_thread_allocator, MemoryProvider, SNMALLOC_DEFAULT_CHUNKMAP, true>, MemoryProvider> { using Alloc = Allocator< - needs_initialisation, init_thread_allocator, MemoryProvider, + needs_initialisation, + init_thread_allocator, + MemoryProvider, SNMALLOC_DEFAULT_CHUNKMAP, - true - >; + true>; using Parent = Pool; public: diff --git a/src/mem/metaslab.h b/src/mem/metaslab.h index af07ba625..efd0d81d0 100644 --- a/src/mem/metaslab.h +++ b/src/mem/metaslab.h @@ -1,7 +1,7 @@ #pragma once -#include "../ds/dllist.h" #include "../ds/cdllist.h" +#include "../ds/dllist.h" #include "../ds/helpers.h" #include "sizeclass.h" @@ -17,7 +17,6 @@ namespace snmalloc return pointer_align_down(sl); } - static_assert( sizeof(SlabLink) <= MIN_ALLOC_SIZE, "Need to be able to pack a SlabLink into any free small alloc"); diff --git a/src/mem/slab.h b/src/mem/slab.h index 2a9a59780..e07aac60e 100644 --- a/src/mem/slab.h +++ b/src/mem/slab.h @@ -33,9 +33,9 @@ namespace snmalloc /** * Takes a free list out of a slabs meta data. - * Returns the link as the allocation, and places the free list into the + * Returns the link as the allocation, and places the free list into the * `fast_free_list` for further allocations. 
- **/ + **/ template SNMALLOC_FAST_PATH void* alloc( SlabList& sl, @@ -89,13 +89,10 @@ namespace snmalloc * Given a bumpptr and a fast_free_list head reference, builds a new free * list, and stores it in the fast_free_list. It will only create a page * worth of allocations, or one if the allocation size is larger than a - * page. + * page. **/ - static - SNMALLOC_FAST_PATH void alloc_new_list( - void*& bumpptr, - FreeListHead& fast_free_list, - size_t rsize) + static SNMALLOC_FAST_PATH void + alloc_new_list(void*& bumpptr, FreeListHead& fast_free_list, size_t rsize) { // Allocate the last object on the current page if there is one, // and then thread the next free list worth of allocations. @@ -104,8 +101,10 @@ namespace snmalloc while (true) { void* newbumpptr = pointer_offset(bumpptr, rsize); - auto alignedbumpptr = bits::align_up(address_cast(bumpptr) - 1, OS_PAGE_SIZE); - auto alignednewbumpptr = bits::align_up(address_cast(newbumpptr), OS_PAGE_SIZE); + auto alignedbumpptr = + bits::align_up(address_cast(bumpptr) - 1, OS_PAGE_SIZE); + auto alignednewbumpptr = + bits::align_up(address_cast(newbumpptr), OS_PAGE_SIZE); if (alignedbumpptr != alignednewbumpptr) { diff --git a/src/mem/superslab.h b/src/mem/superslab.h index 156e1c26f..70890cbc5 100644 --- a/src/mem/superslab.h +++ b/src/mem/superslab.h @@ -166,7 +166,9 @@ namespace snmalloc // returned all the elements, but this is a slab that is still being bump // allocated from. Hence, the bump allocator slab will never be returned // for use in another size class. - meta[0].allocated = (uint16_t)((SLAB_SIZE - get_initial_offset(sizeclass, true)) / sizeclass_to_size(sizeclass)); + meta[0].allocated = (uint16_t)( + (SLAB_SIZE - get_initial_offset(sizeclass, true)) / + sizeclass_to_size(sizeclass)); meta[0].link = 1; meta[0].needed = 1; meta[0].sizeclass = static_cast(sizeclass); @@ -189,7 +191,9 @@ namespace snmalloc // returned all the elements, but this is a slab that is still being bump // allocated from. Hence, the bump allocator slab will never be returned // for use in another size class. - meta[h].allocated = (uint16_t)((SLAB_SIZE - get_initial_offset(sizeclass, false)) / sizeclass_to_size(sizeclass)); + meta[h].allocated = (uint16_t)( + (SLAB_SIZE - get_initial_offset(sizeclass, false)) / + sizeclass_to_size(sizeclass)); meta[h].needed = 1; meta[h].link = 1; meta[h].sizeclass = static_cast(sizeclass); diff --git a/src/mem/threadalloc.h b/src/mem/threadalloc.h index 416c6a3e9..4db62d7c9 100644 --- a/src/mem/threadalloc.h +++ b/src/mem/threadalloc.h @@ -40,8 +40,8 @@ namespace snmalloc /** * Function passed as a template parameter to `Allocator` to allow lazy - * replacement. This function returns true, if the allocated passed in, - * is the placeholder allocator. As the TLS state is managed externally, + * replacement. This function returns true, if the allocator passed in + * requires initialisation. As the TLS state is managed externally, * this will always return false. **/ SNMALLOC_FAST_PATH bool needs_initialisation(void* existing) @@ -60,7 +60,6 @@ namespace snmalloc return nullptr; } - using ThreadAlloc = ThreadAllocUntypedWrapper; #else /** @@ -227,11 +226,12 @@ namespace snmalloc # endif /** - * Slow path for the placeholder replacement. + * Slow path for the placeholder replacement. * Function passed as a tempalte parameter to `Allocator` to allow lazy * replacement. This function initialises the thread local state if requried. 
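[Editor's note] The `alloc_new_list` loop reflowed a little earlier in this patch is worth unpacking: it threads only about a page's worth of objects into the free list per call (or a single object when the size class is larger than a page), so the cost of building the list is paid incrementally. A simplified, self-contained loop with the same intent, using a hypothetical page size and plain pointer arithmetic instead of snmalloc's address helpers:

    #include <cstddef>
    #include <cstdint>

    constexpr uintptr_t PAGE_SKETCH = 4096; // illustrative page size

    inline uintptr_t align_up_sketch(uintptr_t a, uintptr_t align)
    {
      return (a + align - 1) & ~(align - 1);
    }

    // Thread objects of `rsize` bytes (rsize >= sizeof(void*)) starting at
    // `bump` into an intrusive singly linked list, stopping once the next
    // object would no longer fit in the current page.
    inline void* build_page_of_free_list_sketch(char*& bump, size_t rsize)
    {
      char* page_end = reinterpret_cast<char*>(
        align_up_sketch(reinterpret_cast<uintptr_t>(bump) + 1, PAGE_SKETCH));
      void* head = nullptr;
      void** tail = &head;
      do
      {
        *tail = bump;                          // append the current object
        tail = reinterpret_cast<void**>(bump); // its first word stores "next"
        bump += rsize;
      } while (bump + rsize <= page_end);
      *tail = nullptr;                         // terminate the list
      return head;
    }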
* The simple check that this is the global placeholder is inlined, the rest - * of it is only hit in a very unusual case and so should go off the fast path. + * of it is only hit in a very unusual case and so should go off the fast + * path. */ SNMALLOC_SLOW_PATH inline void* init_thread_allocator() { diff --git a/src/test/func/first_operation/first_operation.cc b/src/test/func/first_operation/first_operation.cc index 108da39eb..6af9954af 100644 --- a/src/test/func/first_operation/first_operation.cc +++ b/src/test/func/first_operation/first_operation.cc @@ -1,10 +1,12 @@ /** - * The first operation a thread performs takes a different path to every subsequent operation - * as it must lazily initialise the thread local allocator. This tests performs all sizes of - * allocation, and deallocation as the first operation. - ***/ + * The first operation a thread performs takes a different path to every + * subsequent operation as it must lazily initialise the thread local allocator. + * This tests performs all sizes of allocation, and deallocation as the first + * operation. + */ #include "test/setup.h" + #include #include @@ -25,7 +27,7 @@ void alloc3(size_t size) { auto a = snmalloc::ThreadAlloc::get_noncachable(); void* r = a->alloc(size); - a->dealloc(r,size); + a->dealloc(r, size); } void alloc4(size_t size) @@ -42,7 +44,7 @@ void dealloc1(void* p, size_t) void dealloc2(void* p, size_t size) { - snmalloc::ThreadAlloc::get_noncachable()->dealloc(p,size); + snmalloc::ThreadAlloc::get_noncachable()->dealloc(p, size); } void dealloc3(void* p, size_t) @@ -61,7 +63,7 @@ void f(size_t size) auto t2 = std::thread(alloc2, size); auto t3 = std::thread(alloc3, size); auto t4 = std::thread(alloc4, size); - + auto a = snmalloc::ThreadAlloc::get(); auto p1 = a->alloc(size); auto p2 = a->alloc(size); @@ -108,6 +110,3 @@ int main(int, char**) f((7ULL << exp) - 1); } } - - - From a857b92a7c7b7c15c5406582a81923917ef7b098 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Mon, 16 Mar 2020 13:08:49 +0000 Subject: [PATCH 28/37] More CR --- src/ds/bits.h | 4 ++-- src/ds/cdllist.h | 8 ++++---- src/ds/helpers.h | 2 +- src/mem/alloc.h | 18 +++++++++--------- src/mem/largealloc.h | 12 ++++++------ src/mem/metaslab.h | 4 ++-- src/mem/pagemap.h | 2 +- src/mem/pool.h | 2 +- src/mem/slab.h | 4 ++-- src/mem/threadalloc.h | 8 ++++---- src/pal/pal_consts.h | 10 +++++----- src/pal/pal_windows.h | 4 ++-- src/test/perf/low_memory/low-memory.cc | 2 +- 13 files changed, 40 insertions(+), 40 deletions(-) diff --git a/src/ds/bits.h b/src/ds/bits.h index 9dc05b0aa..3a11088e0 100644 --- a/src/ds/bits.h +++ b/src/ds/bits.h @@ -328,7 +328,7 @@ namespace snmalloc * * `std::min` is in ``, so pulls in a lot of unneccessary code * We write our own to reduce the code that potentially needs reviewing. - **/ + */ template constexpr inline T min(T t1, T t2) { @@ -340,7 +340,7 @@ namespace snmalloc * * `std::max` is in ``, so pulls in a lot of unneccessary code * We write our own to reduce the code that potentially needs reviewing. - **/ + */ template constexpr inline T max(T t1, T t2) { diff --git a/src/ds/cdllist.h b/src/ds/cdllist.h index fcf4e135e..e7f849884 100644 --- a/src/ds/cdllist.h +++ b/src/ds/cdllist.h @@ -11,7 +11,7 @@ namespace snmalloc * Special class for cyclic doubly linked non-empty linked list * * This code assumes there is always one element in the list. The client - * must ensure there is a sentinal element. + */ust ensure there is a sentinal element. 
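[Editor's note] The test being reformatted above covers a case that is easy to forget: a thread whose very first allocator interaction is a deallocation (of memory some other thread allocated) must still go through lazy initialisation, or at least forward the free correctly. A minimal analogue of that pattern, written against the standard allocator rather than snmalloc's test harness:

    #include <cstdlib>
    #include <thread>

    int main()
    {
      void* p = std::malloc(128); // allocated on the main thread

      std::thread t([p] {
        // The first operation this thread performs is a free, so the
        // allocator's first-operation / lazy-initialisation path is what
        // runs here.
        std::free(p);
      });
      t.join();
      return 0;
    }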
**/ class CDLLNode { @@ -20,7 +20,7 @@ namespace snmalloc public: /** - * Single element cyclic list. This is the empty case. + */ingle element cyclic list. This is the empty case. **/ CDLLNode() { @@ -34,7 +34,7 @@ namespace snmalloc } /** - * Removes this element from the cyclic list is it part of. + */emoves this element from the cyclic list is it part of. **/ SNMALLOC_FAST_PATH void remove() { @@ -85,7 +85,7 @@ namespace snmalloc /** * Checks the lists invariants * x->next->prev = x - * for all x in the list. + */or all x in the list. **/ void debug_check() { diff --git a/src/ds/helpers.h b/src/ds/helpers.h index 5434c3347..e3c2290a5 100644 --- a/src/ds/helpers.h +++ b/src/ds/helpers.h @@ -48,7 +48,7 @@ namespace snmalloc * * Wraps on read. This allows code to trust the value is in range, even when * there is a memory corruption. - **/ + */ template class Mod { diff --git a/src/mem/alloc.h b/src/mem/alloc.h index e868a6e73..a212514b4 100644 --- a/src/mem/alloc.h +++ b/src/mem/alloc.h @@ -714,7 +714,7 @@ namespace snmalloc * * If result pointer is null, then this code raises a Pal::error on the * particular check that fails, if any do fail. - **/ + */ void debug_is_empty(bool* result) { auto test = [&result](auto& queue) { @@ -890,7 +890,7 @@ namespace snmalloc /** * Check if this allocator has messages to deallocate blocks from another * thread - **/ + */ SNMALLOC_FAST_PATH bool has_messages() { return !(message_queue().is_empty()); @@ -1042,7 +1042,7 @@ namespace snmalloc /** * Slow path for handling message queue, before dealing with small * allocation request. - **/ + */ template SNMALLOC_SLOW_PATH void* small_alloc_mq_slow(sizeclass_t sizeclass) { @@ -1053,7 +1053,7 @@ namespace snmalloc /** * Attempt to find a new free list to allocate from - **/ + */ template SNMALLOC_SLOW_PATH void* small_alloc_next_free_list(sizeclass_t sizeclass) { @@ -1079,7 +1079,7 @@ namespace snmalloc * Called when there are no available free list to service this request * Could be due to using the dummy allocator, or needing to bump allocate a * new free list. - **/ + */ template SNMALLOC_SLOW_PATH void* small_alloc_rare(sizeclass_t sizeclass) { @@ -1094,7 +1094,7 @@ namespace snmalloc /** * Called on first allocation to set up the thread local allocator, * then directs the allocation request to the newly created allocator. - **/ + */ template SNMALLOC_SLOW_PATH void* small_alloc_first_alloc(sizeclass_t sizeclass) { @@ -1106,7 +1106,7 @@ namespace snmalloc /** * Called to create a new free list, and service the request from that new * list. - **/ + */ template SNMALLOC_FAST_PATH void* small_alloc_new_free_list(sizeclass_t sizeclass) { @@ -1122,7 +1122,7 @@ namespace snmalloc /** * Creates a new free list from the thread local bump allocator and service * the request from that new list. - **/ + */ template SNMALLOC_FAST_PATH void* small_alloc_build_free_list(sizeclass_t sizeclass) { @@ -1146,7 +1146,7 @@ namespace snmalloc * Allocates a new slab to allocate from, set it to be the bump allocator * for this size class, and then builds a new free list from the thread * local bump allocator and service the request from that new list. 
- **/ + */ template SNMALLOC_SLOW_PATH void* small_alloc_new_slab(sizeclass_t sizeclass) { diff --git a/src/mem/largealloc.h b/src/mem/largealloc.h index ac8b6a2f2..ca5f857ae 100644 --- a/src/mem/largealloc.h +++ b/src/mem/largealloc.h @@ -60,23 +60,23 @@ namespace snmalloc { /** * Flag to protect the bump allocator - **/ + */ std::atomic_flag lock = ATOMIC_FLAG_INIT; /** * Pointer to block being bump allocated - **/ + */ void* bump = nullptr; /** * Space remaining in this block being bump allocated - **/ + */ size_t remaining = 0; /** * Simple flag for checking if another instance of lazy-decommit is * running - **/ + */ std::atomic_flag lazy_decommit_guard = {}; public: @@ -87,7 +87,7 @@ namespace snmalloc /** * Make a new memory provide for this PAL. - **/ + */ static MemoryProviderStateMixin* make() noexcept { // Temporary stack-based storage to start the allocator in. @@ -197,7 +197,7 @@ namespace snmalloc /*** * Method for callback object to perform lazy decommit. - **/ + */ static void process(PalNotificationObject* p) { // Unsafe downcast here. Don't want vtable and RTTI. diff --git a/src/mem/metaslab.h b/src/mem/metaslab.h index efd0d81d0..0ece39ed7 100644 --- a/src/mem/metaslab.h +++ b/src/mem/metaslab.h @@ -65,7 +65,7 @@ namespace snmalloc * - was full before the subtraction * this returns true, otherwise returns false. **/ - bool return_object() + b*/ return_object() { return (--needed) == 0; } @@ -158,7 +158,7 @@ namespace snmalloc * We don't expect a cycle, so worst case is only followed by a crash, so * slow doesn't mater. **/ - size_t debug_slab_acyclic_free_list(Slab* slab) + s*/_t debug_slab_acyclic_free_list(Slab* slab) { #ifndef NDEBUG size_t length = 0; diff --git a/src/mem/pagemap.h b/src/mem/pagemap.h index 6d2c94445..fc3da8550 100644 --- a/src/mem/pagemap.h +++ b/src/mem/pagemap.h @@ -319,7 +319,7 @@ namespace snmalloc /** * Simple pagemap that for each GRANULARITY_BITS of the address range * stores a T. - **/ + */ template class alignas(OS_PAGE_SIZE) FlatPagemap { diff --git a/src/mem/pool.h b/src/mem/pool.h index 2736c568c..366e68728 100644 --- a/src/mem/pool.h +++ b/src/mem/pool.h @@ -15,7 +15,7 @@ namespace snmalloc * concurrency safe. * * This is used to bootstrap the allocation of allocators. - **/ + */ template class Pool { diff --git a/src/mem/slab.h b/src/mem/slab.h index e07aac60e..548d9ffc1 100644 --- a/src/mem/slab.h +++ b/src/mem/slab.h @@ -35,7 +35,7 @@ namespace snmalloc * Takes a free list out of a slabs meta data. * Returns the link as the allocation, and places the free list into the * `fast_free_list` for further allocations. - **/ + */ template SNMALLOC_FAST_PATH void* alloc( SlabList& sl, @@ -90,7 +90,7 @@ namespace snmalloc * list, and stores it in the fast_free_list. It will only create a page * worth of allocations, or one if the allocation size is larger than a * page. - **/ + */ static SNMALLOC_FAST_PATH void alloc_new_list(void*& bumpptr, FreeListHead& fast_free_list, size_t rsize) { diff --git a/src/mem/threadalloc.h b/src/mem/threadalloc.h index 4db62d7c9..9c53f4cee 100644 --- a/src/mem/threadalloc.h +++ b/src/mem/threadalloc.h @@ -43,7 +43,7 @@ namespace snmalloc * replacement. This function returns true, if the allocator passed in * requires initialisation. As the TLS state is managed externally, * this will always return false. 
- **/ + */ SNMALLOC_FAST_PATH bool needs_initialisation(void* existing) { UNUSED(existing); @@ -54,7 +54,7 @@ namespace snmalloc * Function passed as a tempalte parameter to `Allocator` to allow lazy * replacement. There is nothing to initialise in this case, so we expect * this to never be called. - **/ + */ SNMALLOC_FAST_PATH void* init_thread_allocator() { return nullptr; @@ -94,7 +94,7 @@ namespace snmalloc /** * Default clean up does nothing except print statistics if enabled. - **/ + */ static void register_cleanup() { # ifdef USE_SNMALLOC_STATS @@ -254,7 +254,7 @@ namespace snmalloc * replacement. This function returns true, if the allocated passed in, * is the placeholder allocator. If it returns true, then * `init_thread_allocator` should be called. - **/ + */ SNMALLOC_FAST_PATH bool needs_initialisation(void* existing) { return existing == &GlobalPlaceHolder; diff --git a/src/pal/pal_consts.h b/src/pal/pal_consts.h index a86feb536..538f90d86 100644 --- a/src/pal/pal_consts.h +++ b/src/pal/pal_consts.h @@ -62,7 +62,7 @@ namespace snmalloc * This struct is used to represent callbacks for notification from the * platform. It contains a next pointer as client is responsible for * allocation as we cannot assume an allocator at this point. - **/ + */ struct PalNotificationObject { std::atomic pal_next; @@ -72,12 +72,12 @@ namespace snmalloc /*** * Wrapper for managing notifications for PAL events - **/ + */ class PalNotifier { /** * List of callbacks to notify - **/ + */ std::atomic callbacks = nullptr; public: @@ -86,7 +86,7 @@ namespace snmalloc * * The object should never be deallocated by the client after calling * this. - **/ + */ void register_notification(PalNotificationObject* callback) { callback->pal_next = nullptr; @@ -105,7 +105,7 @@ namespace snmalloc /** * Calls the pal_notify of all the registered objects. - **/ + */ void notify_all() { PalNotificationObject* curr = callbacks; diff --git a/src/pal/pal_windows.h b/src/pal/pal_windows.h index 8c1fc277b..ba7fcea7e 100644 --- a/src/pal/pal_windows.h +++ b/src/pal/pal_windows.h @@ -34,7 +34,7 @@ namespace snmalloc /** * List of callbacks for low-memory notification - **/ + */ static inline PalNotifier low_memory_callbacks; /** @@ -98,7 +98,7 @@ namespace snmalloc * Register callback object for low-memory notifications. * Client is responsible for allocation, and ensuring the object is live * for the duration of the program. - **/ + */ static void register_for_low_memory_callback(PalNotificationObject* callback) { diff --git a/src/test/perf/low_memory/low-memory.cc b/src/test/perf/low_memory/low-memory.cc index 046a2cc54..68007b4a7 100644 --- a/src/test/perf/low_memory/low-memory.cc +++ b/src/test/perf/low_memory/low-memory.cc @@ -97,7 +97,7 @@ void reduce_pressure(Queue& allocations) * Wrapper to handle Pals that don't have the method. * Template parameter required to handle `if constexpr` always evaluating both * sides. - **/ + */ template void register_for_pal_notifications() { From 841314edc1d1571d7cf3b3027107d9f7c28f96e5 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Mon, 16 Mar 2020 13:13:47 +0000 Subject: [PATCH 29/37] Revert "More CR" This reverts commit a857b92a7c7b7c15c5406582a81923917ef7b098. 
--- src/ds/bits.h | 4 ++-- src/ds/cdllist.h | 8 ++++---- src/ds/helpers.h | 2 +- src/mem/alloc.h | 18 +++++++++--------- src/mem/largealloc.h | 12 ++++++------ src/mem/metaslab.h | 4 ++-- src/mem/pagemap.h | 2 +- src/mem/pool.h | 2 +- src/mem/slab.h | 4 ++-- src/mem/threadalloc.h | 8 ++++---- src/pal/pal_consts.h | 10 +++++----- src/pal/pal_windows.h | 4 ++-- src/test/perf/low_memory/low-memory.cc | 2 +- 13 files changed, 40 insertions(+), 40 deletions(-) diff --git a/src/ds/bits.h b/src/ds/bits.h index 3a11088e0..9dc05b0aa 100644 --- a/src/ds/bits.h +++ b/src/ds/bits.h @@ -328,7 +328,7 @@ namespace snmalloc * * `std::min` is in ``, so pulls in a lot of unneccessary code * We write our own to reduce the code that potentially needs reviewing. - */ + **/ template constexpr inline T min(T t1, T t2) { @@ -340,7 +340,7 @@ namespace snmalloc * * `std::max` is in ``, so pulls in a lot of unneccessary code * We write our own to reduce the code that potentially needs reviewing. - */ + **/ template constexpr inline T max(T t1, T t2) { diff --git a/src/ds/cdllist.h b/src/ds/cdllist.h index e7f849884..fcf4e135e 100644 --- a/src/ds/cdllist.h +++ b/src/ds/cdllist.h @@ -11,7 +11,7 @@ namespace snmalloc * Special class for cyclic doubly linked non-empty linked list * * This code assumes there is always one element in the list. The client - */ust ensure there is a sentinal element. + * must ensure there is a sentinal element. **/ class CDLLNode { @@ -20,7 +20,7 @@ namespace snmalloc public: /** - */ingle element cyclic list. This is the empty case. + * Single element cyclic list. This is the empty case. **/ CDLLNode() { @@ -34,7 +34,7 @@ namespace snmalloc } /** - */emoves this element from the cyclic list is it part of. + * Removes this element from the cyclic list is it part of. **/ SNMALLOC_FAST_PATH void remove() { @@ -85,7 +85,7 @@ namespace snmalloc /** * Checks the lists invariants * x->next->prev = x - */or all x in the list. + * for all x in the list. **/ void debug_check() { diff --git a/src/ds/helpers.h b/src/ds/helpers.h index e3c2290a5..5434c3347 100644 --- a/src/ds/helpers.h +++ b/src/ds/helpers.h @@ -48,7 +48,7 @@ namespace snmalloc * * Wraps on read. This allows code to trust the value is in range, even when * there is a memory corruption. - */ + **/ template class Mod { diff --git a/src/mem/alloc.h b/src/mem/alloc.h index a212514b4..e868a6e73 100644 --- a/src/mem/alloc.h +++ b/src/mem/alloc.h @@ -714,7 +714,7 @@ namespace snmalloc * * If result pointer is null, then this code raises a Pal::error on the * particular check that fails, if any do fail. - */ + **/ void debug_is_empty(bool* result) { auto test = [&result](auto& queue) { @@ -890,7 +890,7 @@ namespace snmalloc /** * Check if this allocator has messages to deallocate blocks from another * thread - */ + **/ SNMALLOC_FAST_PATH bool has_messages() { return !(message_queue().is_empty()); @@ -1042,7 +1042,7 @@ namespace snmalloc /** * Slow path for handling message queue, before dealing with small * allocation request. - */ + **/ template SNMALLOC_SLOW_PATH void* small_alloc_mq_slow(sizeclass_t sizeclass) { @@ -1053,7 +1053,7 @@ namespace snmalloc /** * Attempt to find a new free list to allocate from - */ + **/ template SNMALLOC_SLOW_PATH void* small_alloc_next_free_list(sizeclass_t sizeclass) { @@ -1079,7 +1079,7 @@ namespace snmalloc * Called when there are no available free list to service this request * Could be due to using the dummy allocator, or needing to bump allocate a * new free list. 
- */ + **/ template SNMALLOC_SLOW_PATH void* small_alloc_rare(sizeclass_t sizeclass) { @@ -1094,7 +1094,7 @@ namespace snmalloc /** * Called on first allocation to set up the thread local allocator, * then directs the allocation request to the newly created allocator. - */ + **/ template SNMALLOC_SLOW_PATH void* small_alloc_first_alloc(sizeclass_t sizeclass) { @@ -1106,7 +1106,7 @@ namespace snmalloc /** * Called to create a new free list, and service the request from that new * list. - */ + **/ template SNMALLOC_FAST_PATH void* small_alloc_new_free_list(sizeclass_t sizeclass) { @@ -1122,7 +1122,7 @@ namespace snmalloc /** * Creates a new free list from the thread local bump allocator and service * the request from that new list. - */ + **/ template SNMALLOC_FAST_PATH void* small_alloc_build_free_list(sizeclass_t sizeclass) { @@ -1146,7 +1146,7 @@ namespace snmalloc * Allocates a new slab to allocate from, set it to be the bump allocator * for this size class, and then builds a new free list from the thread * local bump allocator and service the request from that new list. - */ + **/ template SNMALLOC_SLOW_PATH void* small_alloc_new_slab(sizeclass_t sizeclass) { diff --git a/src/mem/largealloc.h b/src/mem/largealloc.h index ca5f857ae..ac8b6a2f2 100644 --- a/src/mem/largealloc.h +++ b/src/mem/largealloc.h @@ -60,23 +60,23 @@ namespace snmalloc { /** * Flag to protect the bump allocator - */ + **/ std::atomic_flag lock = ATOMIC_FLAG_INIT; /** * Pointer to block being bump allocated - */ + **/ void* bump = nullptr; /** * Space remaining in this block being bump allocated - */ + **/ size_t remaining = 0; /** * Simple flag for checking if another instance of lazy-decommit is * running - */ + **/ std::atomic_flag lazy_decommit_guard = {}; public: @@ -87,7 +87,7 @@ namespace snmalloc /** * Make a new memory provide for this PAL. - */ + **/ static MemoryProviderStateMixin* make() noexcept { // Temporary stack-based storage to start the allocator in. @@ -197,7 +197,7 @@ namespace snmalloc /*** * Method for callback object to perform lazy decommit. - */ + **/ static void process(PalNotificationObject* p) { // Unsafe downcast here. Don't want vtable and RTTI. diff --git a/src/mem/metaslab.h b/src/mem/metaslab.h index 0ece39ed7..efd0d81d0 100644 --- a/src/mem/metaslab.h +++ b/src/mem/metaslab.h @@ -65,7 +65,7 @@ namespace snmalloc * - was full before the subtraction * this returns true, otherwise returns false. **/ - b*/ return_object() + bool return_object() { return (--needed) == 0; } @@ -158,7 +158,7 @@ namespace snmalloc * We don't expect a cycle, so worst case is only followed by a crash, so * slow doesn't mater. **/ - s*/_t debug_slab_acyclic_free_list(Slab* slab) + size_t debug_slab_acyclic_free_list(Slab* slab) { #ifndef NDEBUG size_t length = 0; diff --git a/src/mem/pagemap.h b/src/mem/pagemap.h index fc3da8550..6d2c94445 100644 --- a/src/mem/pagemap.h +++ b/src/mem/pagemap.h @@ -319,7 +319,7 @@ namespace snmalloc /** * Simple pagemap that for each GRANULARITY_BITS of the address range * stores a T. - */ + **/ template class alignas(OS_PAGE_SIZE) FlatPagemap { diff --git a/src/mem/pool.h b/src/mem/pool.h index 366e68728..2736c568c 100644 --- a/src/mem/pool.h +++ b/src/mem/pool.h @@ -15,7 +15,7 @@ namespace snmalloc * concurrency safe. * * This is used to bootstrap the allocation of allocators. 
- */ + **/ template class Pool { diff --git a/src/mem/slab.h b/src/mem/slab.h index 548d9ffc1..e07aac60e 100644 --- a/src/mem/slab.h +++ b/src/mem/slab.h @@ -35,7 +35,7 @@ namespace snmalloc * Takes a free list out of a slabs meta data. * Returns the link as the allocation, and places the free list into the * `fast_free_list` for further allocations. - */ + **/ template SNMALLOC_FAST_PATH void* alloc( SlabList& sl, @@ -90,7 +90,7 @@ namespace snmalloc * list, and stores it in the fast_free_list. It will only create a page * worth of allocations, or one if the allocation size is larger than a * page. - */ + **/ static SNMALLOC_FAST_PATH void alloc_new_list(void*& bumpptr, FreeListHead& fast_free_list, size_t rsize) { diff --git a/src/mem/threadalloc.h b/src/mem/threadalloc.h index 9c53f4cee..4db62d7c9 100644 --- a/src/mem/threadalloc.h +++ b/src/mem/threadalloc.h @@ -43,7 +43,7 @@ namespace snmalloc * replacement. This function returns true, if the allocator passed in * requires initialisation. As the TLS state is managed externally, * this will always return false. - */ + **/ SNMALLOC_FAST_PATH bool needs_initialisation(void* existing) { UNUSED(existing); @@ -54,7 +54,7 @@ namespace snmalloc * Function passed as a tempalte parameter to `Allocator` to allow lazy * replacement. There is nothing to initialise in this case, so we expect * this to never be called. - */ + **/ SNMALLOC_FAST_PATH void* init_thread_allocator() { return nullptr; @@ -94,7 +94,7 @@ namespace snmalloc /** * Default clean up does nothing except print statistics if enabled. - */ + **/ static void register_cleanup() { # ifdef USE_SNMALLOC_STATS @@ -254,7 +254,7 @@ namespace snmalloc * replacement. This function returns true, if the allocated passed in, * is the placeholder allocator. If it returns true, then * `init_thread_allocator` should be called. - */ + **/ SNMALLOC_FAST_PATH bool needs_initialisation(void* existing) { return existing == &GlobalPlaceHolder; diff --git a/src/pal/pal_consts.h b/src/pal/pal_consts.h index 538f90d86..a86feb536 100644 --- a/src/pal/pal_consts.h +++ b/src/pal/pal_consts.h @@ -62,7 +62,7 @@ namespace snmalloc * This struct is used to represent callbacks for notification from the * platform. It contains a next pointer as client is responsible for * allocation as we cannot assume an allocator at this point. - */ + **/ struct PalNotificationObject { std::atomic pal_next; @@ -72,12 +72,12 @@ namespace snmalloc /*** * Wrapper for managing notifications for PAL events - */ + **/ class PalNotifier { /** * List of callbacks to notify - */ + **/ std::atomic callbacks = nullptr; public: @@ -86,7 +86,7 @@ namespace snmalloc * * The object should never be deallocated by the client after calling * this. - */ + **/ void register_notification(PalNotificationObject* callback) { callback->pal_next = nullptr; @@ -105,7 +105,7 @@ namespace snmalloc /** * Calls the pal_notify of all the registered objects. - */ + **/ void notify_all() { PalNotificationObject* curr = callbacks; diff --git a/src/pal/pal_windows.h b/src/pal/pal_windows.h index ba7fcea7e..8c1fc277b 100644 --- a/src/pal/pal_windows.h +++ b/src/pal/pal_windows.h @@ -34,7 +34,7 @@ namespace snmalloc /** * List of callbacks for low-memory notification - */ + **/ static inline PalNotifier low_memory_callbacks; /** @@ -98,7 +98,7 @@ namespace snmalloc * Register callback object for low-memory notifications. * Client is responsible for allocation, and ensuring the object is live * for the duration of the program. 
- */ + **/ static void register_for_low_memory_callback(PalNotificationObject* callback) { diff --git a/src/test/perf/low_memory/low-memory.cc b/src/test/perf/low_memory/low-memory.cc index 68007b4a7..046a2cc54 100644 --- a/src/test/perf/low_memory/low-memory.cc +++ b/src/test/perf/low_memory/low-memory.cc @@ -97,7 +97,7 @@ void reduce_pressure(Queue& allocations) * Wrapper to handle Pals that don't have the method. * Template parameter required to handle `if constexpr` always evaluating both * sides. - */ + **/ template void register_for_pal_notifications() { From baff3efb0a56e52e86a6a9fcc6654385264ee616 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Mon, 16 Mar 2020 13:14:33 +0000 Subject: [PATCH 30/37] CR attempt two. --- src/ds/bits.h | 4 ++-- src/ds/cdllist.h | 8 ++++---- src/ds/helpers.h | 2 +- src/mem/alloc.h | 18 +++++++++--------- src/mem/largealloc.h | 12 ++++++------ src/mem/metaslab.h | 4 ++-- src/mem/pagemap.h | 2 +- src/mem/pool.h | 2 +- src/mem/slab.h | 4 ++-- src/mem/threadalloc.h | 8 ++++---- src/pal/pal_consts.h | 10 +++++----- src/pal/pal_windows.h | 4 ++-- src/test/perf/low_memory/low-memory.cc | 2 +- 13 files changed, 40 insertions(+), 40 deletions(-) diff --git a/src/ds/bits.h b/src/ds/bits.h index 9dc05b0aa..3a11088e0 100644 --- a/src/ds/bits.h +++ b/src/ds/bits.h @@ -328,7 +328,7 @@ namespace snmalloc * * `std::min` is in ``, so pulls in a lot of unneccessary code * We write our own to reduce the code that potentially needs reviewing. - **/ + */ template constexpr inline T min(T t1, T t2) { @@ -340,7 +340,7 @@ namespace snmalloc * * `std::max` is in ``, so pulls in a lot of unneccessary code * We write our own to reduce the code that potentially needs reviewing. - **/ + */ template constexpr inline T max(T t1, T t2) { diff --git a/src/ds/cdllist.h b/src/ds/cdllist.h index fcf4e135e..d1f6c6525 100644 --- a/src/ds/cdllist.h +++ b/src/ds/cdllist.h @@ -12,7 +12,7 @@ namespace snmalloc * * This code assumes there is always one element in the list. The client * must ensure there is a sentinal element. - **/ + */ class CDLLNode { CDLLNode* next; @@ -21,7 +21,7 @@ namespace snmalloc public: /** * Single element cyclic list. This is the empty case. - **/ + */ CDLLNode() { next = this; @@ -35,7 +35,7 @@ namespace snmalloc /** * Removes this element from the cyclic list is it part of. - **/ + */ SNMALLOC_FAST_PATH void remove() { SNMALLOC_ASSERT(!is_empty()); @@ -86,7 +86,7 @@ namespace snmalloc * Checks the lists invariants * x->next->prev = x * for all x in the list. - **/ + */ void debug_check() { #ifndef NDEBUG diff --git a/src/ds/helpers.h b/src/ds/helpers.h index 5434c3347..e3c2290a5 100644 --- a/src/ds/helpers.h +++ b/src/ds/helpers.h @@ -48,7 +48,7 @@ namespace snmalloc * * Wraps on read. This allows code to trust the value is in range, even when * there is a memory corruption. - **/ + */ template class Mod { diff --git a/src/mem/alloc.h b/src/mem/alloc.h index e868a6e73..a212514b4 100644 --- a/src/mem/alloc.h +++ b/src/mem/alloc.h @@ -714,7 +714,7 @@ namespace snmalloc * * If result pointer is null, then this code raises a Pal::error on the * particular check that fails, if any do fail. 
- **/ + */ void debug_is_empty(bool* result) { auto test = [&result](auto& queue) { @@ -890,7 +890,7 @@ namespace snmalloc /** * Check if this allocator has messages to deallocate blocks from another * thread - **/ + */ SNMALLOC_FAST_PATH bool has_messages() { return !(message_queue().is_empty()); @@ -1042,7 +1042,7 @@ namespace snmalloc /** * Slow path for handling message queue, before dealing with small * allocation request. - **/ + */ template SNMALLOC_SLOW_PATH void* small_alloc_mq_slow(sizeclass_t sizeclass) { @@ -1053,7 +1053,7 @@ namespace snmalloc /** * Attempt to find a new free list to allocate from - **/ + */ template SNMALLOC_SLOW_PATH void* small_alloc_next_free_list(sizeclass_t sizeclass) { @@ -1079,7 +1079,7 @@ namespace snmalloc * Called when there are no available free list to service this request * Could be due to using the dummy allocator, or needing to bump allocate a * new free list. - **/ + */ template SNMALLOC_SLOW_PATH void* small_alloc_rare(sizeclass_t sizeclass) { @@ -1094,7 +1094,7 @@ namespace snmalloc /** * Called on first allocation to set up the thread local allocator, * then directs the allocation request to the newly created allocator. - **/ + */ template SNMALLOC_SLOW_PATH void* small_alloc_first_alloc(sizeclass_t sizeclass) { @@ -1106,7 +1106,7 @@ namespace snmalloc /** * Called to create a new free list, and service the request from that new * list. - **/ + */ template SNMALLOC_FAST_PATH void* small_alloc_new_free_list(sizeclass_t sizeclass) { @@ -1122,7 +1122,7 @@ namespace snmalloc /** * Creates a new free list from the thread local bump allocator and service * the request from that new list. - **/ + */ template SNMALLOC_FAST_PATH void* small_alloc_build_free_list(sizeclass_t sizeclass) { @@ -1146,7 +1146,7 @@ namespace snmalloc * Allocates a new slab to allocate from, set it to be the bump allocator * for this size class, and then builds a new free list from the thread * local bump allocator and service the request from that new list. - **/ + */ template SNMALLOC_SLOW_PATH void* small_alloc_new_slab(sizeclass_t sizeclass) { diff --git a/src/mem/largealloc.h b/src/mem/largealloc.h index ac8b6a2f2..ca5f857ae 100644 --- a/src/mem/largealloc.h +++ b/src/mem/largealloc.h @@ -60,23 +60,23 @@ namespace snmalloc { /** * Flag to protect the bump allocator - **/ + */ std::atomic_flag lock = ATOMIC_FLAG_INIT; /** * Pointer to block being bump allocated - **/ + */ void* bump = nullptr; /** * Space remaining in this block being bump allocated - **/ + */ size_t remaining = 0; /** * Simple flag for checking if another instance of lazy-decommit is * running - **/ + */ std::atomic_flag lazy_decommit_guard = {}; public: @@ -87,7 +87,7 @@ namespace snmalloc /** * Make a new memory provide for this PAL. - **/ + */ static MemoryProviderStateMixin* make() noexcept { // Temporary stack-based storage to start the allocator in. @@ -197,7 +197,7 @@ namespace snmalloc /*** * Method for callback object to perform lazy decommit. - **/ + */ static void process(PalNotificationObject* p) { // Unsafe downcast here. Don't want vtable and RTTI. diff --git a/src/mem/metaslab.h b/src/mem/metaslab.h index efd0d81d0..4d53b77f2 100644 --- a/src/mem/metaslab.h +++ b/src/mem/metaslab.h @@ -64,7 +64,7 @@ namespace snmalloc * - empty adding the entry to the free list, or * - was full before the subtraction * this returns true, otherwise returns false. 
- **/ + */ bool return_object() { return (--needed) == 0; @@ -157,7 +157,7 @@ namespace snmalloc * https://en.wikipedia.org/wiki/Cycle_detection#Floyd's_Tortoise_and_Hare * We don't expect a cycle, so worst case is only followed by a crash, so * slow doesn't mater. - **/ + */ size_t debug_slab_acyclic_free_list(Slab* slab) { #ifndef NDEBUG diff --git a/src/mem/pagemap.h b/src/mem/pagemap.h index 6d2c94445..fc3da8550 100644 --- a/src/mem/pagemap.h +++ b/src/mem/pagemap.h @@ -319,7 +319,7 @@ namespace snmalloc /** * Simple pagemap that for each GRANULARITY_BITS of the address range * stores a T. - **/ + */ template class alignas(OS_PAGE_SIZE) FlatPagemap { diff --git a/src/mem/pool.h b/src/mem/pool.h index 2736c568c..366e68728 100644 --- a/src/mem/pool.h +++ b/src/mem/pool.h @@ -15,7 +15,7 @@ namespace snmalloc * concurrency safe. * * This is used to bootstrap the allocation of allocators. - **/ + */ template class Pool { diff --git a/src/mem/slab.h b/src/mem/slab.h index e07aac60e..548d9ffc1 100644 --- a/src/mem/slab.h +++ b/src/mem/slab.h @@ -35,7 +35,7 @@ namespace snmalloc * Takes a free list out of a slabs meta data. * Returns the link as the allocation, and places the free list into the * `fast_free_list` for further allocations. - **/ + */ template SNMALLOC_FAST_PATH void* alloc( SlabList& sl, @@ -90,7 +90,7 @@ namespace snmalloc * list, and stores it in the fast_free_list. It will only create a page * worth of allocations, or one if the allocation size is larger than a * page. - **/ + */ static SNMALLOC_FAST_PATH void alloc_new_list(void*& bumpptr, FreeListHead& fast_free_list, size_t rsize) { diff --git a/src/mem/threadalloc.h b/src/mem/threadalloc.h index 4db62d7c9..9c53f4cee 100644 --- a/src/mem/threadalloc.h +++ b/src/mem/threadalloc.h @@ -43,7 +43,7 @@ namespace snmalloc * replacement. This function returns true, if the allocator passed in * requires initialisation. As the TLS state is managed externally, * this will always return false. - **/ + */ SNMALLOC_FAST_PATH bool needs_initialisation(void* existing) { UNUSED(existing); @@ -54,7 +54,7 @@ namespace snmalloc * Function passed as a tempalte parameter to `Allocator` to allow lazy * replacement. There is nothing to initialise in this case, so we expect * this to never be called. - **/ + */ SNMALLOC_FAST_PATH void* init_thread_allocator() { return nullptr; @@ -94,7 +94,7 @@ namespace snmalloc /** * Default clean up does nothing except print statistics if enabled. - **/ + */ static void register_cleanup() { # ifdef USE_SNMALLOC_STATS @@ -254,7 +254,7 @@ namespace snmalloc * replacement. This function returns true, if the allocated passed in, * is the placeholder allocator. If it returns true, then * `init_thread_allocator` should be called. - **/ + */ SNMALLOC_FAST_PATH bool needs_initialisation(void* existing) { return existing == &GlobalPlaceHolder; diff --git a/src/pal/pal_consts.h b/src/pal/pal_consts.h index a86feb536..538f90d86 100644 --- a/src/pal/pal_consts.h +++ b/src/pal/pal_consts.h @@ -62,7 +62,7 @@ namespace snmalloc * This struct is used to represent callbacks for notification from the * platform. It contains a next pointer as client is responsible for * allocation as we cannot assume an allocator at this point. 
- **/ + */ struct PalNotificationObject { std::atomic pal_next; @@ -72,12 +72,12 @@ namespace snmalloc /*** * Wrapper for managing notifications for PAL events - **/ + */ class PalNotifier { /** * List of callbacks to notify - **/ + */ std::atomic callbacks = nullptr; public: @@ -86,7 +86,7 @@ namespace snmalloc * * The object should never be deallocated by the client after calling * this. - **/ + */ void register_notification(PalNotificationObject* callback) { callback->pal_next = nullptr; @@ -105,7 +105,7 @@ namespace snmalloc /** * Calls the pal_notify of all the registered objects. - **/ + */ void notify_all() { PalNotificationObject* curr = callbacks; diff --git a/src/pal/pal_windows.h b/src/pal/pal_windows.h index 8c1fc277b..ba7fcea7e 100644 --- a/src/pal/pal_windows.h +++ b/src/pal/pal_windows.h @@ -34,7 +34,7 @@ namespace snmalloc /** * List of callbacks for low-memory notification - **/ + */ static inline PalNotifier low_memory_callbacks; /** @@ -98,7 +98,7 @@ namespace snmalloc * Register callback object for low-memory notifications. * Client is responsible for allocation, and ensuring the object is live * for the duration of the program. - **/ + */ static void register_for_low_memory_callback(PalNotificationObject* callback) { diff --git a/src/test/perf/low_memory/low-memory.cc b/src/test/perf/low_memory/low-memory.cc index 046a2cc54..68007b4a7 100644 --- a/src/test/perf/low_memory/low-memory.cc +++ b/src/test/perf/low_memory/low-memory.cc @@ -97,7 +97,7 @@ void reduce_pressure(Queue& allocations) * Wrapper to handle Pals that don't have the method. * Template parameter required to handle `if constexpr` always evaluating both * sides. - **/ + */ template void register_for_pal_notifications() { From 6bf711505df44dab339930c2573cd87a2e16fe52 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Mon, 16 Mar 2020 13:15:46 +0000 Subject: [PATCH 31/37] Fix assert --- src/mem/alloc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mem/alloc.h b/src/mem/alloc.h index a212514b4..9e94229df 100644 --- a/src/mem/alloc.h +++ b/src/mem/alloc.h @@ -1129,7 +1129,7 @@ namespace snmalloc auto& bp = bump_ptrs[sizeclass]; auto rsize = sizeclass_to_size(sizeclass); auto& ffl = small_fast_free_lists[sizeclass]; - assert(ffl.value == nullptr); + SNMALLOC_ASSERT(ffl.value == nullptr); Slab::alloc_new_list(bp, ffl, rsize); void* p = remove_cache_friendly_offset(ffl.value, sizeclass); From 193e27a3f713da62a9e701bf2ab3d6c99a85c999 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Mon, 16 Mar 2020 14:05:56 +0000 Subject: [PATCH 32/37] Bug fix found by CI. --- src/mem/alloc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mem/alloc.h b/src/mem/alloc.h index 9e94229df..39df2923b 100644 --- a/src/mem/alloc.h +++ b/src/mem/alloc.h @@ -269,7 +269,7 @@ namespace snmalloc SNMALLOC_SLOW_PATH void dealloc_sized_slow(void* p, size_t size) { if (size == 0) - dealloc(p, 1); + return dealloc(p, 1); if (likely(size <= sizeclass_to_size(NUM_SIZECLASSES - 1))) { From 6d60feb105a98e0544d885db594bcf4119ac83b4 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Mon, 16 Mar 2020 14:12:46 +0000 Subject: [PATCH 33/37] Clang tidy. --- src/mem/alloc.h | 2 +- src/mem/superslab.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mem/alloc.h b/src/mem/alloc.h index 39df2923b..ba79021ca 100644 --- a/src/mem/alloc.h +++ b/src/mem/alloc.h @@ -98,7 +98,7 @@ namespace snmalloc * If aligned to a SLAB start, then it is empty, and a new * slab is required. 
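[Editor's note] The one-line CI fix in this group (adding `return` before `dealloc(p, 1)`) guards against a classic fall-through: without it, the zero-size request would be remapped and deallocated, and then execution would continue and try to classify the same pointer again. A generic sketch of the hazard, not the snmalloc function itself:

    #include <cstddef>

    void dealloc_sized_sketch(void* p, size_t size)
    {
      if (size == 0)
        return dealloc_sized_sketch(p, 1); // remap zero-size and stop here;
                                           // dropping `return` would fall
                                           // through and process p twice

      if (size <= 64)
      {
        // small-object path (elided)
      }
      else
      {
        // larger-object path (elided)
      }
    }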
*/ - void* bump_ptrs[NUM_SMALL_CLASSES] = {0}; + void* bump_ptrs[NUM_SMALL_CLASSES] = {nullptr}; public: Stats& stats() diff --git a/src/mem/superslab.h b/src/mem/superslab.h index 70890cbc5..443e33f80 100644 --- a/src/mem/superslab.h +++ b/src/mem/superslab.h @@ -166,7 +166,7 @@ namespace snmalloc // returned all the elements, but this is a slab that is still being bump // allocated from. Hence, the bump allocator slab will never be returned // for use in another size class. - meta[0].allocated = (uint16_t)( + meta[0].allocated = static_cast( (SLAB_SIZE - get_initial_offset(sizeclass, true)) / sizeclass_to_size(sizeclass)); meta[0].link = 1; @@ -191,7 +191,7 @@ namespace snmalloc // returned all the elements, but this is a slab that is still being bump // allocated from. Hence, the bump allocator slab will never be returned // for use in another size class. - meta[h].allocated = (uint16_t)( + meta[h].allocated = static_cast( (SLAB_SIZE - get_initial_offset(sizeclass, false)) / sizeclass_to_size(sizeclass)); meta[h].needed = 1; From 0c40c84feba276838dd14c80aeb4226afdba0f04 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Wed, 25 Mar 2020 10:05:25 +0000 Subject: [PATCH 34/37] Use a ptrdiff to help with zero init. --- src/ds/cdllist.h | 49 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 16 deletions(-) diff --git a/src/ds/cdllist.h b/src/ds/cdllist.h index d1f6c6525..603676c3c 100644 --- a/src/ds/cdllist.h +++ b/src/ds/cdllist.h @@ -15,8 +15,23 @@ namespace snmalloc */ class CDLLNode { - CDLLNode* next; - CDLLNode* prev; + /** + * to_next is used to handle a zero initialised data structure. + * This means that `is_empty` works even when the constructor hasn't + * been run. + */ + ptrdiff_t to_next = 0; + +// TODO: CHERI will need a real pointer too +// CDLLNode* next = nullptr; + CDLLNode* prev = nullptr; + + void set_next(CDLLNode* c) + { +// TODO: CHERI will need a real pointer too +// next = c; + to_next = pointer_diff(c, this); + } public: /** @@ -24,13 +39,13 @@ namespace snmalloc */ CDLLNode() { - next = this; + set_next(this); prev = this; } SNMALLOC_FAST_PATH bool is_empty() { - return next == this; + return to_next == 0; } /** @@ -40,21 +55,23 @@ namespace snmalloc { SNMALLOC_ASSERT(!is_empty()); debug_check(); - next->prev = prev; - prev->next = next; + get_next()->prev = prev; + prev->set_next(get_next()); // As this is no longer in the list, check invariant for // neighbouring element. 
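[Editor's note] The switch from a stored `next` pointer to a `to_next` offset is what makes a zero-filled, never-constructed list head behave as an empty list: an offset of zero is a self-loop. A cut-down sketch of just that property (ignoring the CHERI caveat noted in the patch):

    #include <cstddef>

    struct NodeSketch
    {
      ptrdiff_t to_next = 0;      // 0 means "points at itself", i.e. empty
      NodeSketch* prev = nullptr;

      bool is_empty() const { return to_next == 0; }

      NodeSketch* get_next()
      {
        // Reconstruct the pointer as self + offset.
        return reinterpret_cast<NodeSketch*>(
          reinterpret_cast<char*>(this) + to_next);
      }
    };

    int main()
    {
      // Memory that has only been zeroed, with no constructor run, already
      // satisfies the empty-list invariant.
      alignas(NodeSketch) unsigned char raw[sizeof(NodeSketch)] = {};
      auto* n = reinterpret_cast<NodeSketch*>(raw);
      return (n->is_empty() && n->get_next() == n) ? 0 : 1;
    }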
- next->debug_check(); + get_next()->debug_check(); #ifndef NDEBUG - next = nullptr; + set_next(nullptr); prev = nullptr; #endif } SNMALLOC_FAST_PATH CDLLNode* get_next() { - return next; +// TODO: CHERI will require a real pointer +// return next; + return pointer_offset(this, to_next); } SNMALLOC_FAST_PATH CDLLNode* get_prev() @@ -65,10 +82,10 @@ namespace snmalloc SNMALLOC_FAST_PATH void insert_next(CDLLNode* item) { debug_check(); - item->next = next; - next->prev = item; + item->set_next(get_next()); + get_next()->prev = item; item->prev = this; - next = item; + set_next(item); debug_check(); } @@ -76,8 +93,8 @@ namespace snmalloc { debug_check(); item->prev = prev; - prev->next = item; - item->next = this; + prev->set_next(item); + item->set_next(this); prev = item; debug_check(); } @@ -90,14 +107,14 @@ namespace snmalloc void debug_check() { #ifndef NDEBUG - CDLLNode* item = this->next; + CDLLNode* item = get_next(); CDLLNode* p = this; do { SNMALLOC_ASSERT(item->prev == p); p = item; - item = item->next; + item = item->get_next(); } while (item != this); #endif } From 65bb8c18b074656e2a138a7418720498461881e8 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Wed, 25 Mar 2020 11:38:13 +0000 Subject: [PATCH 35/37] Make GlobalPlaceholder zero init The GlobalPlaceholder allocator is now a zero init block of memory. This removes various issues for when things are initialised. It is made read-only to we detect write to it on some platforms. --- src/ds/address.h | 20 ++++++++++++++++++++ src/ds/cdllist.h | 4 ++-- src/mem/alloc.h | 31 +++++++++++++++++-------------- src/mem/threadalloc.h | 27 ++++++++++++++++++--------- 4 files changed, 57 insertions(+), 25 deletions(-) diff --git a/src/ds/address.h b/src/ds/address.h index 49c9b9878..22c294107 100644 --- a/src/ds/address.h +++ b/src/ds/address.h @@ -24,6 +24,15 @@ namespace snmalloc return reinterpret_cast(reinterpret_cast(base) + diff); } + /** + * Perform pointer arithmetic and return the adjusted pointer. + */ + template + inline T* pointer_offset_signed(T* base, ptrdiff_t diff) + { + return reinterpret_cast(reinterpret_cast(base) + diff); + } + /** * Cast from a pointer type to an address. */ @@ -115,4 +124,15 @@ namespace snmalloc return static_cast( static_cast(cursor) - static_cast(base)); } + + /** + * Compute the difference in pointers in units of char. This can be used + * across allocations. + */ + inline ptrdiff_t pointer_diff_signed(void* base, void* cursor) + { + return static_cast( + static_cast(cursor) - static_cast(base)); + } + } // namespace snmalloc diff --git a/src/ds/cdllist.h b/src/ds/cdllist.h index 603676c3c..c23fd1fed 100644 --- a/src/ds/cdllist.h +++ b/src/ds/cdllist.h @@ -30,7 +30,7 @@ namespace snmalloc { // TODO: CHERI will need a real pointer too // next = c; - to_next = pointer_diff(c, this); + to_next = pointer_diff_signed(this, c); } public: @@ -71,7 +71,7 @@ namespace snmalloc { // TODO: CHERI will require a real pointer // return next; - return pointer_offset(this, to_next); + return pointer_offset_signed(this, to_next); } SNMALLOC_FAST_PATH CDLLNode* get_prev() diff --git a/src/mem/alloc.h b/src/mem/alloc.h index ba79021ca..60bd82eed 100644 --- a/src/mem/alloc.h +++ b/src/mem/alloc.h @@ -165,8 +165,6 @@ namespace snmalloc else return calloc(1, size); #else - stats().alloc_request(size); - // Perform the - 1 on size, so that zero wraps around and ends up on // slow path. 
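`pointer_offset_signed` and `pointer_diff_signed` added above are the signed counterparts of the existing helpers: the difference may be negative and, as the comment says, may span separate allocations, which is what the relative `to_next` link needs. A freestanding sanity check of the round-trip property `CDLLNode` relies on; the helpers are simplified to non-template, `void*` versions here, so the exact snmalloc signatures may differ:

#include <cassert>
#include <cstddef>

inline void* pointer_offset_signed(void* base, ptrdiff_t diff)
{
  return static_cast<char*>(base) + diff;
}

inline ptrdiff_t pointer_diff_signed(void* base, void* cursor)
{
  return static_cast<char*>(cursor) - static_cast<char*>(base);
}

int main()
{
  int arr[4] = {0, 1, 2, 3};
  void* base = &arr[3];
  void* cursor = &arr[0];

  // Unlike the unsigned pointer_diff, the result can be negative...
  ptrdiff_t d = pointer_diff_signed(base, cursor);
  assert(d == -3 * static_cast<ptrdiff_t>(sizeof(int)));

  // ...and offsetting by it recovers the original pointer, the round trip
  // that set_next/get_next depend on.
  assert(pointer_offset_signed(base, d) == cursor);
}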
if (likely((size - 1) <= (sizeclass_to_size(NUM_SMALL_CLASSES - 1) - 1))) @@ -1010,17 +1008,18 @@ namespace snmalloc SNMALLOC_ASSUME(size <= SLAB_SIZE); sizeclass_t sizeclass = size_to_sizeclass(size); - return small_alloc_inner(sizeclass); + return small_alloc_inner(sizeclass, size); } template - SNMALLOC_FAST_PATH void* small_alloc_inner(sizeclass_t sizeclass) + SNMALLOC_FAST_PATH void* small_alloc_inner(sizeclass_t sizeclass, size_t size) { SNMALLOC_ASSUME(sizeclass < NUM_SMALL_CLASSES); auto& fl = small_fast_free_lists[sizeclass]; void* head = fl.value; if (likely(head != nullptr)) { + stats().alloc_request(size); stats().sizeclass_alloc(sizeclass); // Read the next slot from the memory that's about to be allocated. fl.value = Metaslab::follow_next(head); @@ -1034,9 +1033,9 @@ namespace snmalloc } if (likely(!has_messages())) - return small_alloc_next_free_list(sizeclass); + return small_alloc_next_free_list(sizeclass, size); - return small_alloc_mq_slow(sizeclass); + return small_alloc_mq_slow(sizeclass, size); } /** @@ -1044,18 +1043,18 @@ namespace snmalloc * allocation request. */ template - SNMALLOC_SLOW_PATH void* small_alloc_mq_slow(sizeclass_t sizeclass) + SNMALLOC_SLOW_PATH void* small_alloc_mq_slow(sizeclass_t sizeclass, size_t size) { handle_message_queue_inner(); - return small_alloc_next_free_list(sizeclass); + return small_alloc_next_free_list(sizeclass, size); } /** * Attempt to find a new free list to allocate from */ template - SNMALLOC_SLOW_PATH void* small_alloc_next_free_list(sizeclass_t sizeclass) + SNMALLOC_SLOW_PATH void* small_alloc_next_free_list(sizeclass_t sizeclass, size_t size) { size_t rsize = sizeclass_to_size(sizeclass); auto& sl = small_classes[sizeclass]; @@ -1064,6 +1063,7 @@ namespace snmalloc if (likely(!sl.is_empty())) { + stats().alloc_request(size); stats().sizeclass_alloc(sizeclass); SlabLink* link = sl.get_next(); @@ -1072,7 +1072,7 @@ namespace snmalloc return slab->alloc( sl, ffl, rsize, large_allocator.memory_provider); } - return small_alloc_rare(sizeclass); + return small_alloc_rare(sizeclass, size); } /** @@ -1081,14 +1081,15 @@ namespace snmalloc * new free list. */ template - SNMALLOC_SLOW_PATH void* small_alloc_rare(sizeclass_t sizeclass) + SNMALLOC_SLOW_PATH void* small_alloc_rare(sizeclass_t sizeclass, size_t size) { if (likely(!NeedsInitialisation(this))) { + stats().alloc_request(size); stats().sizeclass_alloc(sizeclass); return small_alloc_new_free_list(sizeclass); } - return small_alloc_first_alloc(sizeclass); + return small_alloc_first_alloc(sizeclass, size); } /** @@ -1096,11 +1097,11 @@ namespace snmalloc * then directs the allocation request to the newly created allocator. 
*/ template - SNMALLOC_SLOW_PATH void* small_alloc_first_alloc(sizeclass_t sizeclass) + SNMALLOC_SLOW_PATH void* small_alloc_first_alloc(sizeclass_t sizeclass, size_t size) { auto replacement = InitThreadAllocator(); return reinterpret_cast(replacement) - ->template small_alloc_inner(sizeclass); + ->template small_alloc_inner(sizeclass, size); } /** @@ -1297,6 +1298,7 @@ namespace snmalloc sc->insert(slab); } + stats().alloc_request(size); stats().sizeclass_alloc(sizeclass); return p; } @@ -1364,6 +1366,7 @@ namespace snmalloc chunkmap().set_large_size(p, size); + stats().alloc_request(size); stats().large_alloc(large_class); return p; } diff --git a/src/mem/threadalloc.h b/src/mem/threadalloc.h index 9c53f4cee..ff198ebac 100644 --- a/src/mem/threadalloc.h +++ b/src/mem/threadalloc.h @@ -68,10 +68,19 @@ namespace snmalloc * slabs to allocate from, it will discover that it is the placeholder and * replace itself with the thread-local allocator, allocating one if * required. This avoids a branch on the fast path. + * + * The fake allocator is a zero initialised area of memory of the correct + * size. All data structures used potentially before initialisation must be + * okay with zero init to move to the slow path, that is, zero must signify + * empty. */ - inline GlobalVirtual dummy_memory_provider; - inline Alloc GlobalPlaceHolder( - dummy_memory_provider, SNMALLOC_DEFAULT_CHUNKMAP(), nullptr, true); + inline const char GlobalPlaceHolder[sizeof(Alloc)] = {0}; + + inline Alloc* get_GlobalPlaceHolder() + { + auto a = reinterpret_cast(&GlobalPlaceHolder); + return const_cast(a); + } /** * Common aspects of thread local allocator. Subclasses handle how releasing @@ -85,10 +94,10 @@ namespace snmalloc static inline void inner_release() { auto& per_thread = get_reference(); - if (per_thread != &GlobalPlaceHolder) + if (per_thread != get_GlobalPlaceHolder()) { current_alloc_pool()->release(per_thread); - per_thread = &GlobalPlaceHolder; + per_thread = get_GlobalPlaceHolder(); } } @@ -123,7 +132,7 @@ namespace snmalloc */ static inline Alloc*& get_reference() { - static thread_local Alloc* alloc = &GlobalPlaceHolder; + static thread_local Alloc* alloc = get_GlobalPlaceHolder(); return alloc; } @@ -236,7 +245,7 @@ namespace snmalloc SNMALLOC_SLOW_PATH inline void* init_thread_allocator() { auto*& local_alloc = ThreadAlloc::get_reference(); - if (local_alloc != &GlobalPlaceHolder) + if (local_alloc != get_GlobalPlaceHolder()) { // If someone reuses a noncachable call, then we can end up here. // The allocator has already been initialised. Could either error @@ -244,7 +253,7 @@ namespace snmalloc return local_alloc; } local_alloc = current_alloc_pool()->acquire(); - SNMALLOC_ASSERT(local_alloc != &GlobalPlaceHolder); + SNMALLOC_ASSERT(local_alloc != get_GlobalPlaceHolder()); ThreadAlloc::register_cleanup(); return local_alloc; } @@ -257,7 +266,7 @@ namespace snmalloc */ SNMALLOC_FAST_PATH bool needs_initialisation(void* existing) { - return existing == &GlobalPlaceHolder; + return existing == get_GlobalPlaceHolder(); } #endif } // namespace snmalloc From 50486c074d394447cb83ccd0250d75f423cd018a Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Wed, 25 Mar 2020 15:34:43 +0000 Subject: [PATCH 36/37] Comment. --- src/mem/threadalloc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/mem/threadalloc.h b/src/mem/threadalloc.h index ff198ebac..7fd7326a2 100644 --- a/src/mem/threadalloc.h +++ b/src/mem/threadalloc.h @@ -75,9 +75,11 @@ namespace snmalloc * empty. 
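The commit above replaces a statically constructed placeholder allocator with a zero-initialised, read-only byte array, relying on every structure the fast paths touch treating all-zero as empty, so the very first allocation falls through to the slow path and swaps in a real allocator. A self-contained sketch of that pattern with invented names (`MiniAlloc`, `placeholder` and `needs_initialisation` only mimic the shape of the real code); note that, as a later comment in this series admits for snmalloc itself, reinterpreting raw bytes as an object this way is technically undefined behaviour:

#include <cassert>
#include <cstddef>
#include <cstdlib>

// Stand-in allocator: every field reads as "empty" when its memory is zero,
// so an all-zero block can pose as an instance whose fast path never hits.
struct MiniAlloc
{
  void* free_list; // nullptr means nothing cached: always take the slow path

  void* alloc(std::size_t size)
  {
    if (free_list != nullptr)
      return nullptr; // a real fast path would pop from free_list (elided)
    return alloc_slow(size);
  }

  void* alloc_slow(std::size_t size);
};

// Zero-initialised, read-only placeholder standing in for a constructed
// allocator; it is never written to.
alignas(MiniAlloc) static const char placeholder[sizeof(MiniAlloc)] = {0};

static thread_local MiniAlloc* current =
  const_cast<MiniAlloc*>(reinterpret_cast<const MiniAlloc*>(placeholder));

static bool needs_initialisation(MiniAlloc* a)
{
  return a == reinterpret_cast<const MiniAlloc*>(placeholder);
}

void* MiniAlloc::alloc_slow(std::size_t size)
{
  if (needs_initialisation(this))
  {
    current = new MiniAlloc{}; // acquire a real allocator, then retry
    return current->alloc(size);
  }
  return std::malloc(size); // the real allocator actually obtains memory
}

int main()
{
  void* p = current->alloc(16); // placeholder -> initialise -> real alloc
  assert(p != nullptr);
  std::free(p);
  delete current;
}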
*/ inline const char GlobalPlaceHolder[sizeof(Alloc)] = {0}; - inline Alloc* get_GlobalPlaceHolder() { + // This cast is not legal. Effectively, we want a minimal constructor + // for the global allocator as zero, and then a second constructor for + // the rest. This is UB. auto a = reinterpret_cast(&GlobalPlaceHolder); return const_cast(a); } From 4b19611025e0b4be25332063bbefc018436a99de Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Wed, 25 Mar 2020 16:28:45 +0000 Subject: [PATCH 37/37] Clang format. --- src/ds/cdllist.h | 12 ++++++------ src/mem/alloc.h | 21 ++++++++++++++------- src/mem/threadalloc.h | 2 +- 3 files changed, 21 insertions(+), 14 deletions(-) diff --git a/src/ds/cdllist.h b/src/ds/cdllist.h index c23fd1fed..1bc39aad4 100644 --- a/src/ds/cdllist.h +++ b/src/ds/cdllist.h @@ -22,14 +22,14 @@ namespace snmalloc */ ptrdiff_t to_next = 0; -// TODO: CHERI will need a real pointer too -// CDLLNode* next = nullptr; + // TODO: CHERI will need a real pointer too + // CDLLNode* next = nullptr; CDLLNode* prev = nullptr; void set_next(CDLLNode* c) { -// TODO: CHERI will need a real pointer too -// next = c; + // TODO: CHERI will need a real pointer too + // next = c; to_next = pointer_diff_signed(this, c); } @@ -69,8 +69,8 @@ namespace snmalloc SNMALLOC_FAST_PATH CDLLNode* get_next() { -// TODO: CHERI will require a real pointer -// return next; + // TODO: CHERI will require a real pointer + // return next; return pointer_offset_signed(this, to_next); } diff --git a/src/mem/alloc.h b/src/mem/alloc.h index 835070ed4..afe5a10f2 100644 --- a/src/mem/alloc.h +++ b/src/mem/alloc.h @@ -1012,7 +1012,8 @@ namespace snmalloc } template - SNMALLOC_FAST_PATH void* small_alloc_inner(sizeclass_t sizeclass, size_t size) + SNMALLOC_FAST_PATH void* + small_alloc_inner(sizeclass_t sizeclass, size_t size) { SNMALLOC_ASSUME(sizeclass < NUM_SMALL_CLASSES); auto& fl = small_fast_free_lists[sizeclass]; @@ -1033,7 +1034,8 @@ namespace snmalloc } if (likely(!has_messages())) - return small_alloc_next_free_list(sizeclass, size); + return small_alloc_next_free_list( + sizeclass, size); return small_alloc_mq_slow(sizeclass, size); } @@ -1043,18 +1045,21 @@ namespace snmalloc * allocation request. */ template - SNMALLOC_SLOW_PATH void* small_alloc_mq_slow(sizeclass_t sizeclass, size_t size) + SNMALLOC_SLOW_PATH void* + small_alloc_mq_slow(sizeclass_t sizeclass, size_t size) { handle_message_queue_inner(); - return small_alloc_next_free_list(sizeclass, size); + return small_alloc_next_free_list( + sizeclass, size); } /** * Attempt to find a new free list to allocate from */ template - SNMALLOC_SLOW_PATH void* small_alloc_next_free_list(sizeclass_t sizeclass, size_t size) + SNMALLOC_SLOW_PATH void* + small_alloc_next_free_list(sizeclass_t sizeclass, size_t size) { size_t rsize = sizeclass_to_size(sizeclass); auto& sl = small_classes[sizeclass]; @@ -1081,7 +1086,8 @@ namespace snmalloc * new free list. */ template - SNMALLOC_SLOW_PATH void* small_alloc_rare(sizeclass_t sizeclass, size_t size) + SNMALLOC_SLOW_PATH void* + small_alloc_rare(sizeclass_t sizeclass, size_t size) { if (likely(!NeedsInitialisation(this))) { @@ -1097,7 +1103,8 @@ namespace snmalloc * then directs the allocation request to the newly created allocator. 
*/ template - SNMALLOC_SLOW_PATH void* small_alloc_first_alloc(sizeclass_t sizeclass, size_t size) + SNMALLOC_SLOW_PATH void* + small_alloc_first_alloc(sizeclass_t sizeclass, size_t size) { auto replacement = InitThreadAllocator(); return reinterpret_cast(replacement) diff --git a/src/mem/threadalloc.h b/src/mem/threadalloc.h index 7fd7326a2..969d1dc89 100644 --- a/src/mem/threadalloc.h +++ b/src/mem/threadalloc.h @@ -68,7 +68,7 @@ namespace snmalloc * slabs to allocate from, it will discover that it is the placeholder and * replace itself with the thread-local allocator, allocating one if * required. This avoids a branch on the fast path. - * + * * The fake allocator is a zero initialised area of memory of the correct * size. All data structures used potentially before initialisation must be * okay with zero init to move to the slow path, that is, zero must signify