From 6b03ebd5c3fc8fe851a533d1044cb17455ad4971 Mon Sep 17 00:00:00 2001
From: Katelyn Gadd
Date: Thu, 9 May 2024 19:50:16 -0700
Subject: [PATCH] [wasm] Add custom mmap/munmap implementation for anonymous
 mappings (#101871)

* Emscripten libc implements mmap/munmap as a broken adapter on top of
  malloc (not calloc), so it has no choice but to invoke memset on every
  allocation in order to provide properly zeroed bytes for our allocation
  requests. This commit adds a custom mmap/munmap implementation that can
  skip zeroing already-zeroed pages.

* Re-enable freeing of pages in sgen on wasm if custom mmap is active.

* Add a runtime option for custom mmap.

* Add warning switches to fix the build on Debian.

---
 src/mono/mono/sgen/sgen-internal.c      |   8 +-
 src/mono/mono/sgen/sgen-marksweep.c     |   6 +-
 src/mono/mono/utils/CMakeLists.txt      |   2 +-
 src/mono/mono/utils/lock-free-alloc.h   |   1 +
 src/mono/mono/utils/mono-mmap-wasm.c    |  34 +-
 src/mono/mono/utils/mono-mmap.h         |   3 +-
 src/mono/mono/utils/mono-wasm-pagemgr.c | 482 ++++++++++++++++++++++++
 src/mono/mono/utils/mono-wasm-pagemgr.h |  87 +++++
 src/mono/mono/utils/options-def.h       |   6 +
 src/native/libs/CMakeLists.txt          |   2 +
 10 files changed, 623 insertions(+), 8 deletions(-)
 create mode 100644 src/mono/mono/utils/mono-wasm-pagemgr.c
 create mode 100644 src/mono/mono/utils/mono-wasm-pagemgr.h

diff --git a/src/mono/mono/sgen/sgen-internal.c b/src/mono/mono/sgen/sgen-internal.c
index c026d47b1a597..6ff3e6fda9808 100644
--- a/src/mono/mono/sgen/sgen-internal.c
+++ b/src/mono/mono/sgen/sgen-internal.c
@@ -273,7 +273,7 @@ sgen_report_internal_mem_usage (void)
 void
 sgen_init_internal_allocator (void)
 {
-	int i, size;
+	int i;
 
 	for (i = 0; i < INTERNAL_MEM_MAX; ++i)
 		fixed_type_allocator_indexes [i] = -1;
@@ -284,7 +284,10 @@ sgen_init_internal_allocator (void)
 		mono_lock_free_allocator_init_allocator (&allocators [i], &size_classes [i], MONO_MEM_ACCOUNT_SGEN_INTERNAL);
 	}
 
-	for (size = mono_pagesize (); size <= LOCK_FREE_ALLOC_SB_MAX_SIZE; size <<= 1) {
+	// FIXME: This whole algorithm is broken on WASM due to its 64KB page size.
+	// Previously SB_MAX_SIZE was < mono_pagesize, so none of this ran.
+#ifndef HOST_WASM
+	for (int size = mono_pagesize (); size <= LOCK_FREE_ALLOC_SB_MAX_SIZE; size <<= 1) {
 		int max_size = (LOCK_FREE_ALLOC_SB_USABLE_SIZE (size) / 2) & ~(SIZEOF_VOID_P - 1);
 		/*
 		 * we assert that allocator_sizes contains the biggest possible object size
@@ -297,6 +300,7 @@ sgen_init_internal_allocator (void)
 		if (size < LOCK_FREE_ALLOC_SB_MAX_SIZE)
 			g_assert (block_size (max_size + 1) == size << 1);
 	}
+#endif
 }
 
 #endif
diff --git a/src/mono/mono/sgen/sgen-marksweep.c b/src/mono/mono/sgen/sgen-marksweep.c
index 1767d4712def1..a8160dbeee372 100644
--- a/src/mono/mono/sgen/sgen-marksweep.c
+++ b/src/mono/mono/sgen/sgen-marksweep.c
@@ -33,6 +33,7 @@
 #include "mono/sgen/sgen-client.h"
 #include "mono/utils/mono-memory-model.h"
 #include "mono/utils/mono-proclib.h"
+#include "mono/utils/options.h"
 
 static int ms_block_size;
 
@@ -2133,12 +2134,15 @@ major_free_swept_blocks (size_t section_reserve)
 {
 	SGEN_ASSERT (0, sweep_state == SWEEP_STATE_SWEPT, "Sweeping must have finished before freeing blocks");
 
-#if defined(HOST_WIN32) || defined(HOST_ORBIS) || defined (HOST_WASM)
+#if defined(HOST_WIN32) || defined(HOST_ORBIS)
	/*
	 * sgen_free_os_memory () asserts in mono_vfree () because windows doesn't like freeing the middle of
	 * a VirtualAlloc ()-ed block.
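	 * On WASM, the early return below now applies only when the custom page manager
	 * is disabled, since mwpm_free_range () can free arbitrary subranges of a mapping.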
	 */
	return;
+#elif defined(HOST_WASM)
+	if (!mono_opt_wasm_mmap)
+		return;
 #endif
 
 	{
diff --git a/src/mono/mono/utils/CMakeLists.txt b/src/mono/mono/utils/CMakeLists.txt
index f4b180f98d397..e5bd089b8203c 100644
--- a/src/mono/mono/utils/CMakeLists.txt
+++ b/src/mono/mono/utils/CMakeLists.txt
@@ -206,7 +206,7 @@ set(utils_arch_sources "${utils_arch_sources};mono-hwcap-riscv.c")
 elseif(TARGET_S390X)
 set(utils_arch_sources "${utils_arch_sources};mono-hwcap-s390x.c")
 elseif(TARGET_WASM)
-set(utils_arch_sources "${utils_arch_sources};mono-hwcap-wasm.c;mono-mmap-wasm.c")
+set(utils_arch_sources "${utils_arch_sources};mono-hwcap-wasm.c;mono-mmap-wasm.c;mono-wasm-pagemgr.c")
 elseif(TARGET_WASI)
 set(utils_arch_sources "${utils_arch_sources};mono-hwcap-wasm.c")
 elseif(TARGET_POWERPC OR TARGET_POWERPC64)
diff --git a/src/mono/mono/utils/lock-free-alloc.h b/src/mono/mono/utils/lock-free-alloc.h
index cca1d83eb6614..5aa4e1ea90722 100644
--- a/src/mono/mono/utils/lock-free-alloc.h
+++ b/src/mono/mono/utils/lock-free-alloc.h
@@ -45,6 +45,7 @@ typedef struct {
 	MonoMemAccountType account_type;
 } MonoLockFreeAllocator;
 
+// FIXME: On WASM the page size is 64KB, so this isn't enough.
 #define LOCK_FREE_ALLOC_SB_MAX_SIZE 16384
 #define LOCK_FREE_ALLOC_SB_HEADER_SIZE (sizeof (gpointer))
 #define LOCK_FREE_ALLOC_SB_USABLE_SIZE(block_size) ((block_size) - LOCK_FREE_ALLOC_SB_HEADER_SIZE)
diff --git a/src/mono/mono/utils/mono-mmap-wasm.c b/src/mono/mono/utils/mono-mmap-wasm.c
index b2f417b086038..aef7bfb9c3bed 100644
--- a/src/mono/mono/utils/mono-mmap-wasm.c
+++ b/src/mono/mono/utils/mono-mmap-wasm.c
@@ -22,6 +22,9 @@
 #include "mono-proclib.h"
 #include <mono/utils/mono-threads.h>
 #include <mono/utils/atomic.h>
+#include <mono/utils/options.h>
+
+#include "mono-wasm-pagemgr.h"
 
 #define BEGIN_CRITICAL_SECTION do { \
 	MonoThreadInfo *__info = mono_thread_info_current_unchecked (); \
@@ -34,6 +37,9 @@
 int
 mono_pagesize (void)
 {
+	if (mono_opt_wasm_mmap)
+		return MWPM_PAGE_SIZE;
+
 	static int saved_pagesize = 0;
 
 	if (saved_pagesize)
@@ -108,7 +114,16 @@ valloc_impl (void *addr, size_t size, int flags, MonoMemAccountType type)
 		mflags |= MAP_PRIVATE;
 
 	BEGIN_CRITICAL_SECTION;
-	ptr = mmap (addr, size, prot, mflags, -1, 0);
+	if (mono_opt_wasm_mmap) {
+		// FIXME: Make this work if the requested address range is free
+		if ((flags & MONO_MMAP_FIXED) && addr)
+			return NULL;
+
+		ptr = mwpm_alloc_range (size, 1);
+		if (!ptr)
+			return NULL;
+	} else
+		ptr = mmap (addr, size, prot, mflags, -1, 0);
 	END_CRITICAL_SECTION;
 
 	if (ptr == MAP_FAILED)
@@ -142,6 +157,10 @@ typedef struct {
 void*
 mono_valloc_aligned (size_t size, size_t alignment, int flags, MonoMemAccountType type)
 {
+	// We don't need padding if the alignment is compatible with the page size
+	if (mono_opt_wasm_mmap && ((MWPM_PAGE_SIZE % alignment) == 0))
+		return valloc_impl (NULL, size, flags, type);
+
 	/* Allocate twice the memory to be able to put the block on an aligned address */
 	char *mem = (char *) valloc_impl (NULL, size + alignment, flags, type);
 	char *aligned;
@@ -175,13 +194,22 @@ mono_vfree (void *addr, size_t length, MonoMemAccountType type)
 		 * mono_valloc_align (), free the original mapping.
 		 */
 		BEGIN_CRITICAL_SECTION;
-		munmap (info->addr, info->size);
+		if (mono_opt_wasm_mmap)
+			mwpm_free_range (info->addr, info->size);
+		else
+			munmap (info->addr, info->size);
 		END_CRITICAL_SECTION;
 		g_free (info);
 		g_hash_table_remove (valloc_hash, addr);
 	} else {
+		// FIXME: We could be trying to unmap part of an aligned mapping, in which case the
+		// hash lookup failed because addr isn't exactly the start of the mapping.
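+		// (mwpm_free_range () frees every page overlapping [addr, addr + length),
+		// so the fallthrough below is still safe for partial ranges.)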
+		// Ideally, if the custom page manager is enabled, we won't have done an aligned alloc.
 		BEGIN_CRITICAL_SECTION;
-		munmap (addr, length);
+		if (mono_opt_wasm_mmap)
+			mwpm_free_range (addr, length);
+		else
+			munmap (addr, length);
 		END_CRITICAL_SECTION;
 	}
 
diff --git a/src/mono/mono/utils/mono-mmap.h b/src/mono/mono/utils/mono-mmap.h
index 64591f106eab2..fcf24bbea59c7 100644
--- a/src/mono/mono/utils/mono-mmap.h
+++ b/src/mono/mono/utils/mono-mmap.h
@@ -22,7 +22,8 @@ enum {
 	MONO_MMAP_ANON = 1 << 6,
 	MONO_MMAP_FIXED = 1 << 7,
 	MONO_MMAP_32BIT = 1 << 8,
-	MONO_MMAP_JIT = 1 << 9
+	MONO_MMAP_JIT = 1 << 9,
+	MONO_MMAP_NOZERO = 1 << 10,
 };
 
 typedef enum {
diff --git a/src/mono/mono/utils/mono-wasm-pagemgr.c b/src/mono/mono/utils/mono-wasm-pagemgr.c
new file mode 100644
index 0000000000000..17f7dd4c6d63c
--- /dev/null
+++ b/src/mono/mono/utils/mono-wasm-pagemgr.c
@@ -0,0 +1,482 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include <config.h>
+#include "mono-wasm-pagemgr.h"
+#include <glib.h>
+#include <memory.h>
+#include <mono/utils/atomic.h>
+#include <mono/utils/mono-os-mutex.h>
+#include <unistd.h>
+
+#ifndef DISABLE_THREADS
+#include <threads.h>
+#endif
+
+// #define MWPM_LOGGING
+// #define MWPM_STATS
+
+typedef enum {
+	MWPM_MARK_DEAD_PAGES,
+	MWPM_MARK_NEW_PAGES,
+	MWPM_FREE_TO_ALLOCATED,
+	MWPM_FREE_TO_ALLOCATED_ZEROED,
+	MWPM_ALLOCATED_TO_FREE,
+} page_action;
+
+#define is_page_free(state) (state & MWPM_FREE_BIT)
+#define is_page_owned(state) (state & MWPM_STATE_MASK)
+#define is_page_in_use(state) ((state & MWPM_STATE_MASK) == MWPM_ALLOCATED)
+#define get_page_skip_count(state) (state & MWPM_SKIP_MASK)
+
+typedef uint8_t mwpm_page_state;
+
+static mono_mutex_t mutex;
+static uint8_t page_table[MWPM_MAX_PAGES];
+#ifdef DISABLE_THREADS
+static uint8_t is_initialized = 0;
+#else
+static once_flag is_initialized = ONCE_FLAG_INIT;
+#endif
+static uint32_t
+	// The index of the first page that we control. Not all pages after this
+	// necessarily belong to us, but scans can start here.
+	first_controlled_page_index = UINT32_MAX,
+	// The index of the last page we've allocated. Not all pages between this
+	// and first_controlled_page_index belong to us, but scans can end here.
+	last_controlled_page_index = 0;
+static uint8_t *prev_waste_start = NULL,
+	*prev_waste_end = NULL;
+
+static inline void *
+address_from_page_index (uint32_t page_index) {
+	uint64_t address = ((uint64_t)page_index * MWPM_PAGE_SIZE);
+	g_assert (address < UINT32_MAX);
+	return (void *)(uint32_t)address;
+}
+
+static inline uint32_t
+first_page_from_address (void *addr) {
+	return ((uint64_t)addr) / MWPM_PAGE_SIZE;
+}
+
+static inline uint32_t
+page_count_from_size (size_t size) {
+	return ((size + MWPM_PAGE_SIZE - 1) / MWPM_PAGE_SIZE);
+}
+
+static inline uint32_t
+last_page_of_range (void *addr, size_t size) {
+	uint32_t page_count_rounded_up = page_count_from_size (size),
+		first_page = first_page_from_address (addr);
+	return first_page + page_count_rounded_up - 1;
+}
+
+static inline const char *
+get_state_name (uint8_t state) {
+	switch (state & MWPM_STATE_MASK) {
+		case MWPM_EXTERNAL:
+			return "external";
+		case MWPM_FREE_DIRTY:
+			return "dirty";
+		case MWPM_FREE_ZEROED:
+			return "zeroed";
+		case MWPM_ALLOCATED:
+			return "in use";
+		default:
+			g_assert_not_reached ();
+	}
+}
+
+static inline mwpm_page_state
+encode_page_state (uint8_t bits, uint32_t skip_count) {
+	// We encode state into the page table like so:
+	// The top two bits are the free bit and the meta bit.
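+	// (The free bit and meta bit are MWPM_FREE_BIT and MWPM_META_BIT in
+	// mono-wasm-pagemgr.h.)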
+	// For a free page, the meta bit indicates whether it is zeroed.
+	// For an occupied page, the meta bit indicates whether we control it.
+	// The remaining 6 bits encode the "skip count", which is a promise that
+	// the following N pages have the same state as the current page.
+	// The skip count allows us to jump forward safely during scans for free
+	// pages so that we don't have to do a full linear scan of the page table.
+	if (skip_count > MWPM_SKIP_MASK)
+		skip_count = MWPM_SKIP_MASK;
+
+	return (bits & MWPM_STATE_MASK) | (skip_count & MWPM_SKIP_MASK);
+}
+
+static void
+cleanup_preceding_pages (uint32_t successor_page) {
+	uint32_t first_page = successor_page > 64
+		? successor_page - 64
+		: 0;
+
+	for (uint32_t i = first_page; i < successor_page; i++) {
+		mwpm_page_state page_state = page_table[i];
+		// For a skip_count of 0 we will skip exactly one page (otherwise we would
+		// get stuck on pages with a 0 skip count), so the maximum skip value is
+		// distance - 1 to produce an actual skip of distance pages.
+		uint32_t maximum_skip_value = successor_page - i - 1;
+		if (maximum_skip_value > MWPM_SKIP_MASK)
+			maximum_skip_value = MWPM_SKIP_MASK;
+		if (get_page_skip_count (page_state) <= maximum_skip_value)
+			continue;
+
+#if defined(MWPM_LOGGING)
+		g_print (
+			"Repairing invalid skip value in predecessor page %u: %s %u -> %u\n",
+			i, get_state_name (page_state), get_page_skip_count (page_state),
+			maximum_skip_value
+		);
+#endif
+		page_table[i] = encode_page_state (page_state & MWPM_STATE_MASK, maximum_skip_value);
+	}
+}
+
+static void
+transition_page_states (page_action action, uint32_t first_page, uint32_t page_count) {
+	if (page_count == 0)
+		return;
+
+	g_assert (first_page < MWPM_MAX_PAGES);
+
+	uint32_t last_page = first_page + (page_count - 1);
+	g_assert (last_page >= first_page);
+
+	g_assert (last_page < MWPM_MAX_PAGES);
+
+	// POSIX specifies that munmap () on an address range that isn't mapped has no
+	// effect, so we need to make sure that it's harmless to try and unmap pages we
+	// don't control. We can't use memset since it might trample UNKNOWN pages.
+	for (uint32_t i = first_page, skip_value = page_count - 1; i <= last_page; i++) {
+		mwpm_page_state page_state = page_table[i];
+
+		// TODO: Remove the duplication in here
+		switch (action) {
+			case MWPM_MARK_DEAD_PAGES:
+				g_assert (!is_page_owned (page_state));
+				page_table[i] = encode_page_state (MWPM_EXTERNAL, skip_value--);
+				break;
+			case MWPM_MARK_NEW_PAGES:
+				g_assert (!is_page_owned (page_state));
+				page_table[i] = encode_page_state (MWPM_FREE_ZEROED, skip_value--);
+				break;
+			case MWPM_FREE_TO_ALLOCATED:
+				g_assert (is_page_free (page_state));
+				page_table[i] = encode_page_state (MWPM_ALLOCATED, skip_value--);
+				break;
+			case MWPM_FREE_TO_ALLOCATED_ZEROED:
+				g_assert (is_page_free (page_state));
+				page_table[i] = encode_page_state (MWPM_ALLOCATED, skip_value--);
+				if (!(page_state & MWPM_META_BIT))
+					// TODO: Don't recalculate the address from scratch each time
+					memset (address_from_page_index (i), 0, MWPM_PAGE_SIZE);
+				break;
+			case MWPM_ALLOCATED_TO_FREE:
+				// FIXME: Can we generate correct skip_value here? This is used
+				// by munmap, which is valid to call even on pages that are not mapped.
+				if (is_page_in_use (page_state))
+					page_table[i] = encode_page_state (MWPM_FREE_DIRTY, 0);
+				break;
+			default:
+				g_assert_not_reached ();
+				break;
+		}
+	}
+
+	if (action == MWPM_ALLOCATED_TO_FREE)
+		cleanup_preceding_pages (first_page);
+}
+
+static void
+print_stats () {
+#if defined(MWPM_LOGGING) || defined(MWPM_STATS)
+	uint32_t in_use = 0, free = 0, unallocated = 0,
+		max_run = 0, current_run = 0;
+
+	for (uint32_t i = first_controlled_page_index; i <= last_controlled_page_index; i++) {
+		switch (page_table[i] & MWPM_STATE_MASK) {
+			case MWPM_ALLOCATED:
+				in_use++;
+				current_run = 0;
+				break;
+
+			case MWPM_FREE_DIRTY:
+			case MWPM_FREE_ZEROED:
+				free++;
+				current_run++;
+				if (current_run > max_run)
+					max_run = current_run;
+				break;
+
+			default:
+				unallocated++;
+				current_run = 0;
+				break;
+		}
+	}
+
+	uint32_t total = in_use + free; // + unallocated;
+	g_print (
+		"sbrk(0)==%u. %u pages in use (%f%%), %u pages free, %u pages unknown. largest possible allocation: %u pages\n",
+		(uint32_t)sbrk(0), in_use, in_use * 100.0 / total, free, unallocated, max_run
+	);
+#endif
+}
+
+static void *
+acquire_new_pages_initialized (uint32_t page_count) {
+	if (page_count < 1)
+		return NULL;
+	// Pad the allocation with an extra page; this will create waste bytes at the
+	// start and end that we can use to align the resulting allocation. We will
+	// try to recover the waste if possible.
+	uint64_t bytes = (page_count + 1) * MWPM_PAGE_SIZE;
+	uint32_t recovered_bytes = 0;
+	if (bytes >= UINT32_MAX)
+		return NULL;
+
+	// We know that on WASM, sbrk grows the heap as necessary in order to return
+	// a region of N zeroed bytes, which isn't necessarily aligned or page-sized.
+	uint8_t *allocation = sbrk ((uint32_t)bytes),
+		*allocation_end = allocation + bytes;
+
+	if (allocation == (uint8_t *)-1) {
+#ifdef MWPM_LOGGING
+		g_print ("mwpm failed to acquire memory\n");
+#endif
+		return NULL;
+	}
+
+	g_assert (allocation_end != allocation);
+
+	// If nobody else has called sbrk since we did, stitch the allocations together
+	// to eliminate the wasted page in the middle.
+	if (prev_waste_start && (prev_waste_end == allocation)) {
+		recovered_bytes = allocation - prev_waste_start;
+		allocation = prev_waste_start;
+	} else {
+		// Update the dead pages that were allocated by someone else via sbrk()
+		// so that they have skip data
+		uint32_t first_dead_page = first_page_from_address (prev_waste_end),
+			dead_page_count = page_count_from_size (allocation - prev_waste_end);
+		transition_page_states (MWPM_MARK_DEAD_PAGES, first_dead_page, dead_page_count);
+	}
+
+	uint8_t *result = allocation;
+	// Unfortunately emscripten libc doesn't page-align sbrk's return value.
+	uint32_t realignment = MWPM_PAGE_SIZE - (((uint64_t)result) % MWPM_PAGE_SIZE);
+	if (realignment < MWPM_PAGE_SIZE) {
+		result += realignment;
+		g_assert ((((uint64_t)result) % MWPM_PAGE_SIZE) == 0);
+	}
+
+	// Figure out how many wasted bytes are hanging off the end of our last page.
+	page_count = (allocation_end - result) / MWPM_PAGE_SIZE;
+	g_assert (page_count);
+	// Record the region of wasted bytes we allocated, so we can try to use it later.
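+	// If the next sbrk () call returns a region starting exactly at prev_waste_end,
+	// the stitching check above lets us reclaim this fragment instead of wasting it.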
+	prev_waste_start = result + (page_count * MWPM_PAGE_SIZE);
+	prev_waste_end = allocation_end;
+
+	// Mark all the allocated pages as free and zeroed
+	uint32_t first_page_index = first_page_from_address (result),
+		last_page_index = first_page_index + page_count - 1;
+
+	if ((first_page_index >= MWPM_MAX_PAGES) || (last_page_index >= MWPM_MAX_PAGES)) {
+#ifdef MWPM_LOGGING
+		g_print ("mwpm failed to acquire pages because resulting page index was out of range: %u-%u\n", first_page_index, last_page_index);
+#endif
+		return NULL;
+	}
+
+	// g_print ("mwpm allocated %u bytes (%u pages) starting at @%u (%u recovered)\n", (uint32_t)bytes, page_count, (uint32_t)allocation, recovered_bytes);
+	transition_page_states (MWPM_MARK_NEW_PAGES, first_page_index, page_count);
+	print_stats ();
+	last_controlled_page_index = last_page_index;
+	return result;
+}
+
+static inline void
+free_pages_initialized (uint32_t first_page, uint32_t page_count) {
+	// Expected behavior: freeing UNKNOWN pages leaves them unknown;
+	// freeing FREE_ZEROED pages leaves them zeroed;
+	// freeing ALLOCATED or FREE_DIRTY pages makes them FREE_DIRTY.
+	transition_page_states (MWPM_ALLOCATED_TO_FREE, first_page, page_count);
+}
+
+static uint32_t
+find_n_free_pages_in_range (uint32_t start_scan_where, uint32_t end_scan_where, uint32_t page_count) {
+	if (page_count == 0)
+		return UINT32_MAX;
+
+	uint32_t i = start_scan_where;
+
+	while (i <= end_scan_where) {
+		uint8_t found_obstruction = 0;
+		uint32_t j = i + page_count - 1;
+		if (j > last_controlled_page_index)
+			break;
+
+		// Avoid the worst-case scenario of starting on an occupied page, then scanning
+		// backwards through a bunch of free pages to arrive at the occupied one
+		mwpm_page_state page_state = page_table[i];
+		if (!is_page_free (page_state)) {
+			uint32_t skip_count = get_page_skip_count (page_state) + 1;
+			if (skip_count < 1)
+				skip_count = 1;
+			i += skip_count;
+
+#ifdef ENABLE_CHECKED_BUILD
+			g_assert (!is_page_free (page_table[i - 1]));
+#endif
+
+#ifdef MWPM_LOGGING
+			if (skip_count > 1)
+				g_print (
+					"scan skipping %u %s page(s) (head); new page is #%u with state %s\n",
+					skip_count, get_state_name (page_state),
+					i, get_state_name (page_table[i])
+				);
+#endif
+
+			continue;
+		}
+
+		// TODO: If we find a free page with a skip count in it, that would indicate
+		// that there are N sequential free pages left we can claim without doing
+		// the scan below.
+
+		// Scan backwards from the last candidate page to look for any non-free pages;
+		// the first non-free page we find is the next place we will search from.
+		for (; j >= i; j--) {
+			page_state = page_table[j];
+
+			if (!is_page_free (page_state)) {
+				// Skip multiple pages
+				uint32_t skip_count = get_page_skip_count (page_state) + 1;
+				if (skip_count < 1)
+					skip_count = 1;
+				i = j + skip_count;
+
+#ifdef ENABLE_CHECKED_BUILD
+				g_assert (!is_page_free (page_table[i - 1]));
+#endif
+
+#ifdef MWPM_LOGGING
+				if (skip_count > 1)
+					g_print (
+						"scan skipping %u %s page(s) (tail); new page is #%u with state %s\n",
+						skip_count, get_state_name (page_state),
+						i, get_state_name (page_table[i])
+					);
+#endif
+				found_obstruction = 1;
+				break;
+			}
+		}
+
+		if (found_obstruction)
+			continue;
+
+		// We scanned page_count pages starting from i and they were all free.
+		return i;
+	}
+
+	return UINT32_MAX;
+}
+
+// Scans all controlled pages to look for at least page_count free pages.
+static uint32_t
+find_n_free_pages (uint32_t page_count) {
+	// Start scanning from the beginning.
+	// This ensures we will try to grab small allocations from the front of the
+	// page table, and large allocations from anywhere we can find. This does
+	// make scans slower, but other approaches I tried have much worse fragmentation.
+	uint32_t result = find_n_free_pages_in_range (first_controlled_page_index, last_controlled_page_index, page_count);
+	return result;
+}
+
+static void
+mwpm_init () {
+	mono_os_mutex_init_recursive (&mutex);
+	// Set the entire page table to 'unknown state'. As we acquire pages from sbrk, we will
+	// set those respective ranges in the table to a known state.
+	memset (page_table, MWPM_EXTERNAL, sizeof(page_table));
+	void *first_controlled_page_address = acquire_new_pages_initialized (MWPM_MINIMUM_PAGE_COUNT);
+	g_assert (first_controlled_page_address);
+	first_controlled_page_index = first_page_from_address (first_controlled_page_address);
+}
+
+static inline void
+mwpm_ensure_initialized () {
+#ifdef DISABLE_THREADS
+	if (is_initialized)
+		return;
+	is_initialized = 1;
+	mwpm_init ();
+#else
+	call_once (&is_initialized, mwpm_init);
+#endif
+}
+
+void *
+mwpm_alloc_range (size_t size, uint8_t zeroed) {
+	void *result = NULL;
+	if (!size)
+		return result;
+
+	mwpm_ensure_initialized ();
+	mono_os_mutex_lock (&mutex);
+
+	uint32_t page_count = page_count_from_size (size),
+		first_existing_page = find_n_free_pages (page_count),
+		allocation_page_count = page_count;
+
+	// If we didn't find existing pages to service our alloc...
+	if (first_existing_page == UINT32_MAX) {
+		// g_print ("mwpm could not find %u free pages\n", page_count);
+		if (allocation_page_count < MWPM_MINIMUM_PAGE_COUNT)
+			allocation_page_count = MWPM_MINIMUM_PAGE_COUNT;
+		// Ensure we have space for the whole allocation
+		void *start_of_new_pages = acquire_new_pages_initialized (allocation_page_count);
+		if (start_of_new_pages) {
+			// FIXME: Scan backwards from the new allocation to look for free pages
+			// before it that we can use to reduce fragmentation
+			result = start_of_new_pages;
+		} else {
+#ifdef MWPM_LOGGING
+			g_print ("mwpm failed to acquire new pages\n");
+#endif
+			goto exit;
+		}
+	} else {
+		result = address_from_page_index (first_existing_page);
+		// g_print ("mwpm found %u free pages at %u\n", page_count, (uint32_t) result);
+	}
+
+	if (!result)
+		goto exit;
+
+	uint32_t first_result_page = first_page_from_address (result);
+	transition_page_states (zeroed ? MWPM_FREE_TO_ALLOCATED_ZEROED : MWPM_FREE_TO_ALLOCATED, first_result_page, page_count);
+
+#ifdef MWPM_LOGGING
+	g_print ("mwpm allocated %u bytes at %u\n", size, (uint32_t)result);
+#endif
+
+exit:
+	mono_os_mutex_unlock (&mutex);
+	return result;
+}
+
+void
+mwpm_free_range (void *base, size_t size) {
+	mwpm_ensure_initialized ();
+
+	mono_os_mutex_lock (&mutex);
+	uint32_t first_page = first_page_from_address (base),
+		page_count = page_count_from_size (size);
+	free_pages_initialized (first_page, page_count);
+	mono_os_mutex_unlock (&mutex);
+#ifdef MWPM_LOGGING
+	g_print ("mwpm freed %u bytes at %u\n", size, (uint32_t)base);
+#endif
+}
diff --git a/src/mono/mono/utils/mono-wasm-pagemgr.h b/src/mono/mono/utils/mono-wasm-pagemgr.h
new file mode 100644
index 0000000000000..1720ffaf0967b
--- /dev/null
+++ b/src/mono/mono/utils/mono-wasm-pagemgr.h
@@ -0,0 +1,87 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+// Provides a simple, efficient implementation of anonymous mmap/munmap for WASM.
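+//
+// Pages are carved out of linear memory with sbrk () and tracked in a small
+// page table; see mono-wasm-pagemgr.c for details.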
+// The emscripten libc version of mmap for anonymous mappings is slow during startup
+// because it spends a lot of time zeroing already-zeroed pages, and in some cases
+// the allocated pages will then get zeroed one more time by a malloc0 operation.
+
+// This is distinct from malloc/free in that we support freeing a subset of an allocation,
+// i.e. you can allocate 4 pages and then free page 1, leaving pages 0/2/3 allocated.
+
+// Note that pages mapped by MWPM can't be unmapped by libc, and vice versa.
+
+#ifndef __MONO_UTILS_WASM_PAGEMGR_H__
+#define __MONO_UTILS_WASM_PAGEMGR_H__
+
+#ifdef HOST_WASI
+#ifndef DISABLE_THREADS
+#error MWPM does not support multithreaded WASI due to lack of call_once
+#endif
+#endif
+
+#include <stddef.h>
+#include <stdint.h>
+
+// 64KB (WASM native page size)
+// NOTE: emscripten sbrk() allows you to allocate less than 1 page, and doesn't
+// return page-aligned addresses. So this value can theoretically be smaller.
+// Shrinking this would reduce wasted memory if callers try to mmap less than a
+// whole page. My initial testing didn't show big improvements though, and it
+// would make mmap (and to a lesser degree, munmap) have to scan/touch more pages.
+// I tested with 16KB pages and it wasn't an improvement over 64KB.
+// 8KB pages produce a slight reduction in total memory usage (1.3%); 4KB is worse.
+#define MWPM_PAGE_SIZE (64 * 1024)
+
+// 4GB (even though JS can't consistently handle addresses above 2GB)
+// System.Text.Json.Tests needs to allocate more than 2GB...
+#define MWPM_MAX_MEMORY ((1024UL * 1024UL * 1024UL) * 4UL)
+
+// The proper definition compiles down to 0 for some reason even if I put typecasts
+// around it
+// #define MWPM_MAX_PAGES (uint32_t)(MWPM_MAX_MEMORY / MWPM_PAGE_SIZE)
+#define MWPM_MAX_PAGES (64 * 1024)
+
+// When allocating new zeroed pages, always allocate at least this many.
+// This ensures that we don't waste a bunch of time allocating 1-2 pages at once.
+// sbrk() also doesn't return page-aligned addresses, so this produces fewer
+// wasted page fragments and in general ensures we can service larger allocations.
+// Setting this constant incorrectly will cause higher fragmentation and higher
+// memory usage, potentially causing OOM. Data from the S.T.J test suite:
+// (minimum count) (total pages allocated) (largest available space)
+//  16             13677                   363
+//  24             14675                   340
+//  30             14653                   192
+//  32             11399                   463
+//  34             14412                   338
+//  48             14460                   461
+//  64             11767                   230
+//  96             12986                    96
+// 128             14059                   321
+// 192             14686                   289
+// 256             12281                   256
+#define MWPM_MINIMUM_PAGE_COUNT 32
+
+#define MWPM_FREE_BIT   0b10000000
+#define MWPM_META_BIT   0b01000000
+#define MWPM_STATE_MASK 0b11000000
+#define MWPM_SKIP_MASK  0b00111111
+
+#define MWPM_FREE_ZEROED (uint8_t)(MWPM_FREE_BIT | MWPM_META_BIT)
+#define MWPM_FREE_DIRTY (uint8_t)(MWPM_FREE_BIT)
+#define MWPM_ALLOCATED (uint8_t)(MWPM_META_BIT)
+#define MWPM_EXTERNAL (uint8_t)(0)
+
+// Allocate enough pages to hold size bytes of data, optionally ensuring they are zeroed.
+// Zeroing memory on wasm is somewhat expensive, so use this option wisely!
+void *
+mwpm_alloc_range (size_t size, uint8_t zeroed);
+
+// Free all the pages containing the memory range from base to base+size-1.
+// If the specified range does not occupy an entire page, the page will still
+// be freed! This matches the specified behavior of posix munmap.
+// base must be a multiple of MWPM_PAGE_SIZE.
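+// For example, given a 4-page allocation at p, freeing just its second page:
+//   mwpm_free_range ((uint8_t *)p + MWPM_PAGE_SIZE, MWPM_PAGE_SIZE);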
+void
+mwpm_free_range (void *base, size_t size);
+
+#endif
diff --git a/src/mono/mono/utils/options-def.h b/src/mono/mono/utils/options-def.h
index 3af994cb325b2..bd33aea9e23b7 100644
--- a/src/mono/mono/utils/options-def.h
+++ b/src/mono/mono/utils/options-def.h
@@ -59,6 +59,12 @@ DEFINE_BOOL_READONLY(readonly_flag, "readonly-flag", FALSE, "Example")
 DEFINE_BOOL(wasm_exceptions, "wasm-exceptions", FALSE, "Enable codegen for WASM exceptions")
 DEFINE_BOOL(aot_lazy_assembly_load, "aot-lazy-assembly-load", FALSE, "Load assemblies referenced by AOT images lazily")
 
+#ifdef DISABLE_THREADS
+DEFINE_BOOL(wasm_mmap, "wasm-mmap", TRUE, "Enable custom memory manager for WASM")
+#else
+// Disabled by default for MT because it breaks strcmp somehow (??????)
+DEFINE_BOOL(wasm_mmap, "wasm-mmap", FALSE, "Enable custom memory manager for WASM")
+#endif
+
 #if HOST_BROWSER
 DEFINE_BOOL(interp_pgo_recording, "interp-pgo-recording", FALSE, "Record interpreter tiering information for automatic PGO")
diff --git a/src/native/libs/CMakeLists.txt b/src/native/libs/CMakeLists.txt
index 26e619844469c..f22c93336d742 100644
--- a/src/native/libs/CMakeLists.txt
+++ b/src/native/libs/CMakeLists.txt
@@ -118,6 +118,8 @@ if (CLR_CMAKE_TARGET_UNIX OR CLR_CMAKE_TARGET_BROWSER OR CLR_CMAKE_TARGET_WASI)
   add_compile_options(-Wno-cast-align)
   add_compile_options(-Wno-typedef-redefinition)
   add_compile_options(-Wno-c11-extensions)
+  add_compile_options(-Wno-pre-c11-compat) # fixes build on Debian
+  add_compile_options(-Wno-unknown-warning-option) # unknown warning option '-Wno-pre-c11-compat'
   add_compile_options(-Wno-thread-safety-analysis)
   if (CLR_CMAKE_TARGET_BROWSER OR CLR_CMAKE_TARGET_WASI)
     add_compile_options(-Wno-unsafe-buffer-usage)