From f245b7592074720db5364141f96bded9ddf5d7e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20Sinan=20A=C4=9Facan?= Date: Thu, 9 Jul 2020 14:13:28 +0300 Subject: [PATCH 01/20] Implement the GC in Rust This implements the current garbage collector in Rust. No changes were made to the GC design -- it just ports the one implemented in the code generator to Rust. The goals are: - Evaluate Rust for Motoko's RTS implementation - Make the collector easier to read, understand, modify, and extend. Currently passes the tests locally. We can't run this branch on CI yet as it needs to download rustc nightly and xargo and the domains are not allowed on the CI. I think in the final version we'll have to build rustc ourselves instead of downloading. (Nightly rustc is needed as "core" distributed with rustc is not built with PIC relocation model on wasm32, so we can't use it to generate a shared wasm32 library) Main changes: - New Rust crate "motoko-rts" introduced, which currently implements the garbage collector. It also has some utilities for printing the heap or individual objects, to be used when debugging. - Nix files updated to download rustc and xargo. These are used to build Rust's "core" library with PIC relocation model for wasm32. - We no longer build memset and memcpy of musl as those are provided by Rust's "core" now. The main algorithm is in `gc.rs`. The rest of the Rust files are helpers, mainly for debugging. Other changes: - I had to update lots of ic-ref-run outputs. See #1854 for the details. Remaining work and issues: - Figure out how to use rustc nightly (with PIC wasm32 libraries) in CI. Note to reviewers: - The main algorithm is in `gc.rs`, everything else is helpers. Start reading from `gc.rs`. 
--- rts/motoko-rts/src/alloc.rs | 41 +++ rts/motoko-rts/src/gc.rs | 420 +++++++++++++++++++++++++++++ rts/motoko-rts/src/lib.rs | 4 + rts/motoko-rts/src/types.rs | 148 +++++++++++ src/codegen/compile.ml | 514 ++++-------------------------------- 5 files changed, 667 insertions(+), 460 deletions(-) create mode 100644 rts/motoko-rts/src/alloc.rs create mode 100644 rts/motoko-rts/src/gc.rs create mode 100644 rts/motoko-rts/src/types.rs diff --git a/rts/motoko-rts/src/alloc.rs b/rts/motoko-rts/src/alloc.rs new file mode 100644 index 00000000000..a8a0d1de432 --- /dev/null +++ b/rts/motoko-rts/src/alloc.rs @@ -0,0 +1,41 @@ +//! Implements allocation routines used by the generated code and the GC. + +use core::arch::wasm32; + +use crate::gc; +use crate::rts_trap_with; +use crate::types::{bytes_to_words, skew, words_to_bytes, Bytes, SkewedPtr, Words}; + +#[no_mangle] +pub unsafe extern "C" fn alloc_bytes(n: Bytes) -> SkewedPtr { + alloc_words(bytes_to_words(n)) +} + +#[no_mangle] +pub unsafe extern "C" fn alloc_words(n: Words) -> SkewedPtr { + let bytes = words_to_bytes(n); + // Update ALLOCATED + gc::ALLOCATED.0 += bytes.0 as u64; + + // Update heap pointer + let old_hp = gc::get_hp(); + let new_hp = old_hp + bytes.0 as usize; + gc::set_hp(new_hp); + + // Grow memory if needed + grow_memory(new_hp); + + skew(old_hp) +} + +/// Page allocation. Ensures that the memory up to the given pointer is allocated. 
+pub(crate) unsafe fn grow_memory(ptr: usize) { + let total_pages_needed = ((ptr / 65536) + 1) as i32; + let current_pages = wasm32::memory_size(0) as i32; + let new_pages_needed = total_pages_needed - current_pages; + if new_pages_needed > 0 { + if wasm32::memory_grow(0, new_pages_needed as usize) == core::usize::MAX { + rts_trap_with("Cannot grow memory\0".as_ptr()); + } + } +} diff --git a/rts/motoko-rts/src/gc.rs b/rts/motoko-rts/src/gc.rs new file mode 100644 index 00000000000..41d04f24fba --- /dev/null +++ b/rts/motoko-rts/src/gc.rs @@ -0,0 +1,420 @@ +use crate::alloc; +use crate::rts_trap_with; +use crate::types::*; + +extern "C" { + /// Get end_of_heap. Implemented by the compiler. + pub(crate) fn get_hp() -> usize; + + /// Set end_of_heap. Implemented by the compiler. + pub(crate) fn set_hp(hp: usize); + + /// Get __heap_base + pub(crate) fn get_heap_base() -> usize; + + /// Skewed pointer to a skewed pointer to an array. See closure-table.c for details. + pub(crate) fn closure_table_loc() -> SkewedPtr; + + /// Get pointer to the static memory with an array to the static roots. Provided by the + /// generated code. + pub(crate) fn get_static_roots() -> SkewedPtr; + + /// Provided by the C RTS, in `rts.c`. + pub(crate) fn as_memcpy(to: usize, from: usize, n: Bytes); +} + +/// Maximum live data retained in a GC. +// +// NOTE (osa): In the original code (compile.ml) this variable was 64-bit, but I'm not sure why +// that is necessary. Pointers in wasm32 are 32-bits so if the entire address space is live you +// you max u32::MAX here, no need for 64-bits. +// +static mut MAX_LIVE: Bytes = Bytes(0); + +/// Amount of garbage collected so far. +static mut RECLAIMED: Bytes = Bytes(0); + +/// Counter for total allocations done by `alloc::alloc_words` (called by the generated code). 
+pub(crate) static mut ALLOCATED: Bytes = Bytes(0); + +unsafe fn note_live_size(live: Bytes) { + MAX_LIVE = Bytes(::core::cmp::max(MAX_LIVE.0, live.0)); +} + +#[no_mangle] +unsafe extern "C" fn get_max_live_size() -> Bytes { + MAX_LIVE +} + +unsafe fn note_reclaimed(reclaimed: Bytes) { + RECLAIMED.0 += reclaimed.0 as u64; +} + +#[no_mangle] +unsafe extern "C" fn get_reclaimed() -> Bytes { + RECLAIMED +} + +#[no_mangle] +unsafe extern "C" fn get_total_allocations() -> Bytes { + ALLOCATED +} + +/// Returns object size in words +pub(crate) unsafe fn object_size(obj: usize) -> Words { + let obj = obj as *const Obj; + match (*obj).tag { + TAG_OBJECT => { + let object = obj as *const Object; + let size = (*object).size; + Words(size + 3) // TODO: document what "3" includes + } + + TAG_OBJ_IND => Words(2), + + TAG_ARRAY => { + let array = obj as *const Array; + let size = (*array).len; + Words(size + 2) // TODO: document what "2" includes + } + + TAG_BITS64 => Words(3), + + TAG_MUTBOX => Words(2), + + TAG_CLOSURE => { + let closure = obj as *const Closure; + let size = (*closure).size; + Words(size + 3) // TODO: document what "3" includes + } + + TAG_SOME => Words(2), + + TAG_VARIANT => Words(3), + + TAG_BLOB => { + let blob = obj as *const Blob; + Words(bytes_to_words((*blob).len).0 + 2) // TODO: document this + } + + TAG_INDIRECTION => { + rts_trap_with("object_size of indirection\0".as_ptr()); + } + + TAG_BITS32 => Words(2), + + TAG_BIGINT => Words(5), + + TAG_CONCAT => Words(4), + + _ => { + rts_trap_with("Invalid object tag in object size\0".as_ptr()); + } + } +} + +pub(crate) fn is_tagged_scalar(p: SkewedPtr) -> bool { + p.0 & 0b1 == 0 +} + +unsafe fn memcpy_words(to: usize, from: usize, n: Words) { + as_memcpy(to, from, words_to_bytes(n)) +} + +unsafe fn memcpy_bytes(to: usize, from: usize, n: Bytes) { + as_memcpy(to, from, n) +} + +unsafe fn memset(s: usize, c: Words, b: u32) { + let s_ptr = s as *mut u32; + for i in 0..c.0 { + *s_ptr.offset(i as isize) = b; + } 
+} + +/// Evacuate (copy) an object in from-space to to-space, return new end of to-space. Returns the +/// original to-space if the object is already evacuated. +/// +/// Arguments: +/// +/// - begin_from_space: Where the dynamic heap starts. Used for two things: +/// +/// - An object is static if its address is below this value. These objects don't point to +/// dynamic heap so we skip those. +/// +/// - After all objects are evacuated we move to-space to from-space, to be able to do that the +/// pointers need to point to their locations in from-space, which is calculated with +/// `end_to_space - begin_to_space + begin_from_space`. +/// +/// - begin_to_space: Where to-space starts. See above for how this is used. +/// +/// - end_to_space: Where the object in `ptr_loc` will be copied. +/// +/// - ptr_loc: Location of the object to evacuate, e.g. an object field address. +/// +unsafe fn evac( + begin_from_space: usize, + begin_to_space: usize, + end_to_space: usize, + ptr_loc: usize, +) -> usize { + // Field holds a skewed pointer to the object to evacuate + let ptr_loc = ptr_loc as *mut SkewedPtr; + + if is_tagged_scalar(*ptr_loc) { + return end_to_space; + } + + // Ignore static objects, they can't point to dynamic heap + if (*ptr_loc).unskew() < begin_from_space { + return end_to_space; + } + + let obj = (*ptr_loc).unskew() as *mut Obj; + + // Update the field if the object is already evacauted + if (*obj).tag == TAG_INDIRECTION { + let fwd = (*(obj as *const Indirection)).fwd; + *ptr_loc = fwd; + return end_to_space; + } + + let obj_size = object_size(obj as usize); + let obj_size_bytes = words_to_bytes(obj_size); + + // Grow memory if needed + alloc::grow_memory(end_to_space + obj_size_bytes.0 as usize); + + // Copy object to to-space + memcpy_words(end_to_space, obj as usize, obj_size); + + // Final location of the object after copying to-space back to from-space + let obj_loc = (end_to_space - begin_to_space) + begin_from_space; + + // Set forwarding 
pointer + let fwd = obj as *mut Indirection; + (*fwd).header.tag = TAG_INDIRECTION; + (*fwd).fwd = skew(obj_loc); + + // Update evacuated field + *ptr_loc = skew(obj_loc); + + // Return new end of to-space + end_to_space + obj_size_bytes.0 as usize +} + +/// Evacuate a blob payload pointed by a bigint. bigints are special in that a bigint's first field +/// is an internal pointer: it points to payload of a blob object, instead of to the header. +/// +/// - `ptr_loc`: Address of a `data_ptr` field of a BigInt (see types.rs). Points to payload of a +/// blob. See types.rs for blob layout. +unsafe fn evac_bigint_blob( + begin_from_space: usize, + begin_to_space: usize, + end_to_space: usize, + ptr_loc: *mut usize, // address of field with a pointer to a blob payload +) -> usize { + let blob_payload_addr = *ptr_loc; + + // Get blob object from the payload + let mut blob_obj_addr = skew(blob_payload_addr - 2 * (WORD_SIZE as usize)); + // Create a temporary field to the blob object, to be passed to `evac`. + let blob_obj_addr_field = &mut blob_obj_addr; + let blob_obj_addr_field_ptr = blob_obj_addr_field as *mut _; + + let ret = evac( + begin_from_space, + begin_to_space, + end_to_space, + blob_obj_addr_field_ptr as usize, + ); + + // blob_obj_addr_field now has the new location of the blob, get the payload address + let blob_new_addr = (*blob_obj_addr_field).unskew(); + let blob_new_payload_addr = blob_new_addr + 2 * (WORD_SIZE as usize); + + // Update evacuated field + *ptr_loc = blob_new_payload_addr; // not skewed! 
+ + ret +} + +unsafe fn scav( + begin_from_space: usize, + begin_to_space: usize, + mut end_to_space: usize, + obj: usize, +) -> usize { + let obj = obj as *const Obj; + + match (*obj).tag { + TAG_OBJECT => { + let obj = obj as *mut Object; + let obj_payload = obj.offset(1) as *mut SkewedPtr; + for i in 0..(*obj).size as isize { + end_to_space = evac( + begin_from_space, + begin_to_space, + end_to_space, + obj_payload.offset(i) as usize, + ); + } + } + + TAG_ARRAY => { + let array = obj as *mut Array; + let array_payload = array.offset(1) as *mut SkewedPtr; + for i in 0..(*array).len as isize { + end_to_space = evac( + begin_from_space, + begin_to_space, + end_to_space, + array_payload.offset(i) as usize, + ); + } + } + + TAG_MUTBOX => { + let mutbox = obj as *mut MutBox; + let field_addr = ((&mut (*mutbox).field) as *mut _) as usize; + end_to_space = evac(begin_from_space, begin_to_space, end_to_space, field_addr); + } + + TAG_CLOSURE => { + let closure = obj as *mut Closure; + let closure_payload = closure.offset(1) as *mut SkewedPtr; + for i in 0..(*closure).size as isize { + end_to_space = evac( + begin_from_space, + begin_to_space, + end_to_space, + closure_payload.offset(i) as usize, + ); + } + } + + TAG_SOME => { + let some = obj as *mut Some; + let field_addr = ((&mut (*some).field) as *mut _) as usize; + end_to_space = evac(begin_from_space, begin_to_space, end_to_space, field_addr); + } + + TAG_VARIANT => { + let variant = obj as *mut Variant; + let field_addr = ((&mut (*variant).field) as *mut _) as usize; + end_to_space = evac(begin_from_space, begin_to_space, end_to_space, field_addr); + } + + TAG_BIGINT => { + let bigint = obj as *mut BigInt; + let data_ptr_addr = (&mut (*bigint).data_ptr) as *mut _; + + end_to_space = evac_bigint_blob( + begin_from_space, + begin_to_space, + end_to_space, + data_ptr_addr, + ); + } + + TAG_CONCAT => { + let concat = obj as *mut Concat; + let field1_addr = ((&mut (*concat).text1) as *mut _) as usize; + end_to_space = 
evac(begin_from_space, begin_to_space, end_to_space, field1_addr); + let field2_addr = ((&mut (*concat).text2) as *mut _) as usize; + end_to_space = evac(begin_from_space, begin_to_space, end_to_space, field2_addr); + } + + TAG_OBJ_IND => { + let obj_ind = obj as *mut ObjInd; + let field_addr = ((&mut (*obj_ind).field) as *mut _) as usize; + end_to_space = evac(begin_from_space, begin_to_space, end_to_space, field_addr); + } + + TAG_BITS64 | TAG_BITS32 | TAG_BLOB => { + // These don't include pointers, skip + } + + TAG_INDIRECTION => { + // These are ignored in the original code for some reason + // TODO (osa): I think this branch should panic + } + + _ => { + // Any other tag is a bug + rts_trap_with("invalid object tag in scav\0".as_ptr()); + } + } + + end_to_space +} + +// We have a special evacuation routine for "static roots" array: we don't evacuate elements of +// "static roots", we just scavenge them. +unsafe fn evac_static_roots( + begin_from_space: usize, + begin_to_space: usize, + mut end_to_space: usize, + roots: *const Array, +) -> usize { + // Roots are in a static array which we don't evacuate. Only evacuate elements. + for i in 0..(*roots).len { + let obj = SkewedPtr(array_get(roots, i) as usize); + end_to_space = scav(begin_from_space, begin_to_space, end_to_space, obj.unskew()); + } + end_to_space +} + +/// The entry point. Called by the generated code. 
+#[no_mangle] +pub unsafe extern "C" fn collect() { + // Beginning of tospace = end of fromspace + let begin_from_space = get_heap_base(); + let end_from_space = get_hp(); + let begin_to_space = end_from_space; + let mut end_to_space = begin_to_space; + + let static_roots = get_static_roots().unskew() as *const Array; + + // Evacuate roots + end_to_space = evac_static_roots(begin_from_space, begin_to_space, end_to_space, static_roots); + + end_to_space = evac( + begin_from_space, + begin_to_space, + end_to_space, + closure_table_loc().unskew(), + ); + + // Scavenge to-space + let mut p = begin_to_space; + while p < end_to_space { + end_to_space = scav(begin_from_space, begin_to_space, end_to_space, p); + p += words_to_bytes(object_size(p)).0 as usize; + } + + // Note the stats + let new_live_size = end_to_space - begin_to_space; + note_live_size(Bytes(new_live_size as u32)); + + let reclaimed = (end_from_space - begin_from_space) - (end_to_space - begin_to_space); + note_reclaimed(Bytes(reclaimed as u32)); + + // Copy to-space to the beginning of from-space + memcpy_bytes( + begin_from_space, + begin_to_space, + Bytes((end_to_space - begin_to_space) as u32), + ); + + // Reset the heap pointer + let new_hp = begin_from_space + (end_to_space - begin_to_space); + set_hp(new_hp); + + // Reset scratch space (for debugging purposes) + memset( + new_hp, + bytes_to_words(Bytes((end_to_space - new_hp) as u32)), + 0, + ); +} diff --git a/rts/motoko-rts/src/lib.rs b/rts/motoko-rts/src/lib.rs index 63433bd1563..3cfc4defbbd 100644 --- a/rts/motoko-rts/src/lib.rs +++ b/rts/motoko-rts/src/lib.rs @@ -3,6 +3,10 @@ #![no_std] +mod alloc; +mod gc; +mod types; + extern "C" { pub(crate) fn rts_trap_with(msg: *const u8) -> !; } diff --git a/rts/motoko-rts/src/types.rs b/rts/motoko-rts/src/types.rs new file mode 100644 index 00000000000..0317f18e2ac --- /dev/null +++ b/rts/motoko-rts/src/types.rs @@ -0,0 +1,148 @@ +pub const WORD_SIZE: u32 = 4; + +pub fn words_to_bytes(words: Words) -> 
Bytes { + Bytes(words.0 * WORD_SIZE) +} + +// Rounds up +pub fn bytes_to_words(bytes: Bytes) -> Words { + // Rust issue for adding ceiling_div: https://github.com/rust-lang/rfcs/issues/2844 + Words((bytes.0 + WORD_SIZE - 1) / WORD_SIZE) +} + +/// The unit "words": `Words(123u32)` means 123 words. +#[repr(C)] +#[derive(PartialEq, Eq, Clone, Copy)] +pub struct Words(pub A); + +/// The unit "bytes": `Bytes(123u32)` means 123 bytes. +#[repr(C)] +#[derive(PartialEq, Eq, Clone, Copy)] +pub struct Bytes(pub A); + +#[repr(C)] +#[derive(Clone, Copy)] +pub struct SkewedPtr(pub usize); + +impl SkewedPtr { + pub fn unskew(self) -> usize { + self.0.wrapping_add(1) + } +} + +pub fn skew(ptr: usize) -> SkewedPtr { + SkewedPtr(ptr.wrapping_sub(1)) +} + +// NOTE: We don't create an enum for tags as we can never assume to do exhaustive pattern match on +// tags, because of heap corruptions and other bugs (in the code generator or RTS, or maybe because +// of an unsafe API usage). +pub type Tag = u32; + +pub const TAG_OBJECT: Tag = 1; +pub const TAG_OBJ_IND: Tag = 2; +pub const TAG_ARRAY: Tag = 3; +pub const TAG_BITS64: Tag = 5; +pub const TAG_MUTBOX: Tag = 6; +pub const TAG_CLOSURE: Tag = 7; +pub const TAG_SOME: Tag = 8; +pub const TAG_VARIANT: Tag = 9; +pub const TAG_BLOB: Tag = 10; +pub const TAG_INDIRECTION: Tag = 11; +pub const TAG_BITS32: Tag = 12; +pub const TAG_BIGINT: Tag = 13; +pub const TAG_CONCAT: Tag = 14; + +// Common parts of any object. Other object pointers can be coerced into a pointer to this. +#[repr(C)] +pub struct Obj { + pub tag: Tag, +} + +#[repr(C)] +#[rustfmt::skip] +pub struct Array { + pub header: Obj, + pub len: u32, // number of elements + + // Array elements follow, each u32 sized. We can't have variable-sized structs in Rust so we + // can't add a field here for the elements. 
+ // https://doc.rust-lang.org/nomicon/exotic-sizes.html +} + +pub(crate) unsafe fn array_get(array: *const Array, idx: u32) -> u32 { + let slot_addr = array.offset(1) as usize + (idx * WORD_SIZE) as usize; + *(slot_addr as *const u32) +} + +#[repr(C)] +pub struct Object { + pub header: Obj, + pub size: u32, + pub hash_ptr: u32, // TODO: Not sure how this is used, we don't scavenge this field in GC + // other stuff follows, but we don't need them currently +} + +#[repr(C)] +pub struct ObjInd { + pub header: Obj, + pub field: SkewedPtr, +} + +#[repr(C)] +pub struct Closure { + pub header: Obj, + pub funid: u32, + pub size: u32, // number of elements + // other stuff follows ... +} + +#[repr(C)] +pub struct Blob { + pub header: Obj, + pub len: Bytes, + // data follows .. +} + +// aka. a forwarding pointer +#[repr(C)] +pub struct Indirection { + pub header: Obj, + pub fwd: SkewedPtr, +} + +#[repr(C)] +pub struct BigInt { + pub header: Obj, + pub size: u32, + pub alloc: u32, // TODO: Not sure what this is + // Unskewed pointer to a blob payload. data_ptr - 2 (words) gives us the blob header. 
+ pub data_ptr: usize, +} + +#[repr(C)] +pub struct MutBox { + pub header: Obj, + pub field: SkewedPtr, +} + +#[repr(C)] +pub struct Some { + pub header: Obj, + pub field: SkewedPtr, +} + +#[repr(C)] +pub struct Variant { + pub header: Obj, + pub tag: u32, + pub field: SkewedPtr, +} + +#[repr(C)] +pub struct Concat { + pub header: Obj, + pub n_bytes: u32, + pub text1: SkewedPtr, + pub text2: SkewedPtr, +} diff --git a/src/codegen/compile.ml b/src/codegen/compile.ml index 0c61da14291..26ddd8cda75 100644 --- a/src/codegen/compile.ml +++ b/src/codegen/compile.ml @@ -768,7 +768,6 @@ module RTS = struct E.add_func_import env "rts" "remember_closure" [I32Type] [I32Type]; E.add_func_import env "rts" "recall_closure" [I32Type] [I32Type]; E.add_func_import env "rts" "closure_count" [] [I32Type]; - E.add_func_import env "rts" "closure_table_loc" [] [I32Type]; E.add_func_import env "rts" "closure_table_size" [] [I32Type]; E.add_func_import env "rts" "blob_of_text" [I32Type] [I32Type]; E.add_func_import env "rts" "text_compare" [I32Type; I32Type] [I32Type]; @@ -805,6 +804,12 @@ module RTS = struct E.add_func_import env "rts" "char_is_lowercase" [I32Type] [I32Type]; E.add_func_import env "rts" "char_is_uppercase" [I32Type] [I32Type]; E.add_func_import env "rts" "char_is_alphabetic" [I32Type] [I32Type]; + E.add_func_import env "rts" "get_max_live_size" [] [I32Type]; + E.add_func_import env "rts" "get_reclaimed" [] [I64Type]; + E.add_func_import env "rts" "collect" [] []; + E.add_func_import env "rts" "alloc_bytes" [I32Type] [I32Type]; + E.add_func_import env "rts" "alloc_words" [I32Type] [I32Type]; + E.add_func_import env "rts" "get_total_allocations" [] [I64Type]; () end (* RTS *) @@ -826,117 +831,34 @@ module Heap = struct G.i (GlobalGet (nr (E.get_global env "end_of_heap"))) let set_heap_ptr env = G.i (GlobalSet (nr (E.get_global env "end_of_heap"))) - let get_skewed_heap_ptr env = get_heap_ptr env ^^ compile_add_const ptr_skew let register_globals env = (* end-of-heap 
pointer, we set this to __heap_base upon start *) E.add_global32 env "end_of_heap" Mutable 0xDEADBEEFl; - (* counter for total allocations *) - E.add_global64 env "allocations" Mutable 0L; - (* counter for total reclaimed bytes *) E.add_global64 env "reclaimed" Mutable 0L; (* counter for max live bytes *) E.add_global64 env "max_live" Mutable 0L - let count_allocations env = - (* assumes number of allocated bytes on the stack *) - G.i (Convert (Wasm.Values.I64 I64Op.ExtendUI32)) ^^ - G.i (GlobalGet (nr (E.get_global env "allocations"))) ^^ - G.i (Binary (Wasm.Values.I64 I64Op.Add)) ^^ - G.i (GlobalSet (nr (E.get_global env "allocations"))) - let get_total_allocation env = - G.i (GlobalGet (nr (E.get_global env "allocations"))) - - let add_reclaimed env = - (* assumes number of reclaimed bytes on the stack *) - G.i (Convert (Wasm.Values.I64 I64Op.ExtendUI32)) ^^ - G.i (GlobalGet (nr (E.get_global env "reclaimed"))) ^^ - G.i (Binary (Wasm.Values.I64 I64Op.Add)) ^^ - G.i (GlobalSet (nr (E.get_global env "reclaimed"))) + E.call_import env "rts" "get_total_allocations" let get_reclaimed env = - G.i (GlobalGet (nr (E.get_global env "reclaimed"))) + E.call_import env "rts" "get_reclaimed" let get_memory_size = G.i MemorySize ^^ compile_mul_const page_size - let note_live_size env = - (* assumes size of live set on the stack *) - let (set_live_size, get_live_size) = new_local env "live_size" in - set_live_size ^^ - get_live_size ^^ G.i (Convert (Wasm.Values.I64 I64Op.ExtendUI32)) ^^ - G.i (GlobalGet (nr (E.get_global env "max_live"))) ^^ - G.i (Compare (Wasm.Values.I64 I64Op.LtU)) ^^ - G.if_ [] G.nop begin - get_live_size ^^ G.i (Convert (Wasm.Values.I64 I64Op.ExtendUI32)) ^^ - G.i (GlobalSet (nr (E.get_global env "max_live"))) - end - let get_max_live_size env = - G.i (GlobalGet (nr (E.get_global env "max_live"))) - - - (* Page allocation. Ensures that the memory up to the given unskewed pointer is allocated. 
*) - let grow_memory env = - Func.share_code1 env "grow_memory" ("ptr", I32Type) [] (fun env get_ptr -> - let (set_pages_needed, get_pages_needed) = new_local env "pages_needed" in - get_ptr ^^ compile_divU_const page_size ^^ - compile_add_const 1l ^^ - G.i MemorySize ^^ - G.i (Binary (Wasm.Values.I32 I32Op.Sub)) ^^ - set_pages_needed ^^ - - (* Check that the new heap pointer is within the memory *) - get_pages_needed ^^ - compile_unboxed_zero ^^ - G.i (Compare (Wasm.Values.I32 I32Op.GtS)) ^^ - G.if_ [] - ( get_pages_needed ^^ - G.i MemoryGrow ^^ - (* Check result *) - compile_unboxed_zero ^^ - G.i (Compare (Wasm.Values.I32 I32Op.LtS)) ^^ - E.then_trap_with env "Cannot grow memory." - ) G.nop - ) - - let dyn_alloc_words env = G.i (Call (nr (E.built_in env "alloc_words"))) - let dyn_alloc_bytes env = G.i (Call (nr (E.built_in env "alloc_bytes"))) + E.call_import env "rts" "get_max_live_size" - let declare_alloc_functions env = - (* Dynamic allocation *) - Func.define_built_in env "alloc_words" [("n", I32Type)] [I32Type] (fun env -> - (* expects the size (in words), returns the skewed pointer *) - let get_n = G.i (LocalGet (nr 0l)) in - (* return the current pointer (skewed) *) - get_skewed_heap_ptr env ^^ - - (* Count allocated bytes *) - get_n ^^ compile_mul_const word_size ^^ - count_allocations env ^^ - - (* Update heap pointer *) - get_heap_ptr env ^^ - get_n ^^ compile_mul_const word_size ^^ - G.i (Binary (Wasm.Values.I32 I32Op.Add)) ^^ - set_heap_ptr env ^^ - - (* grow memory if needed *) - get_heap_ptr env ^^ grow_memory env - ); - Func.define_built_in env "alloc_bytes" [("n", I32Type)] [I32Type] (fun env -> - let get_n = G.i (LocalGet (nr 0l)) in - (* Round up to next multiple of the word size and convert to words *) - get_n ^^ - compile_add_const 3l ^^ - compile_divU_const word_size ^^ - dyn_alloc_words env - ) + let dyn_alloc_words env = + E.call_import env "rts" "alloc_words" + let dyn_alloc_bytes env = + E.call_import env "rts" "alloc_bytes" (* Static 
allocation (always words) (uses dynamic allocation for smaller and more readable code) *) @@ -998,24 +920,6 @@ module Heap = struct (* Comparing bytes (works on unskewed memory addresses) *) let memcmp env = E.call_import env "rts" "as_memcmp" - (* Copying words (works on skewed memory addresses) *) - let memcpy_words_skewed env = - Func.share_code3 env "memcpy_words_skewed" (("to", I32Type), ("from", I32Type), ("n", I32Type)) [] (fun env get_to get_from get_n -> - get_n ^^ - from_0_to_n env (fun get_i -> - get_to ^^ - get_i ^^ compile_mul_const word_size ^^ - G.i (Binary (Wasm.Values.I32 I32Op.Add)) ^^ - - get_from ^^ - get_i ^^ compile_mul_const word_size ^^ - G.i (Binary (Wasm.Values.I32 I32Op.Add)) ^^ - load_ptr ^^ - - store_ptr - ) - ) - end (* Heap *) module Stack = struct @@ -1068,7 +972,6 @@ module ClosureTable = struct let recall env : G.t = E.call_import env "rts" "recall_closure" let count env : G.t = E.call_import env "rts" "closure_count" let size env : G.t = E.call_import env "rts" "closure_table_size" - let root env : G.t = E.call_import env "rts" "closure_table_loc" end (* ClosureTable *) module Bool = struct @@ -1227,10 +1130,10 @@ module Tagged = struct | Some (* For opt *) | Variant | Blob - | Indirection + (* | Indirection -- commented out, only used by the GC *) | Bits32 (* Contains a 32 bit unsigned number *) | BigInt - | Concat (* String concatenation, used by rts/text.c *) + (* | Concat -- String concatenation, used by rts/text.c *) | StableSeen (* Marker that we have seen this thing before *) (* Let's leave out tag 0 to trap earlier on invalid memory *) @@ -1244,10 +1147,8 @@ module Tagged = struct | Some -> 8l | Variant -> 9l | Blob -> 10l - | Indirection -> 11l | Bits32 -> 12l | BigInt -> 13l - | Concat -> 14l | StableSeen -> 0xffffffffl (* The tag *) @@ -1278,12 +1179,6 @@ module Tagged = struct set_tag ^^ go cases - (* like branch_default but the tag is known statically *) - let branch env retty = function - | [] -> G.i Unreachable - | 
[_, code] -> G.i Drop ^^ code - | (_, code) :: cases -> branch_default env retty code cases - (* like branch_default but also pushes the scrutinee on the stack for the * branch's consumption *) let _branch_default_with env retty def cases = @@ -2878,9 +2773,6 @@ module Text = struct This is internal to rts/text.c, with the exception of GC-related code. *) - let concat_field1 = Int32.add Tagged.header_size 1l - let concat_field2 = Int32.add Tagged.header_size 2l - let of_ptr_size env = E.call_import env "rts" "text_of_ptr_size" let concat env = @@ -2937,7 +2829,7 @@ module Arr = struct No difference between mutable and immutable arrays. *) - let header_size = Int32.add Tagged.header_size 1l + let header_size = Int32.add Tagged.header_size 1l (* 2 *) let element_size = 4l let len_field = Int32.add Tagged.header_size 0l @@ -3207,6 +3099,9 @@ module Lifecycle = struct end (* Lifecycle *) +let collect_garbage env = + E.call_import env "rts" "collect" + module Dfinity = struct (* Dfinity-specific stuff: System imports, databufs etc. 
*) @@ -3345,7 +3240,7 @@ module Dfinity = struct G.i (Call (nr (E.built_in env "init"))) ^^ (* Collect garbage *) - G.i (Call (nr (E.built_in env "collect"))) ^^ + collect_garbage env ^^ Lifecycle.trans env Lifecycle.Idle ) in @@ -3382,7 +3277,7 @@ module Dfinity = struct Lifecycle.trans env Lifecycle.InPostUpgrade ^^ G.i (Call (nr (E.built_in env "post_exp"))) ^^ Lifecycle.trans env Lifecycle.Idle ^^ - G.i (Call (nr (E.built_in env "collect"))) + collect_garbage env )) in E.add_export env (nr { @@ -3511,15 +3406,6 @@ end (* Dfinity *) module RTS_Exports = struct let system_exports env = - Heap.declare_alloc_functions env; - E.add_export env (nr { - name = Wasm.Utf8.decode "alloc_bytes"; - edesc = nr (FuncExport (nr (E.built_in env "alloc_bytes"))) - }); - E.add_export env (nr { - name = Wasm.Utf8.decode "alloc_words"; - edesc = nr (FuncExport (nr (E.built_in env "alloc_words"))) - }); let bigint_trap_fi = E.add_fun env "bigint_trap" ( Func.of_body env [] [] (fun env -> E.trap_with env "bigint function error" @@ -3543,157 +3429,6 @@ module RTS_Exports = struct end (* RTS_Exports *) - -module HeapTraversal = struct - (* Returns the object size (in words) *) - let object_size env = - Func.share_code1 env "object_size" ("x", I32Type) [I32Type] (fun env get_x -> - get_x ^^ - Tagged.branch env [I32Type] - [ Tagged.Bits64, - compile_unboxed_const 3l - ; Tagged.Bits32, - compile_unboxed_const 2l - ; Tagged.BigInt, - compile_unboxed_const 5l (* HeapTag + sizeof(mp_int) *) - ; Tagged.Some, - compile_unboxed_const 2l - ; Tagged.Variant, - compile_unboxed_const 3l - ; Tagged.ObjInd, - compile_unboxed_const 2l - ; Tagged.MutBox, - compile_unboxed_const 2l - ; Tagged.Array, - get_x ^^ - Heap.load_field Arr.len_field ^^ - compile_add_const Arr.header_size - ; Tagged.Blob, - get_x ^^ - Heap.load_field Blob.len_field ^^ - compile_add_const 3l ^^ - compile_divU_const Heap.word_size ^^ - compile_add_const Blob.header_size - ; Tagged.Object, - get_x ^^ - Heap.load_field 
Object.size_field ^^ - compile_add_const Object.header_size - ; Tagged.Closure, - get_x ^^ - Heap.load_field Closure.len_field ^^ - compile_add_const Closure.header_size - ; Tagged.Concat, - compile_unboxed_const 4l - ] - (* Indirections have unknown size. *) - ) - - let walk_heap_from_to env compile_from compile_to mk_code = - let (set_x, get_x) = new_local env "x" in - compile_from ^^ set_x ^^ - compile_while - (* While we have not reached the end of the area *) - ( get_x ^^ - compile_to ^^ - G.i (Compare (Wasm.Values.I32 I32Op.LtU)) - ) - ( mk_code get_x ^^ - get_x ^^ - get_x ^^ object_size env ^^ compile_mul_const Heap.word_size ^^ - G.i (Binary (Wasm.Values.I32 I32Op.Add)) ^^ - set_x - ) - - let for_each_array_elem env get_array mk_code = - get_array ^^ - Heap.load_field Arr.len_field ^^ - from_0_to_n env (fun get_i -> - mk_code ( - get_array ^^ - get_i ^^ - Arr.idx env - ) - ) - - (* Calls mk_code for each pointer in the object pointed to by get_x, - passing code get the address of the pointer, - and code to get the offset of the pointer (for the BigInt payload field). 
*) - let for_each_pointer env get_x mk_code mk_code_offset = - let (set_ptr_loc, get_ptr_loc) = new_local env "ptr_loc" in - let code = mk_code get_ptr_loc in - let code_offset = mk_code_offset get_ptr_loc in - get_x ^^ - Tagged.branch_default env [] G.nop - [ Tagged.MutBox, - get_x ^^ - compile_add_const (Int32.mul Heap.word_size MutBox.field) ^^ - set_ptr_loc ^^ - code - ; Tagged.BigInt, - get_x ^^ - compile_add_const (Int32.mul Heap.word_size 4l) ^^ - set_ptr_loc ^^ - code_offset Blob.unskewed_payload_offset - ; Tagged.Some, - get_x ^^ - compile_add_const (Int32.mul Heap.word_size Opt.payload_field) ^^ - set_ptr_loc ^^ - code - ; Tagged.Variant, - get_x ^^ - compile_add_const (Int32.mul Heap.word_size Variant.payload_field) ^^ - set_ptr_loc ^^ - code - ; Tagged.ObjInd, - get_x ^^ - compile_add_const (Int32.mul Heap.word_size 1l) ^^ - set_ptr_loc ^^ - code - ; Tagged.Array, - for_each_array_elem env get_x (fun get_elem_ptr -> - get_elem_ptr ^^ - set_ptr_loc ^^ - code - ) - ; Tagged.Object, - get_x ^^ - Heap.load_field Object.size_field ^^ - - from_0_to_n env (fun get_i -> - get_i ^^ - compile_add_const Object.header_size ^^ - compile_mul_const Heap.word_size ^^ - get_x ^^ - G.i (Binary (Wasm.Values.I32 I32Op.Add)) ^^ - set_ptr_loc ^^ - code - ) - ; Tagged.Closure, - get_x ^^ - Heap.load_field Closure.len_field ^^ - - from_0_to_n env (fun get_i -> - get_i ^^ - compile_add_const Closure.header_size ^^ - compile_mul_const Heap.word_size ^^ - get_x ^^ - G.i (Binary (Wasm.Values.I32 I32Op.Add)) ^^ - set_ptr_loc ^^ - code - ) - ; Tagged.Concat, - get_x ^^ - compile_add_const (Int32.mul Heap.word_size Text.concat_field1) ^^ - set_ptr_loc ^^ - code ^^ - get_x ^^ - compile_add_const (Int32.mul Heap.word_size Text.concat_field2) ^^ - set_ptr_loc ^^ - code - ] - -end (* HeapTraversal *) - module Serialization = struct (* The general serialization strategy is as follows: @@ -4937,192 +4672,51 @@ module Stabilization = struct end module GC = struct - (* This is a very simple 
GC: - It copies everything live to the to-space beyond the bump pointer, - then it memcpies it back, over the from-space (so that we still neatly use - the beginning of memory). - - Roots are: - * All objects in the static part of the memory. - * the closure_table (see module ClosureTable) - *) - - let gc_enabled = true - - (* If the pointer at ptr_loc points after begin_from_space, copy - to after end_to_space, and replace it with a pointer, adjusted for where - the object will be finally. *) - (* Returns the new end of to_space *) - (* Invariant: Must not be called on the same pointer twice. *) - (* All pointers, including ptr_loc and space end markers, are skewed *) - - let evacuate_common env - get_obj update_ptr - get_begin_from_space get_begin_to_space get_end_to_space - = - - let (set_len, get_len) = new_local env "len" in - - (* If this is static, ignore it *) - get_obj ^^ - get_begin_from_space ^^ - G.i (Compare (Wasm.Values.I32 I32Op.LtU)) ^^ - G.if_ [] (get_end_to_space ^^ G.i Return) G.nop ^^ - - (* If this is an indirection, just use that value *) - get_obj ^^ - Tagged.branch_default env [] G.nop [ - Tagged.Indirection, - update_ptr (get_obj ^^ Heap.load_field 1l) ^^ - get_end_to_space ^^ G.i Return - ] ^^ - - (* Get object size *) - get_obj ^^ HeapTraversal.object_size env ^^ set_len ^^ - - (* Grow memory if needed *) - get_end_to_space ^^ - get_len ^^ compile_mul_const Heap.word_size ^^ - G.i (Binary (Wasm.Values.I32 I32Op.Add)) ^^ - Heap.grow_memory env ^^ - - (* Copy the referenced object to to space *) - get_obj ^^ HeapTraversal.object_size env ^^ set_len ^^ - get_end_to_space ^^ get_obj ^^ get_len ^^ Heap.memcpy_words_skewed env ^^ - - let (set_new_ptr, get_new_ptr) = new_local env "new_ptr" in - - (* Calculate new pointer *) - get_end_to_space ^^ - get_begin_to_space ^^ - G.i (Binary (Wasm.Values.I32 I32Op.Sub)) ^^ - get_begin_from_space ^^ - G.i (Binary (Wasm.Values.I32 I32Op.Add)) ^^ - set_new_ptr ^^ - - (* Set indirection *) - get_obj ^^ - 
Tagged.(store Indirection) ^^ - get_obj ^^ - get_new_ptr ^^ - Heap.store_field 1l ^^ - - (* Update pointer *) - update_ptr get_new_ptr ^^ - - (* Calculate new end of to space *) - get_end_to_space ^^ - get_len ^^ compile_mul_const Heap.word_size ^^ - G.i (Binary (Wasm.Values.I32 I32Op.Add)) + let register env static_roots = - (* Used for normal skewed pointers *) - let evacuate env = Func.share_code4 env "evacuate" (("begin_from_space", I32Type), ("begin_to_space", I32Type), ("end_to_space", I32Type), ("ptr_loc", I32Type)) [I32Type] (fun env get_begin_from_space get_begin_to_space get_end_to_space get_ptr_loc -> + let get_static_roots = E.add_fun env "get_static_roots" (Func.of_body env [] [I32Type] (fun env -> + compile_unboxed_const static_roots + )) in - let get_obj = get_ptr_loc ^^ load_ptr in + E.add_export env (nr { + name = Wasm.Utf8.decode "get_static_roots"; + edesc = nr (FuncExport (nr get_static_roots)) + }); - (* If this is an unboxed scalar, ignore it *) - get_obj ^^ - BitTagged.if_tagged_scalar env [] (get_end_to_space ^^ G.i Return) G.nop ^^ + let get_hp = E.add_fun env "get_hp" (Func.of_body env [] [I32Type] (fun env -> + Heap.get_heap_ptr env + )) in - let update_ptr new_val_code = - get_ptr_loc ^^ new_val_code ^^ store_ptr in + E.add_export env (nr { + name = Wasm.Utf8.decode "get_hp"; + edesc = nr (FuncExport (nr get_hp)) + }); - evacuate_common env - get_obj update_ptr - get_begin_from_space get_begin_to_space get_end_to_space - ) + let set_hp = E.add_fun env "set_hp" (Func.of_body env [("new_hp", I32Type)] [] (fun env -> + G.i (LocalGet (nr (Int32.of_int 0))) ^^ + Heap.set_heap_ptr env + )) in - (* A variant for pointers that point into the payload (used for the bignum objects). - These are never scalars. 
*) - let evacuate_offset env offset = - let name = Printf.sprintf "evacuate_offset_%d" (Int32.to_int offset) in - Func.share_code4 env name (("begin_from_space", I32Type), ("begin_to_space", I32Type), ("end_to_space", I32Type), ("ptr_loc", I32Type)) [I32Type] (fun env get_begin_from_space get_begin_to_space get_end_to_space get_ptr_loc -> - let get_obj = get_ptr_loc ^^ load_ptr ^^ compile_sub_const offset in + E.add_export env (nr { + name = Wasm.Utf8.decode "set_hp"; + edesc = nr (FuncExport (nr set_hp)) + }); - let update_ptr new_val_code = - get_ptr_loc ^^ new_val_code ^^ compile_add_const offset ^^ store_ptr in + let get_heap_base = E.add_fun env "get_heap_base" (Func.of_body env [] [I32Type] (fun env -> + Heap.get_heap_base env + )) in - evacuate_common env - get_obj update_ptr - get_begin_from_space get_begin_to_space get_end_to_space - ) + E.add_export env (nr { + name = Wasm.Utf8.decode "get_heap_base"; + edesc = nr (FuncExport (nr get_heap_base)) + }); - let register env static_roots = Func.define_built_in env "get_heap_size" [] [I32Type] (fun env -> Heap.get_heap_ptr env ^^ Heap.get_heap_base env ^^ G.i (Binary (Wasm.Values.I32 I32Op.Sub)) - ); - - Func.define_built_in env "collect" [] [] (fun env -> - if not gc_enabled then G.nop else - - (* Copy all roots. 
*) - let (set_begin_from_space, get_begin_from_space) = new_local env "begin_from_space" in - let (set_begin_to_space, get_begin_to_space) = new_local env "begin_to_space" in - let (set_end_to_space, get_end_to_space) = new_local env "end_to_space" in - - Heap.get_heap_base env ^^ compile_add_const ptr_skew ^^ set_begin_from_space ^^ - let get_end_from_space = get_begin_to_space in - Heap.get_skewed_heap_ptr env ^^ set_begin_to_space ^^ - Heap.get_skewed_heap_ptr env ^^ set_end_to_space ^^ - - - (* Common arguments for evacuate *) - let evac get_ptr_loc = - get_begin_from_space ^^ - get_begin_to_space ^^ - get_end_to_space ^^ - get_ptr_loc ^^ - evacuate env ^^ - set_end_to_space in - - let evac_offset get_ptr_loc offset = - get_begin_from_space ^^ - get_begin_to_space ^^ - get_end_to_space ^^ - get_ptr_loc ^^ - evacuate_offset env offset ^^ - set_end_to_space in - - (* Go through the roots, and evacuate them *) - HeapTraversal.for_each_array_elem env (compile_unboxed_const static_roots) (fun get_elem_ptr -> - let (set_static, get_static) = new_local env "static_obj" in - get_elem_ptr ^^ load_ptr ^^ set_static ^^ - HeapTraversal.for_each_pointer env get_static evac evac_offset - ) ^^ - evac (ClosureTable.root env) ^^ - - (* Go through the to-space, and evacuate that. - Note that get_end_to_space changes as we go, but walk_heap_from_to can handle that. - *) - HeapTraversal.walk_heap_from_to env - get_begin_to_space - get_end_to_space - (fun get_x -> HeapTraversal.for_each_pointer env get_x evac evac_offset) ^^ - - (* Note some stats *) - get_end_to_space ^^ get_begin_to_space ^^ G.i (Binary (Wasm.Values.I32 I32Op.Sub)) ^^ - Heap.note_live_size env ^^ - - get_end_from_space ^^ get_begin_from_space ^^ G.i (Binary (Wasm.Values.I32 I32Op.Sub)) ^^ - get_end_to_space ^^ get_begin_to_space ^^ G.i (Binary (Wasm.Values.I32 I32Op.Sub)) ^^ - G.i (Binary (Wasm.Values.I32 I32Op.Sub)) ^^ - Heap.add_reclaimed env ^^ - - (* Copy the to-space to the beginning of memory. 
*) - get_begin_from_space ^^ compile_add_const ptr_unskew ^^ - get_begin_to_space ^^ compile_add_const ptr_unskew ^^ - get_end_to_space ^^ get_begin_to_space ^^ G.i (Binary (Wasm.Values.I32 I32Op.Sub)) ^^ - Heap.memcpy env ^^ - - (* Reset the heap pointer *) - get_begin_from_space ^^ compile_add_const ptr_unskew ^^ - get_end_to_space ^^ get_begin_to_space ^^ G.i (Binary (Wasm.Values.I32 I32Op.Sub)) ^^ - G.i (Binary (Wasm.Values.I32 I32Op.Add)) ^^ - Heap.set_heap_ptr env - ) + ) let get_heap_size env = G.i (Call (nr (E.built_in env "get_heap_size"))) @@ -5536,7 +5130,7 @@ module FuncDec = struct let message_cleanup env sort = match sort with | Type.Shared Type.Write -> - G.i (Call (nr (E.built_in env "collect"))) ^^ + collect_garbage env ^^ Lifecycle.trans env Lifecycle.Idle | Type.Shared Type.Query -> Lifecycle.trans env Lifecycle.PostQuery @@ -7196,7 +6790,7 @@ and compile_exp (env : E.t) ae exp = | OtherPrim "rts_max_live_size", [] -> SR.Vanilla, - Heap.get_max_live_size env ^^ BigNum.from_word64 env + Heap.get_max_live_size env ^^ BigNum.from_word32 env | OtherPrim "rts_callback_table_count", [] -> SR.Vanilla, From ce9828ae574fd8ccaf7d63b982eacf32903682e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20Sinan=20A=C4=9Facan?= Date: Wed, 2 Sep 2020 11:51:56 +0300 Subject: [PATCH 02/20] Apply suggestions from code review Co-authored-by: Joachim Breitner --- rts/motoko-rts/src/gc.rs | 21 ++++++++------------- rts/motoko-rts/src/types.rs | 4 +++- src/codegen/compile.ml | 11 ++++------- 3 files changed, 15 insertions(+), 21 deletions(-) diff --git a/rts/motoko-rts/src/gc.rs b/rts/motoko-rts/src/gc.rs index 41d04f24fba..a3265dadbbb 100644 --- a/rts/motoko-rts/src/gc.rs +++ b/rts/motoko-rts/src/gc.rs @@ -3,10 +3,10 @@ use crate::rts_trap_with; use crate::types::*; extern "C" { - /// Get end_of_heap. Implemented by the compiler. + /// Get end_of_heap. Provided by the code generator (src/codegen/compile.ml) pub(crate) fn get_hp() -> usize; - /// Set end_of_heap. 
Implemented by the compiler. + /// Set end_of_heap. Provided by the code generator (src/codegen/compile.ml) pub(crate) fn set_hp(hp: usize); /// Get __heap_base @@ -25,16 +25,12 @@ extern "C" { /// Maximum live data retained in a GC. // -// NOTE (osa): In the original code (compile.ml) this variable was 64-bit, but I'm not sure why -// that is necessary. Pointers in wasm32 are 32-bits so if the entire address space is live you -// you max u32::MAX here, no need for 64-bits. -// static mut MAX_LIVE: Bytes = Bytes(0); /// Amount of garbage collected so far. static mut RECLAIMED: Bytes = Bytes(0); -/// Counter for total allocations done by `alloc::alloc_words` (called by the generated code). +/// Counter for total allocations pub(crate) static mut ALLOCATED: Bytes = Bytes(0); unsafe fn note_live_size(live: Bytes) { @@ -132,8 +128,8 @@ unsafe fn memset(s: usize, c: Words, b: u32) { } } -/// Evacuate (copy) an object in from-space to to-space, return new end of to-space. Returns the -/// original to-space if the object is already evacuated. +/// Evacuate (copy) an object in from-space to to-space, return updated end_to_space. +/// If the object was already evacuated, it returns end_to_space unchanged. /// /// Arguments: /// @@ -143,7 +139,7 @@ unsafe fn memset(s: usize, c: Words, b: u32) { /// dynamic heap so we skip those. /// /// - After all objects are evacuated we move to-space to from-space, to be able to do that the -/// pointers need to point to their locations in from-space, which is calculated with +/// pointers need to point to their (eventual) locations in from-space, which is calculated with /// `end_to_space - begin_to_space + begin_from_space`. /// /// - begin_to_space: Where to-space starts. See above for how this is used. @@ -204,7 +200,7 @@ unsafe fn evac( } /// Evacuate a blob payload pointed by a bigint. bigints are special in that a bigint's first field -/// is an internal pointer: it points to payload of a blob object, instead of to the header. 
+/// is an internal pointer: it points to the _payload_ of a blob object, instead of skewedly pointing to the object start /// /// - `ptr_loc`: Address of a `data_ptr` field of a BigInt (see types.rs). Points to payload of a /// blob. See types.rs for blob layout. @@ -368,7 +364,6 @@ unsafe fn evac_static_roots( /// The entry point. Called by the generated code. #[no_mangle] pub unsafe extern "C" fn collect() { - // Beginning of tospace = end of fromspace let begin_from_space = get_heap_base(); let end_from_space = get_hp(); let begin_to_space = end_from_space; @@ -388,7 +383,7 @@ pub unsafe extern "C" fn collect() { // Scavenge to-space let mut p = begin_to_space; - while p < end_to_space { + while p < end_to_space { // NB: end_to_space keeps changing within this loop end_to_space = scav(begin_from_space, begin_to_space, end_to_space, p); p += words_to_bytes(object_size(p)).0 as usize; } diff --git a/rts/motoko-rts/src/types.rs b/rts/motoko-rts/src/types.rs index 0317f18e2ac..39317c7e552 100644 --- a/rts/motoko-rts/src/types.rs +++ b/rts/motoko-rts/src/types.rs @@ -114,8 +114,10 @@ pub struct Indirection { #[repr(C)] pub struct BigInt { pub header: Obj, + // the data following now must describe the `mp_int` struct + // (https://github.com/libtom/libtommath/blob/44ee82cd34d0524c171ffd0da70f83bba919aa38/tommath.h#L174-L179) pub size: u32, - pub alloc: u32, // TODO: Not sure what this is + pub alloc: u32, // Unskewed pointer to a blob payload. data_ptr - 2 (words) gives us the blob header. pub data_ptr: usize, } diff --git a/src/codegen/compile.ml b/src/codegen/compile.ml index 26ddd8cda75..fd6583dc0ba 100644 --- a/src/codegen/compile.ml +++ b/src/codegen/compile.ml @@ -2829,7 +2829,7 @@ module Arr = struct No difference between mutable and immutable arrays. 
*) - let header_size = Int32.add Tagged.header_size 1l (* 2 *) + let header_size = Int32.add Tagged.header_size 1l let element_size = 4l let len_field = Int32.add Tagged.header_size 0l @@ -3099,9 +3099,6 @@ module Lifecycle = struct end (* Lifecycle *) -let collect_garbage env = - E.call_import env "rts" "collect" - module Dfinity = struct (* Dfinity-specific stuff: System imports, databufs etc. *) @@ -3240,7 +3237,7 @@ module Dfinity = struct G.i (Call (nr (E.built_in env "init"))) ^^ (* Collect garbage *) - collect_garbage env ^^ + E.call_import env "rts" "collect" ^^ Lifecycle.trans env Lifecycle.Idle ) in @@ -3277,7 +3274,7 @@ module Dfinity = struct Lifecycle.trans env Lifecycle.InPostUpgrade ^^ G.i (Call (nr (E.built_in env "post_exp"))) ^^ Lifecycle.trans env Lifecycle.Idle ^^ - collect_garbage env + E.call_import env "rts" "collect" env )) in E.add_export env (nr { @@ -5130,7 +5127,7 @@ module FuncDec = struct let message_cleanup env sort = match sort with | Type.Shared Type.Write -> - collect_garbage env ^^ + E.call_import env "rts" "collect" ^^ Lifecycle.trans env Lifecycle.Idle | Type.Shared Type.Query -> Lifecycle.trans env Lifecycle.PostQuery From eb33aac501c42e2e23d3066aa4cdb6a09bf51700 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20Sinan=20A=C4=9Facan?= Date: Wed, 2 Sep 2020 11:54:09 +0300 Subject: [PATCH 03/20] Fix build error after applying suggestions --- src/codegen/compile.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/codegen/compile.ml b/src/codegen/compile.ml index fd6583dc0ba..b3c18fac102 100644 --- a/src/codegen/compile.ml +++ b/src/codegen/compile.ml @@ -3274,7 +3274,7 @@ module Dfinity = struct Lifecycle.trans env Lifecycle.InPostUpgrade ^^ G.i (Call (nr (E.built_in env "post_exp"))) ^^ Lifecycle.trans env Lifecycle.Idle ^^ - E.call_import env "rts" "collect" env + E.call_import env "rts" "collect" )) in E.add_export env (nr { From 5adcd4e6b4003af2b6f5c7b86a62407d7f16a4f6 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?=C3=96mer=20Sinan=20A=C4=9Facan?= Date: Wed, 2 Sep 2020 13:09:02 +0300 Subject: [PATCH 04/20] Move heap-related functions to Heap module in compile.ml --- src/codegen/compile.ml | 81 ++++++++++++++++++++++-------------------- 1 file changed, 42 insertions(+), 39 deletions(-) diff --git a/src/codegen/compile.ml b/src/codegen/compile.ml index b3c18fac102..77cef522a7c 100644 --- a/src/codegen/compile.ml +++ b/src/codegen/compile.ml @@ -920,6 +920,45 @@ module Heap = struct (* Comparing bytes (works on unskewed memory addresses) *) let memcmp env = E.call_import env "rts" "as_memcmp" + let register env = + + let get_hp_fn = E.add_fun env "get_hp" (Func.of_body env [] [I32Type] (fun env -> + get_heap_ptr env + )) in + + E.add_export env (nr { + name = Wasm.Utf8.decode "get_hp"; + edesc = nr (FuncExport (nr get_hp_fn)) + }); + + let set_hp_fn = E.add_fun env "set_hp" (Func.of_body env [("new_hp", I32Type)] [] (fun env -> + G.i (LocalGet (nr (Int32.of_int 0))) ^^ + set_heap_ptr env + )) in + + E.add_export env (nr { + name = Wasm.Utf8.decode "set_hp"; + edesc = nr (FuncExport (nr set_hp_fn)) + }); + + let get_heap_base_fn = E.add_fun env "get_heap_base" (Func.of_body env [] [I32Type] (fun env -> + get_heap_base env + )) in + + E.add_export env (nr { + name = Wasm.Utf8.decode "get_heap_base"; + edesc = nr (FuncExport (nr get_heap_base_fn)) + }); + + Func.define_built_in env "get_heap_size" [] [I32Type] (fun env -> + get_heap_ptr env ^^ + get_heap_base env ^^ + G.i (Binary (Wasm.Values.I32 I32Op.Sub)) + ) + + let get_heap_size env = + G.i (Call (nr (E.built_in env "get_heap_size"))) + end (* Heap *) module Stack = struct @@ -4679,44 +4718,7 @@ module GC = struct E.add_export env (nr { name = Wasm.Utf8.decode "get_static_roots"; edesc = nr (FuncExport (nr get_static_roots)) - }); - - let get_hp = E.add_fun env "get_hp" (Func.of_body env [] [I32Type] (fun env -> - Heap.get_heap_ptr env - )) in - - E.add_export env (nr { - name = Wasm.Utf8.decode "get_hp"; - edesc = 
nr (FuncExport (nr get_hp)) - }); - - let set_hp = E.add_fun env "set_hp" (Func.of_body env [("new_hp", I32Type)] [] (fun env -> - G.i (LocalGet (nr (Int32.of_int 0))) ^^ - Heap.set_heap_ptr env - )) in - - E.add_export env (nr { - name = Wasm.Utf8.decode "set_hp"; - edesc = nr (FuncExport (nr set_hp)) - }); - - let get_heap_base = E.add_fun env "get_heap_base" (Func.of_body env [] [I32Type] (fun env -> - Heap.get_heap_base env - )) in - - E.add_export env (nr { - name = Wasm.Utf8.decode "get_heap_base"; - edesc = nr (FuncExport (nr get_heap_base)) - }); - - Func.define_built_in env "get_heap_size" [] [I32Type] (fun env -> - Heap.get_heap_ptr env ^^ - Heap.get_heap_base env ^^ - G.i (Binary (Wasm.Values.I32 I32Op.Sub)) - ) - - let get_heap_size env = - G.i (Call (nr (E.built_in env "get_heap_size"))) + }) let store_static_roots env = Arr.vanilla_lit env (E.get_static_roots env) @@ -6771,7 +6773,7 @@ and compile_exp (env : E.t) ae exp = | OtherPrim "rts_heap_size", [] -> SR.Vanilla, - GC.get_heap_size env ^^ Prim.prim_word32toNat env + Heap.get_heap_size env ^^ Prim.prim_word32toNat env | OtherPrim "rts_memory_size", [] -> SR.Vanilla, @@ -7612,6 +7614,7 @@ and conclude_module env start_fi_o = let set_heap_base = E.add_global32_delayed env "__heap_base" Immutable in E.export_global env "__heap_base"; + Heap.register env; GC.register env static_roots; set_heap_base (E.get_end_of_static_memory env); From c99c3fe2019c0eb7c72a3cf56dc56a47e9a25f38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20Sinan=20A=C4=9Facan?= Date: Wed, 2 Sep 2020 13:14:53 +0300 Subject: [PATCH 05/20] Remove unused globals in code gen --- src/codegen/compile.ml | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/codegen/compile.ml b/src/codegen/compile.ml index 77cef522a7c..e7f2ecad9ee 100644 --- a/src/codegen/compile.ml +++ b/src/codegen/compile.ml @@ -834,13 +834,7 @@ module Heap = struct let register_globals env = (* end-of-heap pointer, we set this to __heap_base 
upon start *) - E.add_global32 env "end_of_heap" Mutable 0xDEADBEEFl; - - (* counter for total reclaimed bytes *) - E.add_global64 env "reclaimed" Mutable 0L; - - (* counter for max live bytes *) - E.add_global64 env "max_live" Mutable 0L + E.add_global32 env "end_of_heap" Mutable 0xDEADBEEFl let get_total_allocation env = E.call_import env "rts" "get_total_allocations" From b413d98e8f41c838defb6a3f9b26bae1ab779d93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20Sinan=20A=C4=9Facan?= Date: Wed, 2 Sep 2020 13:16:39 +0300 Subject: [PATCH 06/20] Remove unused function add_global64 --- src/codegen/compile.ml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/codegen/compile.ml b/src/codegen/compile.ml index e7f2ecad9ee..26b0dec4831 100644 --- a/src/codegen/compile.ml +++ b/src/codegen/compile.ml @@ -341,12 +341,6 @@ module E = struct let add_global32 (env : t) name mut init = add_global32_delayed env name mut init - let add_global64 (env : t) name mut init = - add_global env name (Lib.Promise.make_fulfilled ( - nr { gtype = GlobalType (I64Type, mut); - value = nr (G.to_instr_list (G.i (Const (nr (Wasm.Values.I64 init))))) - })) - let get_global (env : t) name : int32 = match NameEnv.find_opt name !(env.global_names) with | Some gi -> gi From 755a096a32f53add4ee21779865f09786c9e5fcf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20Sinan=20A=C4=9Facan?= Date: Wed, 2 Sep 2020 13:22:37 +0300 Subject: [PATCH 07/20] Panic if we see an indirection in scavenge --- rts/motoko-rts/src/gc.rs | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/rts/motoko-rts/src/gc.rs b/rts/motoko-rts/src/gc.rs index a3265dadbbb..05aa994e807 100644 --- a/rts/motoko-rts/src/gc.rs +++ b/rts/motoko-rts/src/gc.rs @@ -331,12 +331,7 @@ unsafe fn scav( // These don't include pointers, skip } - TAG_INDIRECTION => { - // These are ignored in the original code for some reason - // TODO (osa): I think this branch should panic - } - - _ => { + TAG_INDIRECTION | _ => { // 
Any other tag is a bug rts_trap_with("invalid object tag in scav\0".as_ptr()); } From ee53aa05d7cb110d16947416a4b9b00bc3ef2bd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20Sinan=20A=C4=9Facan?= Date: Wed, 2 Sep 2020 13:37:48 +0300 Subject: [PATCH 08/20] Pass mut ref to end_to_space and update it directly --- rts/motoko-rts/src/gc.rs | 81 ++++++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 40 deletions(-) diff --git a/rts/motoko-rts/src/gc.rs b/rts/motoko-rts/src/gc.rs index 05aa994e807..23d8dfebef6 100644 --- a/rts/motoko-rts/src/gc.rs +++ b/rts/motoko-rts/src/gc.rs @@ -128,8 +128,8 @@ unsafe fn memset(s: usize, c: Words, b: u32) { } } -/// Evacuate (copy) an object in from-space to to-space, return updated end_to_space. -/// If the object was already evacuated, it returns end_to_space unchanged. +/// Evacuate (copy) an object in from-space to to-space, update end_to_space. If the object was +/// already evacuated end_to_space is not changed. /// /// Arguments: /// @@ -151,19 +151,19 @@ unsafe fn memset(s: usize, c: Words, b: u32) { unsafe fn evac( begin_from_space: usize, begin_to_space: usize, - end_to_space: usize, + end_to_space: &mut usize, ptr_loc: usize, -) -> usize { +) { // Field holds a skewed pointer to the object to evacuate let ptr_loc = ptr_loc as *mut SkewedPtr; if is_tagged_scalar(*ptr_loc) { - return end_to_space; + return; } // Ignore static objects, they can't point to dynamic heap if (*ptr_loc).unskew() < begin_from_space { - return end_to_space; + return; } let obj = (*ptr_loc).unskew() as *mut Obj; @@ -172,20 +172,20 @@ unsafe fn evac( if (*obj).tag == TAG_INDIRECTION { let fwd = (*(obj as *const Indirection)).fwd; *ptr_loc = fwd; - return end_to_space; + return; } let obj_size = object_size(obj as usize); let obj_size_bytes = words_to_bytes(obj_size); // Grow memory if needed - alloc::grow_memory(end_to_space + obj_size_bytes.0 as usize); + alloc::grow_memory(*end_to_space + obj_size_bytes.0 as usize); // Copy 
object to to-space - memcpy_words(end_to_space, obj as usize, obj_size); + memcpy_words(*end_to_space, obj as usize, obj_size); // Final location of the object after copying to-space back to from-space - let obj_loc = (end_to_space - begin_to_space) + begin_from_space; + let obj_loc = (*end_to_space - begin_to_space) + begin_from_space; // Set forwarding pointer let fwd = obj as *mut Indirection; @@ -195,8 +195,8 @@ unsafe fn evac( // Update evacuated field *ptr_loc = skew(obj_loc); - // Return new end of to-space - end_to_space + obj_size_bytes.0 as usize + // Update end of to-space + *end_to_space += obj_size_bytes.0 as usize } /// Evacuate a blob payload pointed by a bigint. bigints are special in that a bigint's first field @@ -207,9 +207,9 @@ unsafe fn evac( unsafe fn evac_bigint_blob( begin_from_space: usize, begin_to_space: usize, - end_to_space: usize, + end_to_space: &mut usize, ptr_loc: *mut usize, // address of field with a pointer to a blob payload -) -> usize { +) { let blob_payload_addr = *ptr_loc; // Get blob object from the payload @@ -218,7 +218,7 @@ unsafe fn evac_bigint_blob( let blob_obj_addr_field = &mut blob_obj_addr; let blob_obj_addr_field_ptr = blob_obj_addr_field as *mut _; - let ret = evac( + evac( begin_from_space, begin_to_space, end_to_space, @@ -231,16 +231,14 @@ unsafe fn evac_bigint_blob( // Update evacuated field *ptr_loc = blob_new_payload_addr; // not skewed! 
- - ret } unsafe fn scav( begin_from_space: usize, begin_to_space: usize, - mut end_to_space: usize, + end_to_space: &mut usize, obj: usize, -) -> usize { +) { let obj = obj as *const Obj; match (*obj).tag { @@ -248,7 +246,7 @@ unsafe fn scav( let obj = obj as *mut Object; let obj_payload = obj.offset(1) as *mut SkewedPtr; for i in 0..(*obj).size as isize { - end_to_space = evac( + evac( begin_from_space, begin_to_space, end_to_space, @@ -261,7 +259,7 @@ unsafe fn scav( let array = obj as *mut Array; let array_payload = array.offset(1) as *mut SkewedPtr; for i in 0..(*array).len as isize { - end_to_space = evac( + evac( begin_from_space, begin_to_space, end_to_space, @@ -273,14 +271,14 @@ unsafe fn scav( TAG_MUTBOX => { let mutbox = obj as *mut MutBox; let field_addr = ((&mut (*mutbox).field) as *mut _) as usize; - end_to_space = evac(begin_from_space, begin_to_space, end_to_space, field_addr); + evac(begin_from_space, begin_to_space, end_to_space, field_addr); } TAG_CLOSURE => { let closure = obj as *mut Closure; let closure_payload = closure.offset(1) as *mut SkewedPtr; for i in 0..(*closure).size as isize { - end_to_space = evac( + evac( begin_from_space, begin_to_space, end_to_space, @@ -292,20 +290,20 @@ unsafe fn scav( TAG_SOME => { let some = obj as *mut Some; let field_addr = ((&mut (*some).field) as *mut _) as usize; - end_to_space = evac(begin_from_space, begin_to_space, end_to_space, field_addr); + evac(begin_from_space, begin_to_space, end_to_space, field_addr); } TAG_VARIANT => { let variant = obj as *mut Variant; let field_addr = ((&mut (*variant).field) as *mut _) as usize; - end_to_space = evac(begin_from_space, begin_to_space, end_to_space, field_addr); + evac(begin_from_space, begin_to_space, end_to_space, field_addr); } TAG_BIGINT => { let bigint = obj as *mut BigInt; let data_ptr_addr = (&mut (*bigint).data_ptr) as *mut _; - end_to_space = evac_bigint_blob( + evac_bigint_blob( begin_from_space, begin_to_space, end_to_space, @@ -316,15 +314,15 @@ 
unsafe fn scav( TAG_CONCAT => { let concat = obj as *mut Concat; let field1_addr = ((&mut (*concat).text1) as *mut _) as usize; - end_to_space = evac(begin_from_space, begin_to_space, end_to_space, field1_addr); + evac(begin_from_space, begin_to_space, end_to_space, field1_addr); let field2_addr = ((&mut (*concat).text2) as *mut _) as usize; - end_to_space = evac(begin_from_space, begin_to_space, end_to_space, field2_addr); + evac(begin_from_space, begin_to_space, end_to_space, field2_addr); } TAG_OBJ_IND => { let obj_ind = obj as *mut ObjInd; let field_addr = ((&mut (*obj_ind).field) as *mut _) as usize; - end_to_space = evac(begin_from_space, begin_to_space, end_to_space, field_addr); + evac(begin_from_space, begin_to_space, end_to_space, field_addr); } TAG_BITS64 | TAG_BITS32 | TAG_BLOB => { @@ -336,8 +334,6 @@ unsafe fn scav( rts_trap_with("invalid object tag in scav\0".as_ptr()); } } - - end_to_space } // We have a special evacuation routine for "static roots" array: we don't evacuate elements of @@ -345,15 +341,14 @@ unsafe fn scav( unsafe fn evac_static_roots( begin_from_space: usize, begin_to_space: usize, - mut end_to_space: usize, + end_to_space: &mut usize, roots: *const Array, -) -> usize { +) { // Roots are in a static array which we don't evacuate. Only evacuate elements. for i in 0..(*roots).len { let obj = SkewedPtr(array_get(roots, i) as usize); - end_to_space = scav(begin_from_space, begin_to_space, end_to_space, obj.unskew()); + scav(begin_from_space, begin_to_space, end_to_space, obj.unskew()); } - end_to_space } /// The entry point. Called by the generated code. 
@@ -367,19 +362,25 @@ pub unsafe extern "C" fn collect() { let static_roots = get_static_roots().unskew() as *const Array; // Evacuate roots - end_to_space = evac_static_roots(begin_from_space, begin_to_space, end_to_space, static_roots); + evac_static_roots( + begin_from_space, + begin_to_space, + &mut end_to_space, + static_roots, + ); - end_to_space = evac( + evac( begin_from_space, begin_to_space, - end_to_space, + &mut end_to_space, closure_table_loc().unskew(), ); // Scavenge to-space let mut p = begin_to_space; - while p < end_to_space { // NB: end_to_space keeps changing within this loop - end_to_space = scav(begin_from_space, begin_to_space, end_to_space, p); + while p < end_to_space { + // NB: end_to_space keeps changing within this loop + scav(begin_from_space, begin_to_space, &mut end_to_space, p); p += words_to_bytes(object_size(p)).0 as usize; } From e59f35b89d036e707f6c47b9d8291c7364de5831 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20Sinan=20A=C4=9Facan?= Date: Wed, 2 Sep 2020 15:06:19 +0300 Subject: [PATCH 09/20] Remove memset after GC for now -- consumes too much gas --- rts/motoko-rts/src/gc.rs | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/rts/motoko-rts/src/gc.rs b/rts/motoko-rts/src/gc.rs index 23d8dfebef6..46e8f0a4e6d 100644 --- a/rts/motoko-rts/src/gc.rs +++ b/rts/motoko-rts/src/gc.rs @@ -121,13 +121,6 @@ unsafe fn memcpy_bytes(to: usize, from: usize, n: Bytes) { as_memcpy(to, from, n) } -unsafe fn memset(s: usize, c: Words, b: u32) { - let s_ptr = s as *mut u32; - for i in 0..c.0 { - *s_ptr.offset(i as isize) = b; - } -} - /// Evacuate (copy) an object in from-space to to-space, update end_to_space. If the object was /// already evacuated end_to_space is not changed. 
/// @@ -401,11 +394,4 @@ pub unsafe extern "C" fn collect() { // Reset the heap pointer let new_hp = begin_from_space + (end_to_space - begin_to_space); set_hp(new_hp); - - // Reset scratch space (for debugging purposes) - memset( - new_hp, - bytes_to_words(Bytes((end_to_space - new_hp) as u32)), - 0, - ); } From 3da969c122853790eabc67c7dab0bbcee007cc44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20Sinan=20A=C4=9Facan?= Date: Wed, 2 Sep 2020 16:07:32 +0300 Subject: [PATCH 10/20] Use memcpy from libc Also tweaks the build system a little bit to avoid linking multiple versions of compiler_builtins. --- rts/Makefile | 1 + rts/bigint.c | 2 +- rts/motoko-rts/Cargo.toml | 12 +++++++++++- rts/motoko-rts/Xargo.toml | 1 - rts/motoko-rts/src/gc.rs | 7 ++----- rts/principal.c | 2 +- rts/rts.c | 10 ++-------- rts/rts.h | 1 - rts/text.c | 8 ++++---- rts/utf8_valid.c | 2 +- src/codegen/compile.ml | 4 ++-- 11 files changed, 25 insertions(+), 25 deletions(-) diff --git a/rts/Makefile b/rts/Makefile index 481bdce8d5d..a2a21d8822e 100644 --- a/rts/Makefile +++ b/rts/Makefile @@ -177,6 +177,7 @@ mo-rts.wasm: $(RTS_RUST_WASM_O) $(RTS_WASM_O) $(TOMMATH_WASM_O) $(MUSL_WASM_O) $(WASM_LD) -o $@ \ --import-memory --shared --no-entry --gc-sections \ --export=__wasm_call_ctors \ + --export=memcpy \ --whole-archive \ $+ diff --git a/rts/bigint.c b/rts/bigint.c index 00ee1941acf..9af9e8e8c19 100644 --- a/rts/bigint.c +++ b/rts/bigint.c @@ -50,7 +50,7 @@ export void* mp_realloc(void *ptr, size_t old_size, size_t new_size) { if (new_size > FIELD(r, 1)) { void *newptr = mp_alloc(new_size); if (old_size != FIELD(r, 1)) bigint_trap(); - as_memcpy(newptr, ptr, old_size); + memcpy(newptr, ptr, old_size); return newptr; } else if (new_size == FIELD(r, 1)) { // No need to grow diff --git a/rts/motoko-rts/Cargo.toml b/rts/motoko-rts/Cargo.toml index 435fe343215..c3c2b244e4e 100644 --- a/rts/motoko-rts/Cargo.toml +++ b/rts/motoko-rts/Cargo.toml @@ -7,9 +7,19 @@ edition = "2018" [dependencies.libc] 
version = "0.2.73" -# added here so that it ends up in Cargo.lock, so that nix will pre-fetch it +# Added here so that it ends up in Cargo.lock, so that nix will pre-fetch it [dependencies.compiler_builtins] version = "0.1.32" +# Without this feature we get dozens of duplicate symbol errors when generating +# the final shared .wasm file: +# +# wasm-ld: error: duplicate symbol: __multi3 +# >>> defined in _build/wasm/libmotoko_rts.a(compiler_builtins-d709bd899857aa61.compiler_builtins.3abndchk-cgu.0.rcgu.o) +# >>> defined in _build/wasm/libmotoko_rts.a(compiler_builtins-06d1ead628e1f468.compiler_builtins.6moz1ltd-cgu.0.rcgu.o) +# +# It seems like we're linking multiple versions of compiler_builtins in the same +# shared library, which we should fix at some point. TODO +features = ["mangled-names"] [lib] crate-type = ["staticlib"] diff --git a/rts/motoko-rts/Xargo.toml b/rts/motoko-rts/Xargo.toml index 511af6a58fa..2fdd43a522b 100644 --- a/rts/motoko-rts/Xargo.toml +++ b/rts/motoko-rts/Xargo.toml @@ -3,5 +3,4 @@ stage = 0 [dependencies.compiler_builtins] stage = 1 -features = [ "mem" ] version = "0.1.32" diff --git a/rts/motoko-rts/src/gc.rs b/rts/motoko-rts/src/gc.rs index 46e8f0a4e6d..82006cd7ee4 100644 --- a/rts/motoko-rts/src/gc.rs +++ b/rts/motoko-rts/src/gc.rs @@ -18,9 +18,6 @@ extern "C" { /// Get pointer to the static memory with an array to the static roots. Provided by the /// generated code. pub(crate) fn get_static_roots() -> SkewedPtr; - - /// Provided by the C RTS, in `rts.c`. - pub(crate) fn as_memcpy(to: usize, from: usize, n: Bytes); } /// Maximum live data retained in a GC. 
@@ -114,11 +111,11 @@ pub(crate) fn is_tagged_scalar(p: SkewedPtr) -> bool { } unsafe fn memcpy_words(to: usize, from: usize, n: Words) { - as_memcpy(to, from, words_to_bytes(n)) + libc::memcpy(to as *mut _, from as *const _, words_to_bytes(n).0 as usize); } unsafe fn memcpy_bytes(to: usize, from: usize, n: Bytes) { - as_memcpy(to, from, n) + libc::memcpy(to as *mut _, from as *const _, n.0 as usize); } /// Evacuate (copy) an object in from-space to to-space, update end_to_space. If the object was diff --git a/rts/principal.c b/rts/principal.c index ccd9192aa41..ef620cca907 100644 --- a/rts/principal.c +++ b/rts/principal.c @@ -186,7 +186,7 @@ export blob_t blob_of_principal(text_t t) { rts_trap_with("blob_of_principal: principal too short"); } blob_t stripped = alloc_blob(BLOB_LEN(bytes) - 4); - as_memcpy(BLOB_PAYLOAD(stripped), BLOB_PAYLOAD(bytes) + 4, BLOB_LEN(bytes) - 4); + memcpy(BLOB_PAYLOAD(stripped), BLOB_PAYLOAD(bytes) + 4, BLOB_LEN(bytes) - 4); // check encoding blob_t expected = principal_of_blob(stripped); if (blob_compare(b0, expected) != 0) { diff --git a/rts/rts.c b/rts/rts.c index a65c9c4b27a..8210870afb2 100644 --- a/rts/rts.c +++ b/rts/rts.c @@ -12,12 +12,6 @@ char *alloc(size_t n) { return (char *)&FIELD(r,2); } -export void as_memcpy(char *str1, const char *str2, size_t n) { - for (size_t i = 0; i < n; i++) { - str1[i] = str2[i]; - } -} - export int as_memcmp(const char *str1, const char *str2, size_t n) { for (size_t i = 0; i < n; i++) { if (str1[i] != str2[i]) @@ -36,8 +30,8 @@ void __attribute__ ((noreturn)) trap_with_prefix(const char* prefix, const char int len1 = as_strlen(prefix); int len2 = as_strlen(str); char msg[len1 + len2]; - as_memcpy(msg, prefix, len1); - as_memcpy(msg + len1, str, len2); + memcpy(msg, prefix, len1); + memcpy(msg + len1, str, len2); rts_trap(msg, len1 + len2); } diff --git a/rts/rts.h b/rts/rts.h index 9d6aa210556..b4ace6a589c 100644 --- a/rts/rts.h +++ b/rts/rts.h @@ -84,7 +84,6 @@ from_rts __attribute__ 
((noreturn)) void rts_trap(const char* str, size_t n); from_rts __attribute__ ((noreturn)) void bigint_trap(); /** Functions used in multiple modules of the RTS */ -export void as_memcpy(char *str1, const char *str2, size_t n); export int as_memcmp(const char *str1, const char *str2, size_t n); export size_t as_strlen(const char *str1); diff --git a/rts/text.c b/rts/text.c index 5c8cbff32f4..a927cf4d17a 100644 --- a/rts/text.c +++ b/rts/text.c @@ -54,7 +54,7 @@ static blob_t alloc_text_blob(size_t n) { // Create export text_t text_of_ptr_size(const char *buf, size_t n) { as_ptr r = alloc_text_blob(n); - as_memcpy(BLOB_PAYLOAD(r), buf, n); + memcpy(BLOB_PAYLOAD(r), buf, n); return r; } @@ -75,8 +75,8 @@ export text_t text_concat(text_t s1, text_t s2) { // short texts are copied into a single blob if (n < MIN_CONCAT_SIZE) { as_ptr r = alloc_text_blob(n1 + n2); - as_memcpy(BLOB_PAYLOAD(r), BLOB_PAYLOAD(s1), n1); - as_memcpy(BLOB_PAYLOAD(r) + n1, BLOB_PAYLOAD(s2), n2); + memcpy(BLOB_PAYLOAD(r), BLOB_PAYLOAD(s1), n1); + memcpy(BLOB_PAYLOAD(r) + n1, BLOB_PAYLOAD(s2), n2); return r; } // Check max size @@ -106,7 +106,7 @@ export void text_to_buf(text_t s, char *buf) { crumb *next_crumb = NULL; // what do do after we are done with s while (true) { if (TAG(s) == TAG_BLOB) { - as_memcpy(buf, BLOB_PAYLOAD(s), BLOB_LEN(s)); + memcpy(buf, BLOB_PAYLOAD(s), BLOB_LEN(s)); // return if we are done if (next_crumb == NULL) return; diff --git a/rts/utf8_valid.c b/rts/utf8_valid.c index 40ee982da1d..18a5a55c1eb 100644 --- a/rts/utf8_valid.c +++ b/rts/utf8_valid.c @@ -76,7 +76,7 @@ utf8_check(const char *src, size_t len, size_t *cursor) { if (cur == end) break; buf[0] = buf[1] = buf[2] = buf[3] = 0; - as_memcpy((char *)buf, (const char *)cur, end - cur); + memcpy((char *)buf, (const char *)cur, end - cur); p = (const unsigned char *)buf; } else { p = cur; diff --git a/src/codegen/compile.ml b/src/codegen/compile.ml index 26b0dec4831..682d0647cb5 100644 --- a/src/codegen/compile.ml +++ 
b/src/codegen/compile.ml @@ -712,7 +712,7 @@ end (* Func *) module RTS = struct (* The connection to the C parts of the RTS *) let system_imports env = - E.add_func_import env "rts" "as_memcpy" [I32Type; I32Type; I32Type] []; + E.add_func_import env "rts" "memcpy" [I32Type; I32Type; I32Type] [I32Type]; (* standard libc memcpy *) E.add_func_import env "rts" "as_memcmp" [I32Type; I32Type; I32Type] [I32Type]; E.add_func_import env "rts" "version" [] [I32Type]; E.add_func_import env "rts" "parse_idl_header" [I32Type; I32Type; I32Type; I32Type] []; @@ -904,7 +904,7 @@ module Heap = struct (* Convenience functions related to memory *) (* Copying bytes (works on unskewed memory addresses) *) - let memcpy env = E.call_import env "rts" "as_memcpy" + let memcpy env = E.call_import env "rts" "memcpy" ^^ G.i Drop (* Comparing bytes (works on unskewed memory addresses) *) let memcmp env = E.call_import env "rts" "as_memcmp" From 2b2a8f7e6103612f118803fb5d78257e1daa2859 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20Sinan=20A=C4=9Facan?= Date: Thu, 3 Sep 2020 11:07:05 +0300 Subject: [PATCH 11/20] Encapsulate offset(1) code Rust doesn't allow implementing methods on pointer types so we have to use associated functions. 
--- rts/motoko-rts/src/gc.rs | 6 +++--- rts/motoko-rts/src/types.rs | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/rts/motoko-rts/src/gc.rs b/rts/motoko-rts/src/gc.rs index 82006cd7ee4..68ea54848a0 100644 --- a/rts/motoko-rts/src/gc.rs +++ b/rts/motoko-rts/src/gc.rs @@ -234,7 +234,7 @@ unsafe fn scav( match (*obj).tag { TAG_OBJECT => { let obj = obj as *mut Object; - let obj_payload = obj.offset(1) as *mut SkewedPtr; + let obj_payload = Object::payload_addr(obj); for i in 0..(*obj).size as isize { evac( begin_from_space, @@ -247,7 +247,7 @@ unsafe fn scav( TAG_ARRAY => { let array = obj as *mut Array; - let array_payload = array.offset(1) as *mut SkewedPtr; + let array_payload = Array::payload_addr(array); for i in 0..(*array).len as isize { evac( begin_from_space, @@ -266,7 +266,7 @@ unsafe fn scav( TAG_CLOSURE => { let closure = obj as *mut Closure; - let closure_payload = closure.offset(1) as *mut SkewedPtr; + let closure_payload = Closure::payload_addr(closure); for i in 0..(*closure).size as isize { evac( begin_from_space, diff --git a/rts/motoko-rts/src/types.rs b/rts/motoko-rts/src/types.rs index 39317c7e552..81b3cdea013 100644 --- a/rts/motoko-rts/src/types.rs +++ b/rts/motoko-rts/src/types.rs @@ -70,6 +70,12 @@ pub struct Array { // https://doc.rust-lang.org/nomicon/exotic-sizes.html } +impl Array { + pub unsafe fn payload_addr(arr: *mut Array) -> *mut SkewedPtr { + arr.offset(1) as *mut SkewedPtr // skip array header + } +} + pub(crate) unsafe fn array_get(array: *const Array, idx: u32) -> u32 { let slot_addr = array.offset(1) as usize + (idx * WORD_SIZE) as usize; *(slot_addr as *const u32) @@ -83,6 +89,12 @@ pub struct Object { // other stuff follows, but we don't need them currently } +impl Object { + pub unsafe fn payload_addr(obj: *mut Object) -> *mut SkewedPtr { + obj.offset(1) as *mut SkewedPtr // skip object header + } +} + #[repr(C)] pub struct ObjInd { pub header: Obj, @@ -97,6 +109,12 @@ pub struct 
Closure { // other stuff follows ... } +impl Closure { + pub unsafe fn payload_addr(clo: *mut Closure) -> *mut SkewedPtr { + clo.offset(1) as *mut SkewedPtr // skip closure header + } +} + #[repr(C)] pub struct Blob { pub header: Obj, From 3d3a9bedf562ab1a06e2f7e74b5821df78071a13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20Sinan=20A=C4=9Facan?= Date: Thu, 3 Sep 2020 11:54:21 +0300 Subject: [PATCH 12/20] Implement some helper methods for Word and Bytes types - Add and AddAssign to support + and += - Conversion methods instead of functions and From --- rts/motoko-rts/src/alloc.rs | 8 ++--- rts/motoko-rts/src/gc.rs | 26 ++++++++------ rts/motoko-rts/src/types.rs | 68 +++++++++++++++++++++++++++++++------ 3 files changed, 77 insertions(+), 25 deletions(-) diff --git a/rts/motoko-rts/src/alloc.rs b/rts/motoko-rts/src/alloc.rs index a8a0d1de432..f15985b0301 100644 --- a/rts/motoko-rts/src/alloc.rs +++ b/rts/motoko-rts/src/alloc.rs @@ -4,18 +4,18 @@ use core::arch::wasm32; use crate::gc; use crate::rts_trap_with; -use crate::types::{bytes_to_words, skew, words_to_bytes, Bytes, SkewedPtr, Words}; +use crate::types::{skew, Bytes, SkewedPtr, Words}; #[no_mangle] pub unsafe extern "C" fn alloc_bytes(n: Bytes) -> SkewedPtr { - alloc_words(bytes_to_words(n)) + alloc_words(n.to_words()) } #[no_mangle] pub unsafe extern "C" fn alloc_words(n: Words) -> SkewedPtr { - let bytes = words_to_bytes(n); + let bytes = n.to_bytes(); // Update ALLOCATED - gc::ALLOCATED.0 += bytes.0 as u64; + gc::ALLOCATED += Bytes(bytes.0 as u64); // Update heap pointer let old_hp = gc::get_hp(); diff --git a/rts/motoko-rts/src/gc.rs b/rts/motoko-rts/src/gc.rs index 68ea54848a0..3578f9a63ef 100644 --- a/rts/motoko-rts/src/gc.rs +++ b/rts/motoko-rts/src/gc.rs @@ -21,7 +21,6 @@ extern "C" { } /// Maximum live data retained in a GC. -// static mut MAX_LIVE: Bytes = Bytes(0); /// Amount of garbage collected so far. 
@@ -31,7 +30,7 @@ static mut RECLAIMED: Bytes = Bytes(0); pub(crate) static mut ALLOCATED: Bytes = Bytes(0); unsafe fn note_live_size(live: Bytes) { - MAX_LIVE = Bytes(::core::cmp::max(MAX_LIVE.0, live.0)); + MAX_LIVE = ::core::cmp::max(MAX_LIVE, live); } #[no_mangle] @@ -40,7 +39,7 @@ unsafe extern "C" fn get_max_live_size() -> Bytes { } unsafe fn note_reclaimed(reclaimed: Bytes) { - RECLAIMED.0 += reclaimed.0 as u64; + RECLAIMED += Bytes(reclaimed.0 as u64); } #[no_mangle] @@ -55,12 +54,19 @@ unsafe extern "C" fn get_total_allocations() -> Bytes { /// Returns object size in words pub(crate) unsafe fn object_size(obj: usize) -> Words { + + // NB. Constants below are header sizes of objects and should be in sync with sizes of structs + // in types.rs. TODO: Some ideas to make sure they're in sync: + // + // - Define constants, use static_assertions to chec size_of and the constant values agree. + // - Use size_of directly + let obj = obj as *const Obj; match (*obj).tag { TAG_OBJECT => { let object = obj as *const Object; let size = (*object).size; - Words(size + 3) // TODO: document what "3" includes + Words(size + 3) } TAG_OBJ_IND => Words(2), @@ -68,7 +74,7 @@ pub(crate) unsafe fn object_size(obj: usize) -> Words { TAG_ARRAY => { let array = obj as *const Array; let size = (*array).len; - Words(size + 2) // TODO: document what "2" includes + Words(size + 2) } TAG_BITS64 => Words(3), @@ -78,7 +84,7 @@ pub(crate) unsafe fn object_size(obj: usize) -> Words { TAG_CLOSURE => { let closure = obj as *const Closure; let size = (*closure).size; - Words(size + 3) // TODO: document what "3" includes + Words(size + 3) } TAG_SOME => Words(2), @@ -87,7 +93,7 @@ pub(crate) unsafe fn object_size(obj: usize) -> Words { TAG_BLOB => { let blob = obj as *const Blob; - Words(bytes_to_words((*blob).len).0 + 2) // TODO: document this + Words((*blob).len.to_words().0 + 2) } TAG_INDIRECTION => { @@ -111,7 +117,7 @@ pub(crate) fn is_tagged_scalar(p: SkewedPtr) -> bool { } unsafe fn 
memcpy_words(to: usize, from: usize, n: Words) { - libc::memcpy(to as *mut _, from as *const _, words_to_bytes(n).0 as usize); + libc::memcpy(to as *mut _, from as *const _, n.to_bytes().0 as usize); } unsafe fn memcpy_bytes(to: usize, from: usize, n: Bytes) { @@ -166,7 +172,7 @@ unsafe fn evac( } let obj_size = object_size(obj as usize); - let obj_size_bytes = words_to_bytes(obj_size); + let obj_size_bytes = obj_size.to_bytes(); // Grow memory if needed alloc::grow_memory(*end_to_space + obj_size_bytes.0 as usize); @@ -371,7 +377,7 @@ pub unsafe extern "C" fn collect() { while p < end_to_space { // NB: end_to_space keeps changing within this loop scav(begin_from_space, begin_to_space, &mut end_to_space, p); - p += words_to_bytes(object_size(p)).0 as usize; + p += object_size(p).to_bytes().0 as usize; } // Note the stats diff --git a/rts/motoko-rts/src/types.rs b/rts/motoko-rts/src/types.rs index 81b3cdea013..d473ea12997 100644 --- a/rts/motoko-rts/src/types.rs +++ b/rts/motoko-rts/src/types.rs @@ -1,25 +1,71 @@ +use core::ops::{Add, AddAssign}; + pub const WORD_SIZE: u32 = 4; -pub fn words_to_bytes(words: Words) -> Bytes { - Bytes(words.0 * WORD_SIZE) +/// The unit "words": `Words(123u32)` means 123 words. +#[repr(C)] +#[derive(PartialEq, Eq, Clone, Copy, PartialOrd, Ord)] +pub struct Words(pub A); + +impl Words { + pub fn to_bytes(self) -> Bytes { + Bytes(self.0 * WORD_SIZE) + } } -// Rounds up -pub fn bytes_to_words(bytes: Bytes) -> Words { - // Rust issue for adding ceiling_div: https://github.com/rust-lang/rfcs/issues/2844 - Words((bytes.0 + WORD_SIZE - 1) / WORD_SIZE) +impl> Add for Words { + type Output = Self; + + fn add(self, rhs: Self) -> Self::Output { + Words(self.0 + rhs.0) + } } -/// The unit "words": `Words(123u32)` means 123 words. 
-#[repr(C)] -#[derive(PartialEq, Eq, Clone, Copy)] -pub struct Words(pub A); +impl AddAssign for Words { + fn add_assign(&mut self, rhs: Self) { + self.0 += rhs.0; + } +} + +impl From> for Words { + fn from(bytes: Bytes) -> Words { + bytes.to_words() + } +} /// The unit "bytes": `Bytes(123u32)` means 123 bytes. #[repr(C)] -#[derive(PartialEq, Eq, Clone, Copy)] +#[derive(PartialEq, Eq, Clone, Copy, PartialOrd, Ord)] pub struct Bytes(pub A); +impl Bytes { + // Rounds up + pub fn to_words(self) -> Words { + // Rust issue for adding ceiling_div: https://github.com/rust-lang/rfcs/issues/2844 + Words((self.0 + WORD_SIZE - 1) / WORD_SIZE) + } +} + +impl> Add for Bytes { + type Output = Self; + + fn add(self, rhs: Self) -> Self::Output { + Bytes(self.0 + rhs.0) + } +} + +impl AddAssign for Bytes { + fn add_assign(&mut self, rhs: Self) { + self.0 += rhs.0; + } +} + +impl From> for Bytes { + fn from(words: Words) -> Bytes { + words.to_bytes() + } +} + #[repr(C)] #[derive(Clone, Copy)] pub struct SkewedPtr(pub usize); From bd13ac9571587b9f76b11bc06a525f149bec4e4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20Sinan=20A=C4=9Facan?= Date: Thu, 3 Sep 2020 11:58:30 +0300 Subject: [PATCH 13/20] Tweak pub modifiers --- rts/motoko-rts/src/alloc.rs | 4 ++-- rts/motoko-rts/src/gc.rs | 13 ++++++------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/rts/motoko-rts/src/alloc.rs b/rts/motoko-rts/src/alloc.rs index f15985b0301..5bccc1d2e90 100644 --- a/rts/motoko-rts/src/alloc.rs +++ b/rts/motoko-rts/src/alloc.rs @@ -7,12 +7,12 @@ use crate::rts_trap_with; use crate::types::{skew, Bytes, SkewedPtr, Words}; #[no_mangle] -pub unsafe extern "C" fn alloc_bytes(n: Bytes) -> SkewedPtr { +unsafe extern "C" fn alloc_bytes(n: Bytes) -> SkewedPtr { alloc_words(n.to_words()) } #[no_mangle] -pub unsafe extern "C" fn alloc_words(n: Words) -> SkewedPtr { +unsafe extern "C" fn alloc_words(n: Words) -> SkewedPtr { let bytes = n.to_bytes(); // Update ALLOCATED gc::ALLOCATED += 
Bytes(bytes.0 as u64); diff --git a/rts/motoko-rts/src/gc.rs b/rts/motoko-rts/src/gc.rs index 3578f9a63ef..d2eeaf67eaf 100644 --- a/rts/motoko-rts/src/gc.rs +++ b/rts/motoko-rts/src/gc.rs @@ -10,14 +10,14 @@ extern "C" { pub(crate) fn set_hp(hp: usize); /// Get __heap_base - pub(crate) fn get_heap_base() -> usize; + fn get_heap_base() -> usize; /// Skewed pointer to a skewed pointer to an array. See closure-table.c for details. - pub(crate) fn closure_table_loc() -> SkewedPtr; + fn closure_table_loc() -> SkewedPtr; /// Get pointer to the static memory with an array to the static roots. Provided by the /// generated code. - pub(crate) fn get_static_roots() -> SkewedPtr; + fn get_static_roots() -> SkewedPtr; } /// Maximum live data retained in a GC. @@ -53,8 +53,7 @@ unsafe extern "C" fn get_total_allocations() -> Bytes { } /// Returns object size in words -pub(crate) unsafe fn object_size(obj: usize) -> Words { - +unsafe fn object_size(obj: usize) -> Words { // NB. Constants below are header sizes of objects and should be in sync with sizes of structs // in types.rs. TODO: Some ideas to make sure they're in sync: // @@ -112,7 +111,7 @@ pub(crate) unsafe fn object_size(obj: usize) -> Words { } } -pub(crate) fn is_tagged_scalar(p: SkewedPtr) -> bool { +fn is_tagged_scalar(p: SkewedPtr) -> bool { p.0 & 0b1 == 0 } @@ -349,7 +348,7 @@ unsafe fn evac_static_roots( /// The entry point. Called by the generated code. 
#[no_mangle] -pub unsafe extern "C" fn collect() { +unsafe extern "C" fn collect() { let begin_from_space = get_heap_base(); let end_from_space = get_hp(); let begin_to_space = end_from_space; From 59890cc832a3b0da821e064181c1bb8a8327186a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20Sinan=20A=C4=9Facan?= Date: Thu, 3 Sep 2020 12:40:26 +0300 Subject: [PATCH 14/20] Remove magic values in object_size, refactor Bytes/Words a little bit more, fix BigInt layout - I figured out how to implement methods on pointer types, make payload_addr a method on pointers - Fix BigInt layout (sign field was missing) - Remove magic values in object_size --- rts/motoko-rts/src/gc.rs | 38 ++++++++++++++++--------------------- rts/motoko-rts/src/lib.rs | 1 + rts/motoko-rts/src/types.rs | 25 ++++++++++++++---------- 3 files changed, 32 insertions(+), 32 deletions(-) diff --git a/rts/motoko-rts/src/gc.rs b/rts/motoko-rts/src/gc.rs index d2eeaf67eaf..ca528873bef 100644 --- a/rts/motoko-rts/src/gc.rs +++ b/rts/motoko-rts/src/gc.rs @@ -54,12 +54,6 @@ unsafe extern "C" fn get_total_allocations() -> Bytes { /// Returns object size in words unsafe fn object_size(obj: usize) -> Words { - // NB. Constants below are header sizes of objects and should be in sync with sizes of structs - // in types.rs. TODO: Some ideas to make sure they're in sync: - // - // - Define constants, use static_assertions to chec size_of and the constant values agree. 
- // - Use size_of directly - let obj = obj as *const Obj; match (*obj).tag { TAG_OBJECT => { @@ -68,31 +62,31 @@ unsafe fn object_size(obj: usize) -> Words { Words(size + 3) } - TAG_OBJ_IND => Words(2), + TAG_OBJ_IND => size_of::(), TAG_ARRAY => { let array = obj as *const Array; let size = (*array).len; - Words(size + 2) + size_of::() + Words(size) } TAG_BITS64 => Words(3), - TAG_MUTBOX => Words(2), + TAG_MUTBOX => size_of::(), TAG_CLOSURE => { let closure = obj as *const Closure; let size = (*closure).size; - Words(size + 3) + size_of::() + Words(size) } - TAG_SOME => Words(2), + TAG_SOME => size_of::(), - TAG_VARIANT => Words(3), + TAG_VARIANT => size_of::(), TAG_BLOB => { let blob = obj as *const Blob; - Words((*blob).len.to_words().0 + 2) + size_of::() + (*blob).len.to_words() } TAG_INDIRECTION => { @@ -101,9 +95,9 @@ unsafe fn object_size(obj: usize) -> Words { TAG_BITS32 => Words(2), - TAG_BIGINT => Words(5), + TAG_BIGINT => size_of::(), - TAG_CONCAT => Words(4), + TAG_CONCAT => size_of::(), _ => { rts_trap_with("Invalid object tag in object size\0".as_ptr()); @@ -238,8 +232,8 @@ unsafe fn scav( match (*obj).tag { TAG_OBJECT => { - let obj = obj as *mut Object; - let obj_payload = Object::payload_addr(obj); + let obj = obj as *const Object; + let obj_payload = obj.payload_addr(); for i in 0..(*obj).size as isize { evac( begin_from_space, @@ -251,8 +245,8 @@ unsafe fn scav( } TAG_ARRAY => { - let array = obj as *mut Array; - let array_payload = Array::payload_addr(array); + let array = obj as *const Array; + let array_payload = array.payload_addr(); for i in 0..(*array).len as isize { evac( begin_from_space, @@ -270,8 +264,8 @@ unsafe fn scav( } TAG_CLOSURE => { - let closure = obj as *mut Closure; - let closure_payload = Closure::payload_addr(closure); + let closure = obj as *const Closure; + let closure_payload = closure.payload_addr(); for i in 0..(*closure).size as isize { evac( begin_from_space, @@ -341,7 +335,7 @@ unsafe fn evac_static_roots( ) { // 
Roots are in a static array which we don't evacuate. Only evacuate elements. for i in 0..(*roots).len { - let obj = SkewedPtr(array_get(roots, i) as usize); + let obj = roots.get(i); scav(begin_from_space, begin_to_space, end_to_space, obj.unskew()); } } diff --git a/rts/motoko-rts/src/lib.rs b/rts/motoko-rts/src/lib.rs index 3cfc4defbbd..8fba91a50ab 100644 --- a/rts/motoko-rts/src/lib.rs +++ b/rts/motoko-rts/src/lib.rs @@ -2,6 +2,7 @@ //! utilities. #![no_std] +#![feature(arbitrary_self_types)] mod alloc; mod gc; diff --git a/rts/motoko-rts/src/types.rs b/rts/motoko-rts/src/types.rs index d473ea12997..bd6cf7dfc8e 100644 --- a/rts/motoko-rts/src/types.rs +++ b/rts/motoko-rts/src/types.rs @@ -1,5 +1,9 @@ use core::ops::{Add, AddAssign}; +pub fn size_of() -> Words { + Bytes(::core::mem::size_of::() as u32).to_words() +} + pub const WORD_SIZE: u32 = 4; /// The unit "words": `Words(123u32)` means 123 words. @@ -117,14 +121,14 @@ pub struct Array { } impl Array { - pub unsafe fn payload_addr(arr: *mut Array) -> *mut SkewedPtr { - arr.offset(1) as *mut SkewedPtr // skip array header + pub unsafe fn payload_addr(self: *const Self) -> *const SkewedPtr { + self.offset(1) as *const SkewedPtr // skip array header } -} -pub(crate) unsafe fn array_get(array: *const Array, idx: u32) -> u32 { - let slot_addr = array.offset(1) as usize + (idx * WORD_SIZE) as usize; - *(slot_addr as *const u32) + pub unsafe fn get(self: *const Self, idx: u32) -> SkewedPtr { + let slot_addr = self.payload_addr() as usize + (idx * WORD_SIZE) as usize; + *(slot_addr as *const SkewedPtr) + } } #[repr(C)] @@ -136,8 +140,8 @@ pub struct Object { } impl Object { - pub unsafe fn payload_addr(obj: *mut Object) -> *mut SkewedPtr { - obj.offset(1) as *mut SkewedPtr // skip object header + pub unsafe fn payload_addr(self: *const Self) -> *const SkewedPtr { + self.offset(1) as *const SkewedPtr // skip object header } } @@ -156,8 +160,8 @@ pub struct Closure { } impl Closure { - pub unsafe fn payload_addr(clo: 
*mut Closure) -> *mut SkewedPtr { - clo.offset(1) as *mut SkewedPtr // skip closure header + pub unsafe fn payload_addr(self: *const Self) -> *const SkewedPtr { + self.offset(1) as *const SkewedPtr // skip closure header } } @@ -182,6 +186,7 @@ pub struct BigInt { // (https://github.com/libtom/libtommath/blob/44ee82cd34d0524c171ffd0da70f83bba919aa38/tommath.h#L174-L179) pub size: u32, pub alloc: u32, + pub sign: u32, // Unskewed pointer to a blob payload. data_ptr - 2 (words) gives us the blob header. pub data_ptr: usize, } From 53f24e5b4bdd56928ff71ef190db2822e170a792 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20Sinan=20A=C4=9Facan?= Date: Thu, 3 Sep 2020 13:05:45 +0300 Subject: [PATCH 15/20] Use size_of in Object size as well --- rts/motoko-rts/src/gc.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rts/motoko-rts/src/gc.rs b/rts/motoko-rts/src/gc.rs index ca528873bef..256cc9653a1 100644 --- a/rts/motoko-rts/src/gc.rs +++ b/rts/motoko-rts/src/gc.rs @@ -59,7 +59,7 @@ unsafe fn object_size(obj: usize) -> Words { TAG_OBJECT => { let object = obj as *const Object; let size = (*object).size; - Words(size + 3) + size_of::() + Words(size) } TAG_OBJ_IND => size_of::(), From 567475ee8a61f5837ea827fb56995224c9de853a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20Sinan=20A=C4=9Facan?= Date: Thu, 3 Sep 2020 13:09:20 +0300 Subject: [PATCH 16/20] Remove hard-coded blob header size --- rts/motoko-rts/src/gc.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rts/motoko-rts/src/gc.rs b/rts/motoko-rts/src/gc.rs index 256cc9653a1..17d7e7fee66 100644 --- a/rts/motoko-rts/src/gc.rs +++ b/rts/motoko-rts/src/gc.rs @@ -202,7 +202,7 @@ unsafe fn evac_bigint_blob( let blob_payload_addr = *ptr_loc; // Get blob object from the payload - let mut blob_obj_addr = skew(blob_payload_addr - 2 * (WORD_SIZE as usize)); + let mut blob_obj_addr = skew(blob_payload_addr - size_of::().0 as usize); // Create a temporary field to the blob object, to be 
passed to `evac`. let blob_obj_addr_field = &mut blob_obj_addr; let blob_obj_addr_field_ptr = blob_obj_addr_field as *mut _; From 1f8b54a4d8efa362e1bc5a2a75e06b2281331b87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20Sinan=20A=C4=9Facan?= Date: Thu, 3 Sep 2020 13:45:22 +0300 Subject: [PATCH 17/20] Update Object.hash_ptr comments --- rts/motoko-rts/src/types.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rts/motoko-rts/src/types.rs b/rts/motoko-rts/src/types.rs index bd6cf7dfc8e..3b3842c44af 100644 --- a/rts/motoko-rts/src/types.rs +++ b/rts/motoko-rts/src/types.rs @@ -135,8 +135,8 @@ impl Array { pub struct Object { pub header: Obj, pub size: u32, - pub hash_ptr: u32, // TODO: Not sure how this is used, we don't scavenge this field in GC - // other stuff follows, but we don't need them currently + pub hash_ptr: u32, // Pointer to static information about object field labels. Not important + // for GC (does not contain pointers). } impl Object { From a8f1c9071e52528a1c3ffc312a0fcc5c387ae220 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20Sinan=20A=C4=9Facan?= Date: Thu, 3 Sep 2020 14:30:39 +0300 Subject: [PATCH 18/20] Move heap pointer global to the RTS --- rts/motoko-rts/src/alloc.rs | 10 ++++----- rts/motoko-rts/src/gc.rs | 29 ++++++++++++++++---------- src/codegen/compile.ml | 41 +++++-------------------------------- 3 files changed, 28 insertions(+), 52 deletions(-) diff --git a/rts/motoko-rts/src/alloc.rs b/rts/motoko-rts/src/alloc.rs index 5bccc1d2e90..432fec3af74 100644 --- a/rts/motoko-rts/src/alloc.rs +++ b/rts/motoko-rts/src/alloc.rs @@ -18,14 +18,14 @@ unsafe extern "C" fn alloc_words(n: Words) -> SkewedPtr { gc::ALLOCATED += Bytes(bytes.0 as u64); // Update heap pointer - let old_hp = gc::get_hp(); - let new_hp = old_hp + bytes.0 as usize; - gc::set_hp(new_hp); + let old_hp = gc::HP; + let new_hp = old_hp + bytes.0; + gc::HP = new_hp; // Grow memory if needed - grow_memory(new_hp); + grow_memory(new_hp as usize); - 
skew(old_hp) + skew(old_hp as usize) } /// Page allocation. Ensures that the memory up to the given pointer is allocated. diff --git a/rts/motoko-rts/src/gc.rs b/rts/motoko-rts/src/gc.rs index 17d7e7fee66..ca7bf8277e9 100644 --- a/rts/motoko-rts/src/gc.rs +++ b/rts/motoko-rts/src/gc.rs @@ -3,14 +3,8 @@ use crate::rts_trap_with; use crate::types::*; extern "C" { - /// Get end_of_heap. Provided by the code generator (src/codegen/compile.ml) - pub(crate) fn get_hp() -> usize; - - /// Set end_of_heap. Provided by the code generator (src/codegen/compile.ml) - pub(crate) fn set_hp(hp: usize); - - /// Get __heap_base - fn get_heap_base() -> usize; + /// Get __heap_base. Provided by the code generator (src/codegen/compile.ml). + fn get_heap_base() -> u32; /// Skewed pointer to a skewed pointer to an array. See closure-table.c for details. fn closure_table_loc() -> SkewedPtr; @@ -29,6 +23,14 @@ static mut RECLAIMED: Bytes = Bytes(0); /// Counter for total allocations pub(crate) static mut ALLOCATED: Bytes = Bytes(0); +/// Heap pointer +pub(crate) static mut HP: u32 = 0; + +#[no_mangle] +unsafe extern "C" fn init() { + HP = get_heap_base() as u32; +} + unsafe fn note_live_size(live: Bytes) { MAX_LIVE = ::core::cmp::max(MAX_LIVE, live); } @@ -52,6 +54,11 @@ unsafe extern "C" fn get_total_allocations() -> Bytes { ALLOCATED } +#[no_mangle] +unsafe extern "C" fn get_heap_size() -> Bytes { + Bytes(HP - get_heap_base()) +} + /// Returns object size in words unsafe fn object_size(obj: usize) -> Words { let obj = obj as *const Obj; @@ -343,8 +350,8 @@ unsafe fn evac_static_roots( /// The entry point. Called by the generated code. 
#[no_mangle] unsafe extern "C" fn collect() { - let begin_from_space = get_heap_base(); - let end_from_space = get_hp(); + let begin_from_space = get_heap_base() as usize; + let end_from_space = HP as usize; let begin_to_space = end_from_space; let mut end_to_space = begin_to_space; @@ -389,5 +396,5 @@ unsafe extern "C" fn collect() { // Reset the heap pointer let new_hp = begin_from_space + (end_to_space - begin_to_space); - set_hp(new_hp); + HP = new_hp as u32; } diff --git a/src/codegen/compile.ml b/src/codegen/compile.ml index 682d0647cb5..8df90b4550a 100644 --- a/src/codegen/compile.ml +++ b/src/codegen/compile.ml @@ -804,6 +804,8 @@ module RTS = struct E.add_func_import env "rts" "alloc_bytes" [I32Type] [I32Type]; E.add_func_import env "rts" "alloc_words" [I32Type] [I32Type]; E.add_func_import env "rts" "get_total_allocations" [] [I64Type]; + E.add_func_import env "rts" "get_heap_size" [] [I32Type]; + E.add_func_import env "rts" "init" [] []; () end (* RTS *) @@ -819,13 +821,6 @@ module Heap = struct let get_heap_base env = G.i (GlobalGet (nr (E.get_global env "__heap_base"))) - (* We keep track of the end of the used heap in this global, and bump it if - we allocate stuff. 
This is the actual memory offset, not-skewed yet *) - let get_heap_ptr env = - G.i (GlobalGet (nr (E.get_global env "end_of_heap"))) - let set_heap_ptr env = - G.i (GlobalSet (nr (E.get_global env "end_of_heap"))) - let register_globals env = (* end-of-heap pointer, we set this to __heap_base upon start *) E.add_global32 env "end_of_heap" Mutable 0xDEADBEEFl @@ -909,26 +904,6 @@ module Heap = struct let memcmp env = E.call_import env "rts" "as_memcmp" let register env = - - let get_hp_fn = E.add_fun env "get_hp" (Func.of_body env [] [I32Type] (fun env -> - get_heap_ptr env - )) in - - E.add_export env (nr { - name = Wasm.Utf8.decode "get_hp"; - edesc = nr (FuncExport (nr get_hp_fn)) - }); - - let set_hp_fn = E.add_fun env "set_hp" (Func.of_body env [("new_hp", I32Type)] [] (fun env -> - G.i (LocalGet (nr (Int32.of_int 0))) ^^ - set_heap_ptr env - )) in - - E.add_export env (nr { - name = Wasm.Utf8.decode "set_hp"; - edesc = nr (FuncExport (nr set_hp_fn)) - }); - let get_heap_base_fn = E.add_fun env "get_heap_base" (Func.of_body env [] [I32Type] (fun env -> get_heap_base env )) in @@ -936,16 +911,10 @@ module Heap = struct E.add_export env (nr { name = Wasm.Utf8.decode "get_heap_base"; edesc = nr (FuncExport (nr get_heap_base_fn)) - }); - - Func.define_built_in env "get_heap_size" [] [I32Type] (fun env -> - get_heap_ptr env ^^ - get_heap_base env ^^ - G.i (Binary (Wasm.Values.I32 I32Op.Sub)) - ) + }) let get_heap_size env = - G.i (Call (nr (E.built_in env "get_heap_size"))) + E.call_import env "rts" "get_heap_size" end (* Heap *) @@ -7609,7 +7578,7 @@ and conclude_module env start_fi_o = (* Wrap the start function with the RTS initialization *) let rts_start_fi = E.add_fun env "rts_start" (Func.of_body env [] [] (fun env1 -> - Heap.get_heap_base env ^^ Heap.set_heap_ptr env ^^ + E.call_import env "rts" "init" ^^ match start_fi_o with | Some fi -> G.i (Call fi) From 52060df048ea45302e7d60622ee70316c03a79bc Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?=C3=96mer=20Sinan=20A=C4=9Facan?= Date: Thu, 3 Sep 2020 14:31:02 +0300 Subject: [PATCH 19/20] Update comments re: scavenging static roots --- rts/motoko-rts/src/gc.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/rts/motoko-rts/src/gc.rs b/rts/motoko-rts/src/gc.rs index ca7bf8277e9..fe9e434118c 100644 --- a/rts/motoko-rts/src/gc.rs +++ b/rts/motoko-rts/src/gc.rs @@ -131,8 +131,9 @@ unsafe fn memcpy_bytes(to: usize, from: usize, n: Bytes) { /// /// - begin_from_space: Where the dynamic heap starts. Used for two things: /// -/// - An object is static if its address is below this value. These objects don't point to -/// dynamic heap so we skip those. +/// - An object is static if its address is below this value. These objects either don't point to +/// dynamic heap, or are listed in static_roots array. Objects in static_roots are scavenged +/// separately in `evac_static_roots` below. So we skip these objects here. /// /// - After all objects are evacuated we move to-space to from-space, to be able to do that the /// pointers need to point to their (eventual) locations in from-space, which is calculated with @@ -340,7 +341,8 @@ unsafe fn evac_static_roots( end_to_space: &mut usize, roots: *const Array, ) { - // Roots are in a static array which we don't evacuate. Only evacuate elements. + // The array and the objects pointed to by the array are all static so we don't evacuate them. We + // only evacuate fields of objects in the array.
for i in 0..(*roots).len { let obj = roots.get(i); scav(begin_from_space, begin_to_space, end_to_space, obj.unskew()); From 8d02c335512cd1901959c3648cd53254e4b07664 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20Sinan=20A=C4=9Facan?= Date: Thu, 3 Sep 2020 16:10:51 +0300 Subject: [PATCH 20/20] Rename indirection -> forwarding pointer (The word "forwarding pointer" is used widely in the literature for these things) --- rts/motoko-rts/src/gc.rs | 16 ++++++++-------- rts/motoko-rts/src/types.rs | 6 +++--- src/codegen/compile.ml | 2 +- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/rts/motoko-rts/src/gc.rs b/rts/motoko-rts/src/gc.rs index fe9e434118c..3998cdb4c58 100644 --- a/rts/motoko-rts/src/gc.rs +++ b/rts/motoko-rts/src/gc.rs @@ -96,8 +96,8 @@ unsafe fn object_size(obj: usize) -> Words { size_of::() + (*blob).len.to_words() } - TAG_INDIRECTION => { - rts_trap_with("object_size of indirection\0".as_ptr()); + TAG_FWD_PTR => { + rts_trap_with("object_size: forwarding pointer\0".as_ptr()); } TAG_BITS32 => Words(2), @@ -107,7 +107,7 @@ unsafe fn object_size(obj: usize) -> Words { TAG_CONCAT => size_of::(), _ => { - rts_trap_with("Invalid object tag in object size\0".as_ptr()); + rts_trap_with("object_size: invalid object tag\0".as_ptr()); } } } @@ -166,8 +166,8 @@ unsafe fn evac( let obj = (*ptr_loc).unskew() as *mut Obj; // Update the field if the object is already evacauted - if (*obj).tag == TAG_INDIRECTION { - let fwd = (*(obj as *const Indirection)).fwd; + if (*obj).tag == TAG_FWD_PTR { + let fwd = (*(obj as *const FwdPtr)).fwd; *ptr_loc = fwd; return; } @@ -185,8 +185,8 @@ unsafe fn evac( let obj_loc = (*end_to_space - begin_to_space) + begin_from_space; // Set forwarding pointer - let fwd = obj as *mut Indirection; - (*fwd).header.tag = TAG_INDIRECTION; + let fwd = obj as *mut FwdPtr; + (*fwd).header.tag = TAG_FWD_PTR; (*fwd).fwd = skew(obj_loc); // Update evacuated field @@ -326,7 +326,7 @@ unsafe fn scav( // These don't include pointers, 
skip } - TAG_INDIRECTION | _ => { + TAG_FWD_PTR | _ => { // Any other tag is a bug rts_trap_with("invalid object tag in scav\0".as_ptr()); } diff --git a/rts/motoko-rts/src/types.rs b/rts/motoko-rts/src/types.rs index 3b3842c44af..0c4b79eda26 100644 --- a/rts/motoko-rts/src/types.rs +++ b/rts/motoko-rts/src/types.rs @@ -98,7 +98,7 @@ pub const TAG_CLOSURE: Tag = 7; pub const TAG_SOME: Tag = 8; pub const TAG_VARIANT: Tag = 9; pub const TAG_BLOB: Tag = 10; -pub const TAG_INDIRECTION: Tag = 11; +pub const TAG_FWD_PTR: Tag = 11; pub const TAG_BITS32: Tag = 12; pub const TAG_BIGINT: Tag = 13; pub const TAG_CONCAT: Tag = 14; @@ -172,9 +172,9 @@ pub struct Blob { // data follows .. } -// aka. a forwarding pointer +/// A forwarding pointer placed by the GC in place of an evacuated object. #[repr(C)] -pub struct Indirection { +pub struct FwdPtr { pub header: Obj, pub fwd: SkewedPtr, } diff --git a/src/codegen/compile.ml b/src/codegen/compile.ml index 8df90b4550a..3fea4bc49b3 100644 --- a/src/codegen/compile.ml +++ b/src/codegen/compile.ml @@ -1126,7 +1126,7 @@ module Tagged = struct | Some (* For opt *) | Variant | Blob - (* | Indirection -- commented out, only used by the GC *) + (* | FwdPtr -- Only used by the GC *) | Bits32 (* Contains a 32 bit unsigned number *) | BigInt (* | Concat -- String concatenation, used by rts/text.c *)