From cf81ff613c6903f555cb19606c65443a0103eff3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20Sinan=20A=C4=9Facan?= Date: Thu, 9 Jul 2020 14:13:28 +0300 Subject: [PATCH] Implement the GC in Rust This implements the current garbage collector in Rust. No changes were made to the GC design -- it just ports the one implemented in the code generator to Rust. The goals are: - Evaluate Rust for Motoko's RTS implementation - Make the collector easier to read, understand, modify, and extend. Current status: the code is complete in the sense that there aren't any missing features/passes etc., but it has bugs. I'm not sure how to debug Wasm yet. There are also lots of TODOs in the code, mostly for documentation. Submitting a PR to get early feedback. --- .gitignore | 1 + default.nix | 24 +- nix/default.nix | 11 + rts/Makefile | 25 +- rts/motoko-rts/.vim/coc-settings.json | 3 + rts/motoko-rts/Cargo.lock | 5 + rts/motoko-rts/Cargo.toml | 7 + rts/motoko-rts/src/array.rs | 21 + rts/motoko-rts/src/common.rs | 3 + rts/motoko-rts/src/gc.rs | 461 ++++++++++++++++++ rts/motoko-rts/src/lib.rs | 11 + rts/motoko-rts/src/types.rs | 125 +++++ src/codegen/compile.ml | 200 +++----- .../ok/array-out-of-bounds.drun-run.ok | 4 +- .../ok/array-out-of-bounds.ic-ref-run.ok | 8 +- test/run/ok/array-bounds.wasm-run.ok | 13 +- 16 files changed, 762 insertions(+), 160 deletions(-) create mode 100644 rts/motoko-rts/.vim/coc-settings.json create mode 100644 rts/motoko-rts/Cargo.lock create mode 100644 rts/motoko-rts/Cargo.toml create mode 100644 rts/motoko-rts/src/array.rs create mode 100644 rts/motoko-rts/src/common.rs create mode 100644 rts/motoko-rts/src/gc.rs create mode 100644 rts/motoko-rts/src/lib.rs create mode 100644 rts/motoko-rts/src/types.rs diff --git a/.gitignore b/.gitignore index ffe32527beb..3d1eecb2eac 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ _out _output _build +target **/*~ /result* diff --git a/default.nix b/default.nix index 34274af62d1..746187b0d51 100644 ---
a/default.nix +++ b/default.nix @@ -22,16 +22,18 @@ let haskellPackages = nixpkgs.haskellPackages.override { overrides = import nix/haskell-packages.nix nixpkgs subpath; }; in let - llvmBuildInputs = [ + rtsBuildInputs = [ nixpkgs.clang_10 # for native/wasm building nixpkgs.lld_10 # for wasm building + nixpkgs.rustc + nixpkgs.cargo ]; - # When compiling natively, we want to use `clang` (which is a nixpkgs - # provided wrapper that sets various include paths etc). - # But for some reason it does not handle building for Wasm well, so - # there we use plain clang-10. There is no stdlib there anyways. - llvmEnv = '' + rtsEnv = '' + # When compiling natively, we want to use `clang` (which is a nixpkgs + # provided wrapper that sets various include paths etc). + # But for some reason it does not handle building for Wasm well, so + # there we use plain clang-10. There is no stdlib there anyways. export CLANG="${nixpkgs.clang_10}/bin/clang" export WASM_CLANG="clang-10" export WASM_LD=wasm-ld @@ -122,10 +124,10 @@ rec { src = subpath ./rts; nativeBuildInputs = [ nixpkgs.makeWrapper ]; - buildInputs = llvmBuildInputs; + buildInputs = rtsBuildInputs; preBuild = '' - ${llvmEnv} + ${rtsEnv} export TOMMATHSRC=${nixpkgs.sources.libtommath} export MUSLSRC=${nixpkgs.sources.musl-wasi}/libc-top-half/musl export MUSL_WASI_SYSROOT=${musl-wasi-sysroot} @@ -206,11 +208,11 @@ rec { wasmtime nixpkgs.sources.esm ] ++ - llvmBuildInputs; + rtsBuildInputs; checkPhase = '' patchShebangs . 
- ${llvmEnv} + ${rtsEnv} export MOC=moc export MO_LD=mo-ld export DIDC=didc @@ -486,7 +488,7 @@ rec { builtins.concatMap (d: d.buildInputs) (builtins.attrValues tests) )); - shellHook = llvmEnv; + shellHook = rtsEnv; ESM=nixpkgs.sources.esm; TOMMATHSRC = nixpkgs.sources.libtommath; MUSLSRC = "${nixpkgs.sources.musl-wasi}/libc-top-half/musl"; diff --git a/nix/default.nix b/nix/default.nix index 46ff7189492..f47bcccf433 100644 --- a/nix/default.nix +++ b/nix/default.nix @@ -34,6 +34,17 @@ let wasmtime = self.callPackage ./wasmtime.nix {}; } ) + # nixpkgs's rustc does not include the wasm32-unknown-unknown target, so + # lets add it here. + (self: super: { + rustc = super.rustc.overrideAttrs (old: { + configureFlags = self.lib.lists.forEach old.configureFlags (flag: + if self.lib.strings.hasPrefix "--target=" flag + then flag + ",wasm32-unknown-unknown,wasm32-unknown-emscripten" + else flag + ); + }); + }) ]; }; in diff --git a/rts/Makefile b/rts/Makefile index 69e29c93be7..ff120fe9f66 100644 --- a/rts/Makefile +++ b/rts/Makefile @@ -1,3 +1,5 @@ +SHELL:=bash -O globstar + CLANG ?= clang-10 WASM_CLANG ?= clang-10 WASM_LD ?= wasm-ld-10 @@ -154,14 +156,28 @@ _build/native/tommath_%.o: %.c rts.h buf.h | _build/native _build/wasm/musl_%.o: %.c | _build/wasm $(WASM_CLANG) $(CLANG_FLAGS) $(WASM_FLAGS) $(MUSL_FLAGS) $< --output $@ +.PHONY: _build/wasm/motoko_rts.o +_build/wasm/motoko_rts.o: | _build/wasm + cd motoko-rts && \ + cargo rustc --target=wasm32-unknown-emscripten --release -v -- -Crelocation-model=pic --emit=obj + cp motoko-rts/target/wasm32-unknown-emscripten/release/deps/motoko_rts*.o $@ + +.PHONY: _build/native/motoko_rts.o +_build/native/motoko_rts.o: | _build/native + cd motoko-rts && \ + cargo rustc --release -v -- -Crelocation-model=pic --emit=obj + cp motoko-rts/target/release/deps/motoko_rts*.o $@ + RTS_WASM_O=$(RTSFILES:%=_build/wasm/%.o) RTS_NATIVE_O=$(RTSFILES:%=_build/native/%.o) +RTS_RUST_WASM_O=_build/wasm/motoko_rts.o 
+RTS_RUST_NATIVE_O=_build/native/motoko_rts.o # # The actual RTS, as we ship it with the compiler # -mo-rts.wasm: $(RTS_WASM_O) $(TOMMATH_WASM_O) $(MUSL_WASM_O) +mo-rts.wasm: $(RTS_WASM_O) $(RTS_RUST_WASM_O) $(TOMMATH_WASM_O) $(MUSL_WASM_O) $(WASM_LD) -o $@ \ --import-memory --shared --no-entry --gc-sections \ --export=__wasm_call_ctors \ @@ -171,12 +187,11 @@ mo-rts.wasm: $(RTS_WASM_O) $(TOMMATH_WASM_O) $(MUSL_WASM_O) # A simple program to do simple tests of rts.c, using native compilation # -test_rts: test_rts.c $(RTS_NATIVE_O) $(TOMMATH_NATIVE_O) +test_rts: test_rts.c $(RTS_NATIVE_O) $(RTS_RUST_NATIVE_O) $(TOMMATH_NATIVE_O) $(CLANG) -o $@ $+ -test_leb128: test_leb128.c $(RTS_NATIVE_O) $(TOMMATH_NATIVE_O) +test_leb128: test_leb128.c $(RTS_NATIVE_O) $(RTS_RUST_NATIVE_O) $(TOMMATH_NATIVE_O) $(CLANG) -o $@ $+ clean: - rm -rf _build mo-rts.wasm test_rts test_leb128 - + rm -rf _build mo-rts.wasm test_rts test_leb128 motoko-rts/target diff --git a/rts/motoko-rts/.vim/coc-settings.json b/rts/motoko-rts/.vim/coc-settings.json new file mode 100644 index 00000000000..3f8a8fcf820 --- /dev/null +++ b/rts/motoko-rts/.vim/coc-settings.json @@ -0,0 +1,3 @@ +{ + "rust-analyzer.cargo.target": "wasm32-unknown-emscripten" +} diff --git a/rts/motoko-rts/Cargo.lock b/rts/motoko-rts/Cargo.lock new file mode 100644 index 00000000000..3c535bd34c9 --- /dev/null +++ b/rts/motoko-rts/Cargo.lock @@ -0,0 +1,5 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+[[package]] +name = "motoko-rts" +version = "0.1.0" diff --git a/rts/motoko-rts/Cargo.toml b/rts/motoko-rts/Cargo.toml new file mode 100644 index 00000000000..06b681d4d7b --- /dev/null +++ b/rts/motoko-rts/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "motoko-rts" +version = "0.1.0" +authors = ["Ömer Sinan Ağacan "] +edition = "2018" + +[dependencies] diff --git a/rts/motoko-rts/src/array.rs b/rts/motoko-rts/src/array.rs new file mode 100644 index 00000000000..1ad16ec96e1 --- /dev/null +++ b/rts/motoko-rts/src/array.rs @@ -0,0 +1,21 @@ +use crate::common::rts_trap_with; +use crate::types::{skew, Array, SkewedPtr}; + +/// Returns address of Nth payload field of an array +#[no_mangle] +pub unsafe extern "C" fn array_field_addr(array: SkewedPtr, idx: u32) -> SkewedPtr { + let array_ptr = array.unskew() as *const Array; + + if idx >= (*array_ptr).len { + rts_trap_with("Array index out of bounds\0".as_ptr()); + } + + let payload_begin = array_ptr.offset(1) as *const u32; + skew(payload_begin.offset(idx as isize) as usize) +} + +/// Index an array. Does not check bounds. +pub unsafe fn array_idx_unchecked(array_ptr: *const Array, idx: u32) -> SkewedPtr { + let payload_begin = array_ptr.offset(1) as *const u32; + SkewedPtr(*payload_begin.offset(idx as isize) as usize) +} diff --git a/rts/motoko-rts/src/common.rs b/rts/motoko-rts/src/common.rs new file mode 100644 index 00000000000..0f5fb99567e --- /dev/null +++ b/rts/motoko-rts/src/common.rs @@ -0,0 +1,3 @@ +extern "C" { + pub fn rts_trap_with(msg: *const u8) -> !; +} diff --git a/rts/motoko-rts/src/gc.rs b/rts/motoko-rts/src/gc.rs new file mode 100644 index 00000000000..99fcc64e9fc --- /dev/null +++ b/rts/motoko-rts/src/gc.rs @@ -0,0 +1,461 @@ +// TODO: Review SkewedPtrs below, most of them don't need to be SkewedPtrs +// +// TODO: usize == u32 on Wasm, but rustc doesn't assume we're compiling exclusively to Wasm so we +// have lots of coercions. It'll be simpler if we only used u32. 
+// +// TODO: inconsistent use of size vs. len + +use core::arch::wasm32; + +use crate::array::array_idx_unchecked; +use crate::common::rts_trap_with; +use crate::types::*; + +extern "C" { + /// Get end_of_heap + fn get_hp() -> SkewedPtr; + + /// Set end_of_heap + fn set_hp(hp: SkewedPtr); + + /// Get __heap_base + fn get_heap_base() -> SkewedPtr; + + /// See closure-table.c + fn closure_table_loc() -> SkewedPtr; + + fn get_static_roots() -> *const Array; +} + +/// Maximum live data retained in a GC, in bytes. +// +// NOTE (osa): In the original code (compile.ml) this variable was 64-bit, but I'm not sure why +// that is necessary. Pointers in wasm32 are 32-bits so if the entire address space is live you +// you max u32::MAX here, no need for 64-bits. +// +static mut MAX_LIVE: Bytes = Bytes(0); + +/// In bytes +// TODO: I don't understand what this is for +static mut RECLAIMED: Bytes = Bytes(0); + +#[no_mangle] +pub unsafe extern "C" fn note_live_size(live: Bytes) { + MAX_LIVE = Bytes(::core::cmp::max(MAX_LIVE.0, live.0)); +} + +#[no_mangle] +pub unsafe extern "C" fn get_max_live_size() -> Bytes { + MAX_LIVE +} + +#[no_mangle] +pub unsafe extern "C" fn note_reclaimed(reclaimed: Bytes) { + RECLAIMED.0 += reclaimed.0 as u64; +} + +#[no_mangle] +pub unsafe extern "C" fn get_reclaimed() -> Bytes { + RECLAIMED +} + +/// Page allocation. Ensures that the memory up to the given unskewed pointer is allocated. 
+#[no_mangle] +pub unsafe extern "C" fn grow_memory(ptr: usize) { + let total_pages_needed = ((ptr / 65536) + 1) as i32; + let current_pages = wasm32::memory_size(0) as i32; + let new_pages_needed = total_pages_needed - current_pages; + if new_pages_needed > 0 { + if wasm32::memory_grow(0, new_pages_needed as usize) == core::usize::MAX { + rts_trap_with("Cannot grow memory\0".as_ptr()); + } + } +} + +/// Returns object size in words +#[no_mangle] +pub unsafe extern "C" fn object_size(obj: SkewedPtr) -> Words { + let obj = obj.unskew() as *const Obj; + match (*obj).tag { + TAG_OBJECT => { + let object = obj as *const Object; + let size = (*object).size; + Words(size + 3) // TODO: document what "3" includes + } + + TAG_OBJ_IND => Words(2), + + TAG_ARRAY => { + let array = obj as *const Array; + let size = (*array).len; + Words(size + 2) // TODO: document what "2" includes + } + + TAG_BITS64 => Words(3), + + TAG_MUTBOX => Words(2), + + TAG_CLOSURE => { + let closure = obj as *const Closure; + let size = (*closure).size; + Words(size + 3) // TODO: document what "3" includes + } + + TAG_SOME => Words(2), + + TAG_VARIANT => Words(3), + + TAG_BLOB => { + let blob = obj as *const Blob; + Words((((*blob).len + 3) / 4) + 2) // TODO: document this + } + + TAG_INDIRECTION => { + rts_trap_with("object_size of indirection\0".as_ptr()); + } + + TAG_BITS32 => Words(2), + + TAG_BIGINT => Words(5), + + TAG_CONCAT => Words(4), + + _ => { + rts_trap_with("invalid object tag in object_size\0".as_ptr()); + } + } +} + +#[no_mangle] +pub unsafe extern "C" fn is_tagged_scalar(p: SkewedPtr) -> bool { + p.0 & 0b10 == 0 +} + +fn words_to_bytes(words: Words) -> Bytes { + Bytes(words.0 * 4) +} + +fn bytes_to_words(bytes: Bytes) -> Words { + Words(bytes.0 / 4) +} + +const WORD_SIZE: u32 = 4; + +#[no_mangle] +pub unsafe extern "C" fn memcpy_words_skewed(to: SkewedPtr, from: SkewedPtr, n: Words) { + let to_ptr = to.unskew() as *mut u32; + let from_ptr = from.unskew() as *mut u32; + for i in 0..n.0 
as isize { + *to_ptr.offset(i) = *(from_ptr.offset(i)); + } +} + +/// Evacuate (copy) an object in from-space to to-space, return new end of to-space. Returns the +/// original to-space if the object is already evacuated. +/// +/// Arguments: +/// +/// - begin_from_space: Where the dynamic heap starts. Used for two things: +/// +/// - An object is static if its address is below this value. These object don't need collection +/// so we skip those. +/// +/// - After all objects are evacuated we move to-space to from-space, to be able to do that the +/// pointers need to point to their locations in from-space, which is calculated with +/// `end_to_space - begin_to_space + begin_from_space`. +/// +/// - begin_to_space: Where to-space starts. See above for how this is used. +/// +/// - ptr: Location of the object to evacuate, e.g. an object field address. +/// + +// NB. This is 'evacuate_common' in compile.ml + +unsafe fn evac( + begin_from_space: usize, // not skewed! + begin_to_space: usize, // not skewed! + end_to_space: usize, // not skewed! 
+ ptr_loc: SkewedPtr, // skewed pointer to the field with object to evacuate to to-space +) -> SkewedPtr { + // Field holds a skewed pointer to the object to evacuate + let ptr_loc = ptr_loc.unskew() as *mut SkewedPtr; + + let obj_skewed = *ptr_loc; + let obj = obj_skewed.unskew() as *mut Obj; + + // Ignore static objects (object address, not field address, below the dynamic heap) + if (obj as usize) < begin_from_space { + return skew(end_to_space); + } + + // Already evacuated: point the field at the forwarded copy, to-space is unchanged + if (*obj).tag == TAG_INDIRECTION { + *ptr_loc = (*(obj as *const Indirection)).fwd; + return skew(end_to_space); + } + + let obj_size = object_size(obj_skewed); + let obj_size_bytes = words_to_bytes(obj_size); + + // Grow memory if needed + grow_memory(end_to_space + obj_size_bytes.0 as usize); + + // Copy object to to-space + memcpy_words_skewed(skew(end_to_space), obj_skewed, obj_size); + + // Final location of the object after copying to-space back to from-space + let obj_loc = (end_to_space - begin_to_space) + begin_from_space; + + // Set forwarding pointer + let fwd = obj as *mut Indirection; + (*fwd).header.tag = TAG_INDIRECTION; + (*fwd).fwd = skew(obj_loc); + + // Update evacuated field + *ptr_loc = skew(obj_loc); + + // Return new end of to-space + skew(end_to_space + obj_size_bytes.0 as usize) +} + +#[no_mangle] +pub unsafe extern "C" fn evacuate( + begin_from_space: usize, // not skewed! + begin_to_space: usize, // not skewed! + end_to_space: usize, // not skewed! + ptr_loc: SkewedPtr, // skewed pointer to field with the object to evacuate to to-space +) -> SkewedPtr { + if is_tagged_scalar(*(ptr_loc.unskew() as *const SkewedPtr)) { + return skew(end_to_space); + } + + evac(begin_from_space, begin_to_space, end_to_space, ptr_loc) +} + +/// Evacuate a blob payload pointed by a bigint. bigints are special in that a bigint's first field +/// is an internal pointer: it points to payload of a blob object, instead of to the header.
+/// +/// - `ptr_loc`: Address of a `data_ptr` field of a BigInt (see types.rs). Points to payload of a +/// blob. See types.rs for blob layout. +#[no_mangle] +pub unsafe extern "C" fn evacuate_bigint_blob( + begin_from_space: SkewedPtr, + begin_to_space: SkewedPtr, + end_to_space: SkewedPtr, + ptr_loc: *mut usize, // address of field with a pointer to a blob payload, not skewed +) -> SkewedPtr { + let blob_payload_addr = *ptr_loc; + + // Get blob object from the payload + let mut blob_obj_addr = skew(blob_payload_addr - 2 * (WORD_SIZE as usize)); + // Create a temporary field to the blob object, to be passed to `evac`. + let blob_obj_addr_field = &mut blob_obj_addr; + let blob_obj_addr_field_ptr = blob_obj_addr_field as *mut _; + + let ret = evacuate( + begin_from_space.unskew(), + begin_to_space.unskew(), + end_to_space.unskew(), + skew(blob_obj_addr_field_ptr as usize), // FIXME: no need to be skewed + ); + + // blob_obj_addr_field now has the new location of the blob, get the payload address + let blob_new_addr = (*blob_obj_addr_field).unskew(); + let blob_new_payload_addr = blob_new_addr + 2 * (WORD_SIZE as usize); + + // Update evacuated field + *ptr_loc = blob_new_payload_addr; // not skewed! 
+ + ret +} + +unsafe fn scav( + begin_from_space: SkewedPtr, + begin_to_space: SkewedPtr, + mut end_to_space: SkewedPtr, + obj: SkewedPtr, +) -> SkewedPtr { + let obj = obj.unskew() as *const Obj; + + match (*obj).tag { + TAG_OBJECT => { + let obj = obj as *mut Object; + let obj_payload = (obj as *mut SkewedPtr).offset(3); // 3 header words, see object_size + for i in 0..(*obj).size as isize { + end_to_space = evacuate( + begin_from_space.unskew(), + begin_to_space.unskew(), + end_to_space.unskew(), + skew(obj_payload.offset(i) as usize), // FIXME: no need to be skewed + ); + } + } + + TAG_ARRAY => { + let array = obj as *mut Array; + let array_payload = array.offset(1) as *mut SkewedPtr; + for i in 0..(*array).len as isize { + end_to_space = evacuate( + begin_from_space.unskew(), + begin_to_space.unskew(), + end_to_space.unskew(), + skew(array_payload.offset(i) as usize), // FIXME + ); + } + } + + TAG_MUTBOX => { + let mutbox = obj as *mut MutBox; + let field_addr = ((&mut (*mutbox).field) as *mut _) as usize; + end_to_space = evacuate( + begin_from_space.unskew(), + begin_to_space.unskew(), + end_to_space.unskew(), + skew(field_addr), + ); + } + + TAG_CLOSURE => { + let closure = obj as *mut Closure; + let closure_payload = closure.offset(1) as *mut SkewedPtr; + for i in 0..(*closure).size as isize { + end_to_space = evacuate( + begin_from_space.unskew(), + begin_to_space.unskew(), + end_to_space.unskew(), + skew(closure_payload.offset(i) as usize), // FIXME + ); + } + } + + TAG_SOME => { + let some = obj as *mut Some; + let field_addr = ((&mut (*some).field) as *mut _) as usize; + end_to_space = evacuate( + begin_from_space.unskew(), + begin_to_space.unskew(), + end_to_space.unskew(), + skew(field_addr), + ); + } + + TAG_VARIANT => { + let variant = obj as *mut Variant; + let field_addr = ((&mut (*variant).field) as *mut _) as usize; + end_to_space = evacuate( + begin_from_space.unskew(), + begin_to_space.unskew(), + end_to_space.unskew(), + skew(field_addr), + ); + } + + TAG_BIGINT => { + let
bigint = obj as *mut BigInt; + let data_ptr_addr = (&mut (*bigint).data_ptr) as *mut _; + + end_to_space = evacuate_bigint_blob( + begin_from_space, + begin_to_space, + end_to_space, + data_ptr_addr, + ); + } + + TAG_CONCAT => { + let concat = obj as *mut Concat; + let field1_addr = ((&mut (*concat).text1) as *mut _) as usize; + end_to_space = evacuate( + begin_from_space.unskew(), + begin_to_space.unskew(), + end_to_space.unskew(), + skew(field1_addr), + ); + let field2_addr = ((&mut (*concat).text2) as *mut _) as usize; + end_to_space = evacuate( + begin_from_space.unskew(), + begin_to_space.unskew(), + end_to_space.unskew(), + skew(field2_addr), + ); + } + + TAG_BITS64 | TAG_BITS32 | TAG_BLOB => { + // These don't include pointers, skip + } + + TAG_OBJ_IND | TAG_INDIRECTION => { + // These are ignored in the original code for some reason TODO + } + + _ => { + // Any other tag is a bug + rts_trap_with("invalid object tag in scav\0".as_ptr()); + } + } + + end_to_space +} + +unsafe fn evac_static_roots( + begin_from_space: SkewedPtr, + begin_to_space: SkewedPtr, + mut end_to_space: SkewedPtr, + roots: *const Array, +) -> SkewedPtr { + // Roots are in a static array which we don't evacuate. Only evacuate elements. 
+ for i in 0..(*roots).len { + let obj = array_idx_unchecked(roots, i); + end_to_space = scav(begin_from_space, begin_to_space, end_to_space, obj); + } + end_to_space +} + +#[no_mangle] +pub unsafe extern "C" fn rust_collect_garbage() { + let static_roots = get_static_roots(); + + // Beginning of tospace = end of fromspace + let begin_from_space = get_heap_base(); + let end_from_space = get_hp(); + let begin_to_space = end_from_space; + let mut end_to_space = begin_to_space; + + // Evacuate roots + end_to_space = evac_static_roots(begin_from_space, begin_to_space, end_to_space, static_roots); + end_to_space = evacuate( + begin_from_space.unskew(), + begin_to_space.unskew(), + end_to_space.unskew(), + closure_table_loc(), + ); + + // Scavenge to-space + let mut p = begin_to_space; + while p.unskew() < end_to_space.unskew() { + end_to_space = scav(begin_from_space, begin_to_space, end_to_space, p); + p.0 += words_to_bytes(object_size(p)).0 as usize; + } + + // Note the stats + let new_live_size = end_to_space.unskew() - begin_to_space.unskew(); + note_live_size(Bytes(new_live_size as u32)); + + let reclaimed = (end_from_space.unskew() - begin_from_space.unskew()) + - (end_to_space.unskew() - begin_to_space.unskew()); + note_reclaimed(Bytes(reclaimed as u32)); + + // Copy to-space to the beginning of from-space + memcpy_words_skewed( + begin_from_space, + begin_to_space, + bytes_to_words(Bytes( + (end_to_space.unskew() - begin_to_space.unskew()) as u32, + )), + ); + + // Reset the heap pointer + set_hp(skew( + begin_from_space.unskew() + (end_to_space.unskew() - begin_to_space.unskew()), + )); +} diff --git a/rts/motoko-rts/src/lib.rs b/rts/motoko-rts/src/lib.rs new file mode 100644 index 00000000000..8e30f5d61e0 --- /dev/null +++ b/rts/motoko-rts/src/lib.rs @@ -0,0 +1,11 @@ +//! Implements bits and pieces of Motoko runtime system. Currently garbage collection and a few +//! utilities. 
+ +#![no_std] + +pub mod array; +pub mod common; +pub mod types; + +#[cfg(target_arch = "wasm32")] +pub mod gc; diff --git a/rts/motoko-rts/src/types.rs b/rts/motoko-rts/src/types.rs new file mode 100644 index 00000000000..94bca9c7f32 --- /dev/null +++ b/rts/motoko-rts/src/types.rs @@ -0,0 +1,125 @@ +/// The unit "words": `Words(123u32)` means 123 words. +#[repr(C)] +#[derive(PartialEq, Eq, Clone, Copy)] +pub struct Words(pub A); + +/// The unit "bytes": `Bytes(123u32)` means 123 bytes. +#[repr(C)] +#[derive(PartialEq, Eq, Clone, Copy)] +pub struct Bytes(pub A); + +#[repr(C)] +#[derive(Clone, Copy)] +pub struct SkewedPtr(pub usize); + +impl SkewedPtr { + pub fn unskew(self) -> usize { + self.0.wrapping_add(1) + } +} + +pub fn skew(ptr: usize) -> SkewedPtr { + SkewedPtr(ptr.wrapping_sub(1)) +} + +// NOTE: We don't create an enum for tags as we can never assume to do exhaustive pattern match on +// tags, because of heap corruptions and other bugs (in the code generator or RTS, or maybe because +// of an unsafe API usage). +pub type Tag = u32; + +pub const TAG_OBJECT: Tag = 1; +pub const TAG_OBJ_IND: Tag = 2; +pub const TAG_ARRAY: Tag = 3; +pub const TAG_BITS64: Tag = 5; +pub const TAG_MUTBOX: Tag = 6; +pub const TAG_CLOSURE: Tag = 7; +pub const TAG_SOME: Tag = 8; +pub const TAG_VARIANT: Tag = 9; +pub const TAG_BLOB: Tag = 10; +pub const TAG_INDIRECTION: Tag = 11; +pub const TAG_BITS32: Tag = 12; +pub const TAG_BIGINT: Tag = 13; +pub const TAG_CONCAT: Tag = 14; +// const TAG_TABLE_SEEN: Tag = 15; // No idea what this is + +// Common parts of any object. Other object pointers can be coerced into a pointer to this. +#[repr(C)] +pub struct Obj { + pub tag: Tag, +} + +#[repr(C)] +#[rustfmt::skip] +pub struct Array { + pub header: Obj, + pub len: u32, // number of elements + + // Array elements follow, each u32 sized. We can't have variable-sized structs in Rust so we + // can't add a field here for the elements. 
+ // https://doc.rust-lang.org/nomicon/exotic-sizes.html +} + +#[repr(C)] +pub struct Object { + pub header: Obj, + pub size: u32, + // other stuff follows, but we don't need them currently +} + +#[repr(C)] +pub struct Closure { + pub header: Obj, + pub funid: u32, + pub size: u32, + // other stuff follows ... +} + +#[repr(C)] +pub struct Blob { + pub header: Obj, + pub len: u32, + // data follows .. +} + +// aka. a forwarding pointer +#[repr(C)] +pub struct Indirection { + pub header: Obj, + pub fwd: SkewedPtr, +} + +#[repr(C)] +pub struct BigInt { + pub header: Obj, + pub size: u32, + pub alloc: u32, // TODO: Not sure what this is + // Unskewed pointer to a blob payload. data_ptr - 2 (words) gives us the blob header. + pub data_ptr: usize, +} + +#[repr(C)] +pub struct MutBox { + pub header: Obj, + pub field: SkewedPtr, +} + +#[repr(C)] +pub struct Some { + pub header: Obj, + pub field: SkewedPtr, +} + +#[repr(C)] +pub struct Variant { + pub header: Obj, + pub tag: u32, + pub field: SkewedPtr, +} + +#[repr(C)] +pub struct Concat { + pub header: Obj, + pub n_bytes: u32, + pub text1: SkewedPtr, + pub text2: SkewedPtr, +} diff --git a/src/codegen/compile.ml b/src/codegen/compile.ml index f57fd382184..9ba72ec4adf 100644 --- a/src/codegen/compile.ml +++ b/src/codegen/compile.ml @@ -806,6 +806,16 @@ module RTS = struct E.add_func_import env "rts" "char_is_lowercase" [I32Type] [I32Type]; E.add_func_import env "rts" "char_is_uppercase" [I32Type] [I32Type]; E.add_func_import env "rts" "char_is_alphabetic" [I32Type] [I32Type]; + E.add_func_import env "rts" "grow_memory" [I32Type] []; + E.add_func_import env "rts" "object_size" [I32Type] [I32Type]; + E.add_func_import env "rts" "memcpy_words_skewed" [I32Type; I32Type; I32Type] []; + E.add_func_import env "rts" "is_tagged_scalar" [I32Type] [I32Type]; + E.add_func_import env "rts" "array_field_addr" [I32Type; I32Type] [I32Type]; + E.add_func_import env "rts" "note_live_size" [I32Type] []; + E.add_func_import env "rts" 
"get_max_live_size" [] [I32Type]; + E.add_func_import env "rts" "note_reclaimed" [I32Type] []; + E.add_func_import env "rts" "get_reclaimed" [] [I64Type]; + E.add_func_import env "rts" "rust_collect_garbage" [] []; () end (* RTS *) @@ -854,13 +864,10 @@ module Heap = struct let add_reclaimed env = (* assumes number of reclaimed bytes on the stack *) - G.i (Convert (Wasm.Values.I64 I64Op.ExtendUI32)) ^^ - G.i (GlobalGet (nr (E.get_global env "reclaimed"))) ^^ - G.i (Binary (Wasm.Values.I64 I64Op.Add)) ^^ - G.i (GlobalSet (nr (E.get_global env "reclaimed"))) + E.call_import env "rts" "note_reclaimed" let get_reclaimed env = - G.i (GlobalGet (nr (E.get_global env "reclaimed"))) + E.call_import env "rts" "get_reclaimed" let get_memory_size = G.i MemorySize ^^ @@ -868,43 +875,10 @@ module Heap = struct let note_live_size env = (* assumes size of live set on the stack *) - let (set_live_size, get_live_size) = new_local env "live_size" in - set_live_size ^^ - get_live_size ^^ G.i (Convert (Wasm.Values.I64 I64Op.ExtendUI32)) ^^ - G.i (GlobalGet (nr (E.get_global env "max_live"))) ^^ - G.i (Compare (Wasm.Values.I64 I64Op.LtU)) ^^ - G.if_ [] G.nop begin - get_live_size ^^ G.i (Convert (Wasm.Values.I64 I64Op.ExtendUI32)) ^^ - G.i (GlobalSet (nr (E.get_global env "max_live"))) - end + E.call_import env "rts" "note_live_size" let get_max_live_size env = - G.i (GlobalGet (nr (E.get_global env "max_live"))) - - - (* Page allocation. Ensures that the memory up to the given unskewed pointer is allocated. 
*) - let grow_memory env = - Func.share_code1 env "grow_memory" ("ptr", I32Type) [] (fun env get_ptr -> - let (set_pages_needed, get_pages_needed) = new_local env "pages_needed" in - get_ptr ^^ compile_divU_const page_size ^^ - compile_add_const 1l ^^ - G.i MemorySize ^^ - G.i (Binary (Wasm.Values.I32 I32Op.Sub)) ^^ - set_pages_needed ^^ - - (* Check that the new heap pointer is within the memory *) - get_pages_needed ^^ - compile_unboxed_zero ^^ - G.i (Compare (Wasm.Values.I32 I32Op.GtS)) ^^ - G.if_ [] - ( get_pages_needed ^^ - G.i MemoryGrow ^^ - (* Check result *) - compile_unboxed_zero ^^ - G.i (Compare (Wasm.Values.I32 I32Op.LtS)) ^^ - E.then_trap_with env "Cannot grow memory." - ) G.nop - ) + E.call_import env "rts" "get_max_live_size" let dyn_alloc_words env = G.i (Call (nr (E.built_in env "alloc_words"))) let dyn_alloc_bytes env = G.i (Call (nr (E.built_in env "alloc_bytes"))) @@ -928,7 +902,7 @@ module Heap = struct set_heap_ptr env ^^ (* grow memory if needed *) - get_heap_ptr env ^^ grow_memory env + get_heap_ptr env ^^ E.call_import env "rts" "grow_memory" ); Func.define_built_in env "alloc_bytes" [("n", I32Type)] [I32Type] (fun env -> let get_n = G.i (LocalGet (nr 0l)) in @@ -1000,22 +974,7 @@ module Heap = struct let memcmp env = E.call_import env "rts" "as_memcmp" (* Copying words (works on skewed memory addresses) *) - let memcpy_words_skewed env = - Func.share_code3 env "memcpy_words_skewed" (("to", I32Type), ("from", I32Type), ("n", I32Type)) [] (fun env get_to get_from get_n -> - get_n ^^ - from_0_to_n env (fun get_i -> - get_to ^^ - get_i ^^ compile_mul_const word_size ^^ - G.i (Binary (Wasm.Values.I32 I32Op.Add)) ^^ - - get_from ^^ - get_i ^^ compile_mul_const word_size ^^ - G.i (Binary (Wasm.Values.I32 I32Op.Add)) ^^ - load_ptr ^^ - - store_ptr - ) - ) + let memcpy_words_skewed env = E.call_import env "rts" "memcpy_words_skewed" end (* Heap *) @@ -1121,13 +1080,7 @@ module BitTagged = struct let scalar_shift = 2l let if_tagged_scalar env retty 
is1 is2 = - Func.share_code1 env "is_tagged_scalar" ("x", I32Type) [I32Type] (fun env get_x -> - (* Get bit *) - get_x ^^ - compile_bitand_const 0x2l ^^ - (* Check bit *) - G.i (Test (Wasm.Values.I32 I32Op.Eqz)) - ) ^^ + E.call_import env "rts" "is_tagged_scalar" ^^ G.if_ retty is1 is2 (* With two bit-tagged pointers on the stack, decide @@ -1232,12 +1185,6 @@ module Tagged = struct set_tag ^^ go cases - (* like branch_default but the tag is known statically *) - let branch env retty = function - | [] -> G.i Unreachable - | [_, code] -> G.i Drop ^^ code - | (_, code) :: cases -> branch_default env retty code cases - (* like branch_default but also pushes the scrutinee on the stack for the * branch's consumption *) let _branch_default_with env retty def cases = @@ -2960,8 +2907,7 @@ module Arr = struct No difference between mutable and immutable arrays. *) - let header_size = Int32.add Tagged.header_size 1l - let element_size = 4l + let header_size = Int32.add Tagged.header_size 1l (* 2 *) let len_field = Int32.add Tagged.header_size 0l (* Static array access. No checking *) @@ -2969,21 +2915,7 @@ module Arr = struct (* Dynamic array access. Returns the address (not the value) of the field. 
Does bounds checking *) - let idx env = - Func.share_code2 env "Array.idx" (("array", I32Type), ("idx", I32Type)) [I32Type] (fun env get_array get_idx -> - (* No need to check the lower bound, we interpret idx as unsigned *) - (* Check the upper bound *) - get_idx ^^ - get_array ^^ Heap.load_field len_field ^^ - G.i (Compare (Wasm.Values.I32 I32Op.LtU)) ^^ - E.else_trap_with env "Array index out of bounds" ^^ - - get_idx ^^ - compile_add_const header_size ^^ - compile_mul_const element_size ^^ - get_array ^^ - G.i (Binary (Wasm.Values.I32 I32Op.Add)) - ) + let idx env = E.call_import env "rts" "array_field_addr" (* As above, but taking a bigint (Nat), and reporting overflow as out of bounds *) let idx_bigint env = @@ -3230,6 +3162,9 @@ module Lifecycle = struct end (* Lifecycle *) +let collect_garbage env = + G.i (Call (nr (E.built_in env "collect"))) + (* E.call_import env "rts" "rust_collect_garbage" *) module Dfinity = struct (* Dfinity-specific stuff: System imports, databufs etc. *) @@ -3368,7 +3303,7 @@ module Dfinity = struct G.i (Call (nr (E.built_in env "init"))) ^^ (* Collect garbage *) - G.i (Call (nr (E.built_in env "collect"))) ^^ + collect_garbage env ^^ Lifecycle.trans env Lifecycle.Idle ) in @@ -3405,7 +3340,7 @@ module Dfinity = struct Lifecycle.trans env Lifecycle.InPostUpgrade ^^ G.i (Call (nr (E.built_in env "post_exp"))) ^^ Lifecycle.trans env Lifecycle.Idle ^^ - G.i (Call (nr (E.built_in env "collect"))) + collect_garbage env )) in E.add_export env (nr { @@ -3574,48 +3509,8 @@ end (* RTS_Exports *) module HeapTraversal = struct - (* Returns the object size (in words) *) let object_size env = - Func.share_code1 env "object_size" ("x", I32Type) [I32Type] (fun env get_x -> - get_x ^^ - Tagged.branch env [I32Type] - [ Tagged.Bits64, - compile_unboxed_const 3l - ; Tagged.Bits32, - compile_unboxed_const 2l - ; Tagged.BigInt, - compile_unboxed_const 5l (* HeapTag + sizeof(mp_int) *) - ; Tagged.Some, - compile_unboxed_const 2l - ; Tagged.Variant, - 
compile_unboxed_const 3l - ; Tagged.ObjInd, - compile_unboxed_const 2l - ; Tagged.MutBox, - compile_unboxed_const 2l - ; Tagged.Array, - get_x ^^ - Heap.load_field Arr.len_field ^^ - compile_add_const Arr.header_size - ; Tagged.Blob, - get_x ^^ - Heap.load_field Blob.len_field ^^ - compile_add_const 3l ^^ - compile_divU_const Heap.word_size ^^ - compile_add_const Blob.header_size - ; Tagged.Object, - get_x ^^ - Heap.load_field Object.size_field ^^ - compile_add_const Object.header_size - ; Tagged.Closure, - get_x ^^ - Heap.load_field Closure.len_field ^^ - compile_add_const Closure.header_size - ; Tagged.Concat, - compile_unboxed_const 4l - ] - (* Indirections have unknown size. *) - ) + E.call_import env "rts" "object_size" let walk_heap_from_to env compile_from compile_to mk_code = let (set_x, get_x) = new_local env "x" in @@ -5008,7 +4903,7 @@ module GC = struct get_end_to_space ^^ get_len ^^ compile_mul_const Heap.word_size ^^ G.i (Binary (Wasm.Values.I32 I32Op.Add)) ^^ - Heap.grow_memory env ^^ + E.call_import env "rts" "grow_memory" ^^ (* Copy the referenced object to to space *) get_obj ^^ HeapTraversal.object_size env ^^ set_len ^^ @@ -5073,6 +4968,45 @@ module GC = struct ) let register env static_roots (end_of_static_space : int32) = + + (* TODO: Not sure about static_roots part ... 
*) + let get_static_roots = E.add_fun env "get_static_roots" (Func.of_body env [] [I32Type] (fun env -> + compile_unboxed_const static_roots + )) in + + E.add_export env (nr { + name = Wasm.Utf8.decode "get_static_roots"; + edesc = nr (FuncExport (nr get_static_roots)) + }); + + let get_hp = E.add_fun env "get_hp" (Func.of_body env [] [I32Type] (fun env -> + Heap.get_heap_ptr env + )) in + + E.add_export env (nr { + name = Wasm.Utf8.decode "get_hp"; + edesc = nr (FuncExport (nr get_hp)) + }); + + let set_hp = E.add_fun env "set_hp" (Func.of_body env [("new_hp", I32Type)] [] (fun env -> + G.i (LocalGet (nr (Int32.of_int 0))) ^^ + Heap.set_heap_ptr env + )) in + + E.add_export env (nr { + name = Wasm.Utf8.decode "set_hp"; + edesc = nr (FuncExport (nr set_hp)) + }); + + let get_heap_base = E.add_fun env "get_heap_base" (Func.of_body env [] [I32Type] (fun env -> + Heap.get_heap_base env + )) in + + E.add_export env (nr { + name = Wasm.Utf8.decode "get_heap_base"; + edesc = nr (FuncExport (nr get_heap_base)) + }); + Func.define_built_in env "get_heap_size" [] [I32Type] (fun env -> Heap.get_heap_ptr env ^^ Heap.get_heap_base env ^^ @@ -5550,7 +5484,7 @@ module FuncDec = struct let message_cleanup env sort = match sort with | Type.Shared Type.Write -> - G.i (Call (nr (E.built_in env "collect"))) ^^ + collect_garbage env ^^ Lifecycle.trans env Lifecycle.Idle | Type.Shared Type.Query -> Lifecycle.trans env Lifecycle.PostQuery @@ -7188,7 +7122,7 @@ and compile_exp (env : E.t) ae exp = | OtherPrim "rts_max_live_size", [] -> SR.Vanilla, - Heap.get_max_live_size env ^^ BigNum.from_word64 env + Heap.get_max_live_size env ^^ BigNum.from_word32 env | OtherPrim "rts_callback_table_count", [] -> SR.Vanilla, diff --git a/test/run-drun/ok/array-out-of-bounds.drun-run.ok b/test/run-drun/ok/array-out-of-bounds.drun-run.ok index c28447c99d1..16c9e39493b 100644 --- a/test/run-drun/ok/array-out-of-bounds.drun-run.ok +++ b/test/run-drun/ok/array-out-of-bounds.drun-run.ok @@ -1,4 +1,4 @@ 
ingress Canister Id: ic:0100000000000000000000000000000000012D ingress System -ingress Err: IC0503: Canister ic:0100000000000000000000000000000000012D trapped explicitly: Array index out of bounds -ingress Err: IC0503: Canister ic:0100000000000000000000000000000000012D trapped explicitly: Array index out of bounds +ingress Err: IC0503: Canister ic:0100000000000000000000000000000000012D trapped explicitly: RTS error: Array index out of bounds +ingress Err: IC0503: Canister ic:0100000000000000000000000000000000012D trapped explicitly: RTS error: Array index out of bounds diff --git a/test/run-drun/ok/array-out-of-bounds.ic-ref-run.ok b/test/run-drun/ok/array-out-of-bounds.ic-ref-run.ok index d5a83262dd0..860c42c4228 100644 --- a/test/run-drun/ok/array-out-of-bounds.ic-ref-run.ok +++ b/test/run-drun/ok/array-out-of-bounds.ic-ref-run.ok @@ -3,8 +3,8 @@ → install ← completed → update foo1(0x4449444c0000) -Trap: EvalTrapError :0.1 "canister trapped explicitly: Array index out of bounds" -← rejected (RC_CANISTER_ERROR): canister trapped: EvalTrapError :0.1 "canister trapped explicitly: Array index out of bounds" +Trap: EvalTrapError :0.1 "canister trapped explicitly: RTS error: Array index out of bounds" +← rejected (RC_CANISTER_ERROR): canister trapped: EvalTrapError :0.1 "canister trapped explicitly: RTS error: Array index out of bounds" → update foo2(0x4449444c0000) -Trap: EvalTrapError :0.1 "canister trapped explicitly: Array index out of bounds" -← rejected (RC_CANISTER_ERROR): canister trapped: EvalTrapError :0.1 "canister trapped explicitly: Array index out of bounds" +Trap: EvalTrapError :0.1 "canister trapped explicitly: RTS error: Array index out of bounds" +← rejected (RC_CANISTER_ERROR): canister trapped: EvalTrapError :0.1 "canister trapped explicitly: RTS error: Array index out of bounds" diff --git a/test/run/ok/array-bounds.wasm-run.ok b/test/run/ok/array-bounds.wasm-run.ok index 9051c8b7fb9..bee4ff1c629 100644 --- a/test/run/ok/array-bounds.wasm-run.ok 
+++ b/test/run/ok/array-bounds.wasm-run.ok @@ -1,12 +1,15 @@ -Array index out of bounds +RTS error: Array index out of bounds Error: failed to run main module `_out/array-bounds.wasm` Caused by: 0: failed to invoke `_start` 1: wasm trap: unreachable, source location: @___: wasm backtrace: - 0: !Array.idx - 1: !Array.idx_bigint - 2: !init - 3: !_start + 0: !rts_trap + 1: !trap_with_prefix + 2: !rts_trap_with + 3: !array_field_addr + 4: !Array.idx_bigint + 5: !init + 6: !_start