From 49ea2bf53003e7c007ea4bac56bc6779e0cca24a Mon Sep 17 00:00:00 2001 From: Ivan Enderlin Date: Tue, 17 Nov 2020 11:24:45 +0100 Subject: [PATCH 1/6] chore(compiler) Rename `ModuleInfoTranslation.module_translation` to `.module_translation_state`. The idea is to avoid confusion with th e`ModuleInfoTranslation` type itself. --- lib/compiler-cranelift/src/compiler.rs | 4 ++-- lib/compiler/src/translator/environ.rs | 12 ++++++------ lib/engine-jit/src/artifact.rs | 2 +- lib/engine-native/src/artifact.rs | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/lib/compiler-cranelift/src/compiler.rs b/lib/compiler-cranelift/src/compiler.rs index 265aa5ac1d3..84e1ae42e1a 100644 --- a/lib/compiler-cranelift/src/compiler.rs +++ b/lib/compiler-cranelift/src/compiler.rs @@ -55,7 +55,7 @@ impl Compiler for CraneliftCompiler { &self, target: &Target, compile_info: &CompileModuleInfo, - module_translation: &ModuleTranslationState, + module_translation_state: &ModuleTranslationState, function_body_inputs: PrimaryMap>, ) -> Result { let isa = self.config().isa(target); @@ -115,7 +115,7 @@ impl Compiler for CraneliftCompiler { // } func_translator.translate( - module_translation, + module_translation_state, input.data, input.module_offset, &mut context.func, diff --git a/lib/compiler/src/translator/environ.rs b/lib/compiler/src/translator/environ.rs index 81668f632f9..ffd2f0a56d9 100644 --- a/lib/compiler/src/translator/environ.rs +++ b/lib/compiler/src/translator/environ.rs @@ -44,7 +44,7 @@ pub struct ModuleInfoTranslation<'data> { pub data_initializers: Vec>, /// The decoded Wasm types for the module. - pub module_translation: Option, + pub module_translation_state: Option, } /// Object containing the standalone environment information. @@ -62,7 +62,7 @@ impl<'data> ModuleEnvironment<'data> { module: ModuleInfo::new(), function_body_inputs: PrimaryMap::new(), data_initializers: Vec::new(), - module_translation: None, + module_translation_state: None, }, imports: 0, } @@ -71,9 +71,9 @@ impl<'data> ModuleEnvironment<'data> { /// Translate a wasm module using this environment. This consumes the /// `ModuleEnvironment` and produces a `ModuleInfoTranslation`. pub fn translate(mut self, data: &'data [u8]) -> WasmResult> { - assert!(self.result.module_translation.is_none()); - let module_translation = translate_module(data, &mut self)?; - self.result.module_translation = Some(module_translation); + assert!(self.result.module_translation_state.is_none()); + let module_translation_state = translate_module(data, &mut self)?; + self.result.module_translation_state = Some(module_translation_state); Ok(self.result) } @@ -370,7 +370,7 @@ impl<'data> ModuleEnvironment<'data> { pub(crate) fn define_function_body( &mut self, - _module_translation: &ModuleTranslationState, + _module_translation_state: &ModuleTranslationState, body_bytes: &'data [u8], body_offset: usize, ) -> WasmResult<()> { diff --git a/lib/engine-jit/src/artifact.rs b/lib/engine-jit/src/artifact.rs index f79ab1c0f08..350a949937b 100644 --- a/lib/engine-jit/src/artifact.rs +++ b/lib/engine-jit/src/artifact.rs @@ -83,7 +83,7 @@ impl JITArtifact { let compilation = compiler.compile_module( &jit.target(), &compile_info, - translation.module_translation.as_ref().unwrap(), + translation.module_translation_state.as_ref().unwrap(), translation.function_body_inputs, )?; let function_call_trampolines = compilation.get_function_call_trampolines(); diff --git a/lib/engine-native/src/artifact.rs b/lib/engine-native/src/artifact.rs index 80971347441..6a3a8f1549c 100644 --- a/lib/engine-native/src/artifact.rs +++ b/lib/engine-native/src/artifact.rs @@ -133,7 +133,7 @@ impl NativeArtifact { compile_info, translation.function_body_inputs, translation.data_initializers, - translation.module_translation, + translation.module_translation_state, )) } From e48c0ee65f2352039f2fc705421313163677d7b2 Mon Sep 17 00:00:00 2001 From: Ivan Enderlin Date: Tue, 17 Nov 2020 15:25:46 +0100 Subject: [PATCH 2/6] doc(engine) Explain why calling `unwrap` is safe here. --- lib/engine-jit/src/artifact.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/engine-jit/src/artifact.rs b/lib/engine-jit/src/artifact.rs index 350a949937b..5567964c6e7 100644 --- a/lib/engine-jit/src/artifact.rs +++ b/lib/engine-jit/src/artifact.rs @@ -83,6 +83,9 @@ impl JITArtifact { let compilation = compiler.compile_module( &jit.target(), &compile_info, + // SAFETY: Calling `unwrap` is safe since + // `environ.translate()` above will write some data into + // `module_translation_state`. translation.module_translation_state.as_ref().unwrap(), translation.function_body_inputs, )?; From 8016a34b4ccc28442b30a50d3551d0c4fbe228cc Mon Sep 17 00:00:00 2001 From: Ivan Enderlin Date: Tue, 17 Nov 2020 15:51:29 +0100 Subject: [PATCH 3/6] fix(vm) Fix memory leak of `InstanceHandle`. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In `wasmer_engine::Artifact::instantiate`, the engine artifact allocates a new instance by calling `wasmer_vm::InstanceHandle::allocate_instance`. To make it short, this function calculates the [`Layout`] of an `wasmer_vm::Instance`, and allocates space for it by calling [`alloc`]. This last part is important because it means we are responsible to deallocate it with [`dealloc`]. The pointer to this `wasmer_vm::Instance` is stored in the `wasmer_vm::InstanceHandle` structure. That's the handle that is returned by the engine artifact `Artifact::instantiate` method. This instance handle is then stored in `wasmer::Instance` (through the intervention of `wasmer::Module`). How is it freed? It wasn't. That's the leak. [`dealloc`] was never called. How to free it? We must call [`dealloc`]. There is even a `wasmer_vm::InstanceHandle::deallocate` helper to do that properly. Neat! When to free it? That's the tricky part. A `wasmer::Instance` can be clonable. To do so, `wasmer_vm::InstanceHandle` must be clonable too. There was a `Clone` implementation, that was constructing a new `wasmer_vm:InstanceHandle` by using the same pointer to `wasmer_vm::Instance`. That's annoying because it's the best way to get many pointers that point to the same `wasmer_vm::Instance` in the nature, and it's difficult to track them. This patch changes the paradigm. There is only one and unique `wasmer_vm::InstanceHandle` per `wasmer::Instance`, including its clones. The handle is now stored inside a `Arc>`. Consequently, when a `wasmer::Instance` is cloned, it uses the same `wasmer_vm::InstanceHandle`, not a clone of it. Bonus: `wasmer::Instance` continues to be `Send` + `Sync`. So. Let's back to our question. When to free `wasmer_vm::InstanceHandle`? Response: When `wasmer::Instance` is dropped. Right? There is a unique path from `wasmer::Instance`, to `wasmer_vm::InstanceHandle`, to `wasmer_vm::Instance` now. So we just need to call `wasmer_vm::InstanceHandle::dealloc` in a specific `Drop` implementation for `wasmer_vm::InstanceHandle`, and the Rust borrow checker does the rest. Yes. … No. There is another use case: It is possible to create a `wasmer_vm::InstanceHandle` with `InstanceHandle::from_vmctx`. Indeed, a `wasmer_vm::VMContext` also stores a pointer to `wasmer_vm::Instance`. In this, we consider `wasmer_vm::VMContext` owns the instance pointer, somehow, and is responsible to free it properly. Consequently, we need another flag inside `wasmer_vm::InstanceHandle` to know whether this structure owns the pointer to `wasmer_vm::Instance` or not. So. Let's back to our question. When to free `wasmer_vm::InstanceHandle`? Response: Inside the `Drop` implementation of `wasmer_vm::InstanceHandle` with its `Self::dealloc` method if and only if the handle owns the pointer to `wasmer_vm::Instance`. Testing with Valgrind shows that the leak has been removed. [`Layout`]: https://doc.rust-lang.org/std/alloc/struct.Layout.html [`alloc`]: https://doc.rust-lang.org/std/alloc/fn.alloc.html [`dealloc`]: https://doc.rust-lang.org/std/alloc/fn.dealloc.html --- lib/api/src/instance.rs | 10 +++++----- lib/engine/src/artifact.rs | 1 + lib/vm/src/instance.rs | 32 ++++++++++++++++++-------------- 3 files changed, 24 insertions(+), 19 deletions(-) diff --git a/lib/api/src/instance.rs b/lib/api/src/instance.rs index cac647fce10..50b67739d6d 100644 --- a/lib/api/src/instance.rs +++ b/lib/api/src/instance.rs @@ -4,6 +4,7 @@ use crate::module::Module; use crate::store::Store; use crate::InstantiationError; use std::fmt; +use std::sync::{Arc, Mutex}; use wasmer_engine::Resolver; use wasmer_vm::{InstanceHandle, VMContext}; @@ -17,7 +18,7 @@ use wasmer_vm::{InstanceHandle, VMContext}; /// Spec: https://webassembly.github.io/spec/core/exec/runtime.html#module-instances #[derive(Clone)] pub struct Instance { - handle: InstanceHandle, + handle: Arc>, module: Module, /// The exports for an instance. pub exports: Exports, @@ -30,6 +31,7 @@ mod send_test { fn is_send() -> bool { true } + #[test] fn instance_is_send() { assert!(is_send::()); @@ -72,9 +74,7 @@ impl Instance { /// * Runtime errors that happen when running the module `start` function. pub fn new(module: &Module, resolver: &dyn Resolver) -> Result { let store = module.store(); - let handle = module.instantiate(resolver)?; - let exports = module .exports() .map(|export| { @@ -86,7 +86,7 @@ impl Instance { .collect::(); Ok(Self { - handle, + handle: Arc::new(Mutex::new(handle)), module: module.clone(), exports, }) @@ -104,7 +104,7 @@ impl Instance { #[doc(hidden)] pub fn vmctx_ptr(&self) -> *mut VMContext { - self.handle.vmctx_ptr() + self.handle.lock().unwrap().vmctx_ptr() } } diff --git a/lib/engine/src/artifact.rs b/lib/engine/src/artifact.rs index 91cfe4b1efe..d56da85d4a5 100644 --- a/lib/engine/src/artifact.rs +++ b/lib/engine/src/artifact.rs @@ -127,6 +127,7 @@ pub trait Artifact: Send + Sync + Upcastable { InstanceHandle::new( instance_ptr, + true, // the `Instance` is owned by `InstanceHandle`. offsets, module, self.finished_functions().clone(), diff --git a/lib/vm/src/instance.rs b/lib/vm/src/instance.rs index de21f1ae98d..ecc68708247 100644 --- a/lib/vm/src/instance.rs +++ b/lib/vm/src/instance.rs @@ -777,6 +777,13 @@ impl Instance { #[derive(Hash, PartialEq, Eq)] pub struct InstanceHandle { instance: *mut Instance, + + /// Whether `Self` owns `self.instance`. It's not always the case; + /// e.g. when `Self` is built with `Self.from_vmctx`. + /// + /// This information is necessary to know whether `Self` should + /// deallocate `self.instance`. + owned_instance: bool, } /// # Safety @@ -878,7 +885,7 @@ impl InstanceHandle { /// safety. /// /// However the following must be taken care of before calling this function: - /// - `instance_ptr` must point to valid memory sufficiently large for there + /// - `instance_ptr` must point to valid memory sufficiently large for the /// `Instance`. /// - The memory at `instance.tables_ptr()` must be initialized with data for /// all the local tables. @@ -887,6 +894,7 @@ impl InstanceHandle { #[allow(clippy::too_many_arguments)] pub unsafe fn new( instance_ptr: NonNull, + owned_instance: bool, offsets: VMOffsets, module: Arc, finished_functions: BoxedSlice, @@ -924,8 +932,10 @@ impl InstanceHandle { vmctx: VMContext {}, }; ptr::write(instance_ptr, instance); + Self { instance: instance_ptr, + owned_instance, } }; let instance = handle.instance(); @@ -1012,6 +1022,9 @@ impl InstanceHandle { Self { instance: instance as *const Instance as *mut Instance, + + // We consider `vmctx` owns the `Instance`, not `Self`. + owned_instance: false, } } @@ -1140,23 +1153,14 @@ impl InstanceHandle { } } -impl Clone for InstanceHandle { - fn clone(&self) -> Self { - Self { - instance: self.instance, +impl Drop for InstanceHandle { + fn drop(&mut self) { + if self.owned_instance { + unsafe { self.dealloc() } } } } -// TODO: uncomment this, as we need to store the handles -// in the store, and once the store is dropped, then the instances -// will too. -// impl Drop for InstanceHandle { -// fn drop(&mut self) { -// unsafe { self.dealloc() } -// } -// } - fn check_table_init_bounds(instance: &Instance) -> Result<(), Trap> { let module = Arc::clone(&instance.module); for init in &module.table_initializers { From de78a954dfc8c5d3dca2923320180790e77afdf1 Mon Sep 17 00:00:00 2001 From: Ivan Enderlin Date: Thu, 19 Nov 2020 09:42:55 +0100 Subject: [PATCH 4/6] fix(engine-object-file) `module_translation` has been renamed. --- lib/engine-object-file/src/artifact.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/engine-object-file/src/artifact.rs b/lib/engine-object-file/src/artifact.rs index 42b95c09c39..d11b9b20c0d 100644 --- a/lib/engine-object-file/src/artifact.rs +++ b/lib/engine-object-file/src/artifact.rs @@ -128,7 +128,7 @@ impl ObjectFileArtifact { compile_info, translation.function_body_inputs, translation.data_initializers, - translation.module_translation, + translation.module_translation_state, )) } From d211ebe2ed2ac49d7f9cb8e9deeecafbe8916da4 Mon Sep 17 00:00:00 2001 From: Ivan Enderlin Date: Thu, 19 Nov 2020 09:40:43 +0100 Subject: [PATCH 5/6] doc(engine-jit) Fix a typo. --- lib/engine-jit/src/artifact.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/engine-jit/src/artifact.rs b/lib/engine-jit/src/artifact.rs index 5567964c6e7..e89af2aa2ab 100644 --- a/lib/engine-jit/src/artifact.rs +++ b/lib/engine-jit/src/artifact.rs @@ -83,7 +83,7 @@ impl JITArtifact { let compilation = compiler.compile_module( &jit.target(), &compile_info, - // SAFETY: Calling `unwrap` is safe since + // SAFETY: Calling `unwrap` is correct since // `environ.translate()` above will write some data into // `module_translation_state`. translation.module_translation_state.as_ref().unwrap(), From 8e7d7efb406e20805218e3a97820acce5e7e7295 Mon Sep 17 00:00:00 2001 From: Ivan Enderlin Date: Thu, 19 Nov 2020 10:42:50 +0100 Subject: [PATCH 6/6] feat(vm) Remove the `owned_instance` argument of `InstanceHandle::new`. As discussed with @syrusakbary, it's OK to assume `InstanceHandle::new` will always own `instance_ptr`. --- lib/engine/src/artifact.rs | 1 - lib/vm/src/instance.rs | 11 ++++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/engine/src/artifact.rs b/lib/engine/src/artifact.rs index d56da85d4a5..91cfe4b1efe 100644 --- a/lib/engine/src/artifact.rs +++ b/lib/engine/src/artifact.rs @@ -127,7 +127,6 @@ pub trait Artifact: Send + Sync + Upcastable { InstanceHandle::new( instance_ptr, - true, // the `Instance` is owned by `InstanceHandle`. offsets, module, self.finished_functions().clone(), diff --git a/lib/vm/src/instance.rs b/lib/vm/src/instance.rs index ecc68708247..0991992b2b4 100644 --- a/lib/vm/src/instance.rs +++ b/lib/vm/src/instance.rs @@ -779,7 +779,7 @@ pub struct InstanceHandle { instance: *mut Instance, /// Whether `Self` owns `self.instance`. It's not always the case; - /// e.g. when `Self` is built with `Self.from_vmctx`. + /// e.g. when `Self` is built with `Self::from_vmctx`. /// /// This information is necessary to know whether `Self` should /// deallocate `self.instance`. @@ -885,8 +885,10 @@ impl InstanceHandle { /// safety. /// /// However the following must be taken care of before calling this function: - /// - `instance_ptr` must point to valid memory sufficiently large for the - /// `Instance`. + /// - `instance_ptr` must point to valid memory sufficiently large + /// for the `Instance`. `instance_ptr` will be owned by + /// `InstanceHandle`, see `InstanceHandle::owned_instance` to + /// learn more. /// - The memory at `instance.tables_ptr()` must be initialized with data for /// all the local tables. /// - The memory at `instance.memories_ptr()` must be initialized with data for @@ -894,7 +896,6 @@ impl InstanceHandle { #[allow(clippy::too_many_arguments)] pub unsafe fn new( instance_ptr: NonNull, - owned_instance: bool, offsets: VMOffsets, module: Arc, finished_functions: BoxedSlice, @@ -935,7 +936,7 @@ impl InstanceHandle { Self { instance: instance_ptr, - owned_instance, + owned_instance: true, } }; let instance = handle.instance();