Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Polymorphic host functions based on dynamic trampoline generation. #1217

Merged
merged 29 commits into from
Mar 3, 2020
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
2fe6e6f
Global trampoline buffer.
losfair Feb 14, 2020
12373bb
Func::new_polymorphic
losfair Feb 14, 2020
5f4561e
Fix compilation error on Aarch64.
losfair Feb 15, 2020
7b0f7ee
Merge remote-tracking branch 'origin/master' into feature/polymorphic-v2
losfair Feb 15, 2020
ad20a00
fix(runtime-core) Use explicit `dyn` for trait objects.
Hywan Feb 17, 2020
2ee1e80
feat(runtime-core) Allow dynamic signature for polymorphic host funct…
Hywan Feb 17, 2020
644755f
Merge pull request #1225 from Hywan/feature/polymorphic-v2.1
losfair Feb 24, 2020
2020901
Merge remote-tracking branch 'origin/master' into feature/polymorphic-v2
losfair Feb 24, 2020
b67acbc
Add `ErasedFunc` for type-erased functions.
losfair Feb 24, 2020
b7c9c18
Add dynamic executable memory allocation & tests to trampolines.
losfair Feb 25, 2020
80f824e
Auto-release trampolines.
losfair Feb 25, 2020
40d823e
Merge remote-tracking branch 'origin/master' into feature/polymorphic-v2
losfair Feb 25, 2020
96d9e39
Specify imports instead of using a `*`.
losfair Feb 26, 2020
a0ea1af
Remove pub(self).
losfair Feb 26, 2020
262d431
Remove unneeded allow(dead_code).
losfair Feb 26, 2020
292e42a
Update lib/runtime-core/src/typed_func.rs
losfair Feb 26, 2020
a438a64
fold() -> sum()
losfair Feb 26, 2020
b0877b2
Add safety notice for `TrampolineBufferBuilder::remove_global`.
losfair Feb 26, 2020
eb89720
Merge remote-tracking branch 'origin/feature/polymorphic-v2' into fea…
losfair Feb 26, 2020
72e6a85
Update changelog.
losfair Feb 26, 2020
32915f0
Merge remote-tracking branch 'origin/master' into feature/polymorphic-v2
losfair Feb 27, 2020
31a72e5
Rename ErasedFunc to DynamicFunc and fix leaky `PolymorphicContext`.
losfair Feb 28, 2020
6516243
Merge remote-tracking branch 'origin/master' into feature/polymorphic-v2
losfair Feb 28, 2020
2ddf9ad
Disallow "fat" closures.
losfair Feb 28, 2020
84179db
Fix changelog.
losfair Feb 29, 2020
4012645
Fix `CodeMemory` doc comments.
losfair Feb 29, 2020
d443ad8
Remove outdated comment.
losfair Feb 29, 2020
d9e744d
Resolve review comments.
losfair Mar 3, 2020
f499dea
Merge remote-tracking branch 'origin/master' into feature/polymorphic-v2
losfair Mar 3, 2020
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 15 additions & 6 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

31 changes: 29 additions & 2 deletions lib/runtime-core-tests/tests/imports.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
use std::sync::Arc;
use wasmer_runtime_core::{
compile_with, error::RuntimeError, imports, memory::Memory, typed_func::Func,
types::MemoryDescriptor, units::Pages, vm, Instance,
compile_with,
error::RuntimeError,
imports,
memory::Memory,
typed_func::{ErasedFunc, Func},
types::{FuncSig, MemoryDescriptor, Type, Value},
units::Pages,
vm, Instance,
};
use wasmer_runtime_core_tests::{get_compiler, wat2wasm};

Expand Down Expand Up @@ -68,6 +75,7 @@ fn imported_functions_forms(test: &dyn Fn(&Instance)) {
(import "env" "memory" (memory 1 1))
(import "env" "callback_fn" (func $callback_fn (type $type)))
(import "env" "callback_closure" (func $callback_closure (type $type)))
(import "env" "callback_closure_polymorphic" (func $callback_closure_polymorphic (type $type)))
(import "env" "callback_closure_with_env" (func $callback_closure_with_env (type $type)))
(import "env" "callback_fn_with_vmctx" (func $callback_fn_with_vmctx (type $type)))
(import "env" "callback_closure_with_vmctx" (func $callback_closure_with_vmctx (type $type)))
Expand All @@ -86,6 +94,10 @@ fn imported_functions_forms(test: &dyn Fn(&Instance)) {
get_local 0
call $callback_closure)

(func (export "function_closure_polymorphic") (type $type)
get_local 0
call $callback_closure_polymorphic)

(func (export "function_closure_with_env") (type $type)
get_local 0
call $callback_closure_with_env)
Expand Down Expand Up @@ -142,6 +154,16 @@ fn imported_functions_forms(test: &dyn Fn(&Instance)) {
Ok(n + 1)
}),

"callback_closure_polymorphic" => ErasedFunc::new_polymorphic(
losfair marked this conversation as resolved.
Show resolved Hide resolved
Arc::new(FuncSig::new(vec![Type::I32], vec![Type::I32])),
|_, params| -> Vec<Value> {
Hywan marked this conversation as resolved.
Show resolved Hide resolved
match params[0] {
Value::I32(x) => vec![Value::I32(x + 1)],
_ => unreachable!()
}
}
),

// Closure with a captured environment (a single variable + an instance of `Memory`).
"callback_closure_with_env" => Func::new(move |n: i32| -> Result<i32, ()> {
let shift_ = shift + memory.view::<i32>()[0].get();
Expand Down Expand Up @@ -236,6 +258,11 @@ macro_rules! test {

test!(test_fn, function_fn, Ok(2));
test!(test_closure, function_closure, Ok(2));
test!(
test_closure_polymorphic,
function_closure_polymorphic,
Ok(2)
);
test!(
test_closure_with_env,
function_closure_with_env,
Expand Down
20 changes: 18 additions & 2 deletions lib/runtime-core/src/loader.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
//! The loader module functions are used to load an instance.
use crate::{backend::RunnableModule, module::ModuleInfo, types::Type, types::Value, vm::Ctx};
#[cfg(unix)]
use libc::{mmap, mprotect, munmap, MAP_ANON, MAP_PRIVATE, PROT_EXEC, PROT_READ, PROT_WRITE};
use libc::{
mmap, mprotect, munmap, MAP_ANON, MAP_NORESERVE, MAP_PRIVATE, PROT_EXEC, PROT_READ, PROT_WRITE,
};
use std::{
fmt::Debug,
ops::{Deref, DerefMut},
Expand Down Expand Up @@ -169,7 +171,7 @@ impl CodeMemory {
std::ptr::null_mut(),
size,
PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANON,
MAP_PRIVATE | MAP_ANON | MAP_NORESERVE,
nlewycky marked this conversation as resolved.
Show resolved Hide resolved
-1,
0,
)
Expand All @@ -196,6 +198,20 @@ impl CodeMemory {
panic!("cannot set code memory to writable");
}
}

/// Makes this code memory both writable and executable.
///
/// Avoid using this if a combination `make_executable` and `make_writable` can be used.
losfair marked this conversation as resolved.
Show resolved Hide resolved
pub fn make_writable_executable(&self) {
if unsafe { mprotect(self.ptr as _, self.size, PROT_READ | PROT_WRITE | PROT_EXEC) } != 0 {
panic!("cannot set code memory to writable and executable");
}
}

/// Returns the backing pointer of this code memory.
pub fn get_backing_ptr(&self) -> *mut u8 {
self.ptr
}
}

#[cfg(unix)]
Expand Down
188 changes: 188 additions & 0 deletions lib/runtime-core/src/trampoline_x64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,10 @@

use crate::loader::CodeMemory;
use crate::vm::Ctx;
use std::collections::BTreeMap;
use std::fmt;
use std::ptr::NonNull;
use std::sync::Mutex;
use std::{mem, slice};

lazy_static! {
Expand All @@ -29,6 +32,95 @@ lazy_static! {
mem::transmute(ptr)
}
};

static ref TRAMPOLINES: TrampBuffer = TrampBuffer::new(64 * 1048576);
}

/// The global trampoline buffer.
pub(self) struct TrampBuffer {
/// A fixed-(virtual)-size executable+writable buffer for storing trampolines.
buffer: CodeMemory,

/// Allocation state.
alloc: Mutex<AllocState>,
}

/// The allocation state of a `TrampBuffer`.
struct AllocState {
/// Records all allocated blocks in `buffer`.
losfair marked this conversation as resolved.
Show resolved Hide resolved
blocks: BTreeMap<usize, usize>,
}

impl TrampBuffer {
/// Creates a trampoline buffer with a given (virtual) size.
pub(self) fn new(size: usize) -> TrampBuffer {
losfair marked this conversation as resolved.
Show resolved Hide resolved
// Pre-allocate 64 MiB of virtual memory for code.
losfair marked this conversation as resolved.
Show resolved Hide resolved
let mem = CodeMemory::new(size);
mem.make_writable_executable();
TrampBuffer {
buffer: mem,
alloc: Mutex::new(AllocState {
blocks: BTreeMap::new(),
}),
}
}

/// Removes a previously-`insert`ed trampoline.
pub(self) unsafe fn remove(&self, start: NonNull<u8>) {
losfair marked this conversation as resolved.
Show resolved Hide resolved
let start = start.as_ptr() as usize - self.buffer.get_backing_ptr() as usize;
let mut alloc = self.alloc.lock().unwrap();
alloc
.blocks
.remove(&start)
.expect("TrampBuffer::remove(): Attempting to remove a non-existent allocation.");
}

/// Allocates a region of executable memory and copies `buf` to the end of this region.
///
/// Returns `None` if no memory is available.
pub(self) fn insert(&self, buf: &[u8]) -> Option<NonNull<u8>> {
// First, assume an available start position...
let mut assumed_start: usize = 0;

let mut alloc = self.alloc.lock().unwrap();
let mut found = false;

// Then, try invalidating that assumption...
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How common is freeing blocks? And inserting new ones?

Should we instead store a single allocated region, plus a list of "free" regions, keeping consecutive free regions coalesced? This loop would then try to allocate out of the free list. We could even keep the free list sorted by size instead of start location, so that we can quickly find the smallest block that will fit?

Should we even bother reusing freed memory here at all? If I understand right, we produce at most one trampoline per imported function, which is a finite number for a given wasm instance? We could have one of these per instance or module, then just allocate off the end only and delete the whole thing when the instance is deleted?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah the free region method would be the ideal way to track dynamic allocation, as in common memory allocators. Here I just kept track of used blocks for simplicity, maybe it's good to change this.

The lifetime of DynamicFunc is unbounded and we cannot associate it to an instance safely at creation time. Or do you think the public API should be refactored and DynamicFunc::new should be made a method on Instance?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was just thinking it'd be faster to run until the end before trying to use up spaces in the middle. The normal case is that this search will return zero free spaces and we don't need to do a scan until we're actually out of memory.

Talked about DynamicFunc::new with Syrus and Mark, we agreed that putting them on Instance is the wrong way to go. Functions can exist outside of instances anyways, and we're currently failing a spectest over that.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just implemented it in a simple way so that we would not increase our code's size too much with "yet another allocator". For performance optimization, can we port over an external allocator like wee_alloc?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Merging this first and optimizations on the executable memory allocator can be made in another PR.

for (&start, &end) in &alloc.blocks {
if start - assumed_start < buf.len() {
// Unavailable. Move to next free block.
assumed_start = end;
} else {
// This free block can be used.
found = true;
break;
}
}

if !found {
// No previous free blocks were found. Try allocating at the end.
if self.buffer.len() - assumed_start < buf.len() {
// No more free space. Cannot allocate.
losfair marked this conversation as resolved.
Show resolved Hide resolved
return None;
} else {
losfair marked this conversation as resolved.
Show resolved Hide resolved
// Extend towards the end.
}
}

// Now we know `assumed_start` is valid.
let start = assumed_start;
alloc.blocks.insert(start, start + buf.len());

// We have unique ownership to `self.buffer[start..start + buf.len()]`.
let slice = unsafe {
std::slice::from_raw_parts_mut(
self.buffer.get_backing_ptr().offset(start as _),
buf.len(),
)
};
slice.copy_from_slice(buf);
Some(NonNull::new(slice.as_mut_ptr()).unwrap())
}
}

/// An opaque type for pointers to a callable memory location.
Expand Down Expand Up @@ -219,6 +311,21 @@ impl TrampolineBufferBuilder {
idx
}

/// Inserts to the global trampoline buffer.
pub fn insert_global(self) -> Option<NonNull<u8>> {
TRAMPOLINES.insert(&self.code)
}

/// Removes from the global trampoline buffer.
pub unsafe fn remove_global(ptr: NonNull<u8>) {
TRAMPOLINES.remove(ptr);
}

/// Gets the current (non-executable) code in this builder.
pub fn code(&self) -> &[u8] {
&self.code
}

/// Consumes the builder and builds the trampoline buffer.
pub fn build(self) -> TrampolineBuffer {
get_context(); // ensure lazy initialization is completed
Expand Down Expand Up @@ -292,4 +399,85 @@ mod tests {
};
assert_eq!(ret, 136);
}

#[test]
fn test_many_global_trampolines() {
unsafe extern "C" fn inner(n: *const CallContext, args: *const u64) -> u64 {
let n = n as usize;
let mut result: u64 = 0;
for i in 0..n {
result += *args.offset(i as _);
}
result
}

// Use the smallest possible buffer size (page size) to check memory releasing logic.
let buffer = TrampBuffer::new(4096);

// Validate the previous trampoline instead of the current one to ensure that no overwrite happened.
let mut prev: Option<(NonNull<u8>, u64)> = None;

for i in 0..5000usize {
let mut builder = TrampolineBufferBuilder::new();
let n = i % 8;
builder.add_callinfo_trampoline(inner, n as _, n as _);
let ptr = buffer
.insert(builder.code())
.expect("cannot insert new code into global buffer");

if let Some((ptr, expected)) = prev.take() {
use std::mem::transmute;

// Test different argument counts.
unsafe {
match expected {
0 => {
let f = transmute::<_, extern "C" fn() -> u64>(ptr);
assert_eq!(f(), 0);
}
1 => {
let f = transmute::<_, extern "C" fn(u64) -> u64>(ptr);
assert_eq!(f(1), 1);
}
3 => {
let f = transmute::<_, extern "C" fn(u64, u64) -> u64>(ptr);
assert_eq!(f(1, 2), 3);
}
6 => {
let f = transmute::<_, extern "C" fn(u64, u64, u64) -> u64>(ptr);
assert_eq!(f(1, 2, 3), 6);
}
10 => {
let f = transmute::<_, extern "C" fn(u64, u64, u64, u64) -> u64>(ptr);
assert_eq!(f(1, 2, 3, 4), 10);
}
15 => {
let f =
transmute::<_, extern "C" fn(u64, u64, u64, u64, u64) -> u64>(ptr);
assert_eq!(f(1, 2, 3, 4, 5), 15);
}
21 => {
let f = transmute::<
_,
extern "C" fn(u64, u64, u64, u64, u64, u64) -> u64,
>(ptr);
assert_eq!(f(1, 2, 3, 4, 5, 6), 21);
}
28 => {
let f = transmute::<
_,
extern "C" fn(u64, u64, u64, u64, u64, u64, u64) -> u64,
>(ptr);
assert_eq!(f(1, 2, 3, 4, 5, 6, 7), 28);
}
_ => unreachable!(),
}
buffer.remove(ptr);
}
}

let expected = (0..=n as u64).fold(0u64, |a, b| a + b);
losfair marked this conversation as resolved.
Show resolved Hide resolved
prev = Some((ptr, expected))
}
}
}
Loading