Skip to content

Commit

Permalink
Validate modules while translating
Browse files Browse the repository at this point in the history
This commit is a change to cranelift-wasm to validate each function body
as it is translated. Additionally top-level module translation functions
will perform module validation. This commit builds on changes in
wasmparser to perform module validation interwtwined with parsing and
translation. This will be necessary for future wasm features such as
module linking where the type behind a function index, for example, can
be far away in another module. Additionally this also brings a nice
benefit where parsing the binary only happens once (instead of having an
up-front serial validation step) and validation can happen in parallel
for each function.

Most of the changes in this commit are plumbing to make sure everything
lines up right. The major functional change here is that module
compilation should be faster by validating in parallel (or skipping
function validation entirely in the case of a cache hit). Otherwise from
a user-facing perspective nothing should be that different.

This commit does mean that cranelift's translation now inherently
validates the input wasm module. This means that the Spidermonkey
integration of cranelift-wasm will also be validating the function as
it's being translated with cranelift. The associated PR for wasmparser
(bytecodealliance/wasmparser#62) provides the necessary tools to create
a `FuncValidator` for Gecko, but this is something I'll want careful
review for before landing!
  • Loading branch information
alexcrichton committed Aug 4, 2020
1 parent 3d2e0e5 commit 5570c95
Show file tree
Hide file tree
Showing 31 changed files with 381 additions and 356 deletions.
26 changes: 14 additions & 12 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ libc = "0.2.60"
log = "0.4.8"
rayon = "1.2.1"
humantime = "1.3.0"
wasmparser = "0.61"

[dev-dependencies]
env_logger = "0.7.1"
Expand Down
3 changes: 2 additions & 1 deletion cranelift/wasm/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,12 @@ keywords = ["webassembly", "wasm"]
edition = "2018"

[dependencies]
wasmparser = { version = "0.59.0", default-features = false }
wasmparser = { version = "0.61.0", default-features = false }
cranelift-codegen = { path = "../codegen", version = "0.66.0", default-features = false }
cranelift-entity = { path = "../entity", version = "0.66.0" }
cranelift-frontend = { path = "../frontend", version = "0.66.0", default-features = false }
hashbrown = { version = "0.7", optional = true }
itertools = "0.9.0"
log = { version = "0.4.6", default-features = false }
serde = { version = "1.0.94", features = ["derive"], optional = true }
thiserror = "1.0.4"
Expand Down
54 changes: 28 additions & 26 deletions cranelift/wasm/src/code_translator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
//! argument.
use super::{hash_map, HashMap};
use crate::environ::{FuncEnvironment, GlobalVariable, ReturnMode, WasmResult};
use crate::state::{ControlStackFrame, ElseData, FuncTranslationState, ModuleTranslationState};
use crate::state::{ControlStackFrame, ElseData, FuncTranslationState};
use crate::translation_utils::{
block_with_params, blocktype_params_results, f32_translation, f64_translation,
};
Expand All @@ -43,7 +43,7 @@ use cranelift_frontend::{FunctionBuilder, Variable};
use std::cmp;
use std::convert::TryFrom;
use std::vec::Vec;
use wasmparser::{MemoryImmediate, Operator};
use wasmparser::{FuncValidator, MemoryImmediate, Operator, WasmModuleResources};

// Clippy warns about "flags: _" but its important to document that the flags field is ignored
#[cfg_attr(
Expand All @@ -53,14 +53,14 @@ use wasmparser::{MemoryImmediate, Operator};
/// Translates wasm operators into Cranelift IR instructions. Returns `true` if it inserted
/// a return.
pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
module_translation_state: &ModuleTranslationState,
validator: &mut FuncValidator<impl WasmModuleResources>,
op: &Operator,
builder: &mut FunctionBuilder,
state: &mut FuncTranslationState,
environ: &mut FE,
) -> WasmResult<()> {
if !state.reachable {
translate_unreachable_operator(module_translation_state, &op, builder, state, environ)?;
translate_unreachable_operator(validator, &op, builder, state, environ)?;
return Ok(());
}

Expand Down Expand Up @@ -180,14 +180,14 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
* possible `Block`'s arguments values.
***********************************************************************************/
Operator::Block { ty } => {
let (params, results) = blocktype_params_results(module_translation_state, *ty)?;
let next = block_with_params(builder, results, environ)?;
let (params, results) = blocktype_params_results(validator, *ty)?;
let next = block_with_params(builder, results.clone(), environ)?;
state.push_block(next, params.len(), results.len());
}
Operator::Loop { ty } => {
let (params, results) = blocktype_params_results(module_translation_state, *ty)?;
let loop_body = block_with_params(builder, params, environ)?;
let next = block_with_params(builder, results, environ)?;
let (params, results) = blocktype_params_results(validator, *ty)?;
let loop_body = block_with_params(builder, params.clone(), environ)?;
let next = block_with_params(builder, results.clone(), environ)?;
builder.ins().jump(loop_body, state.peekn(params.len()));
state.push_loop(loop_body, next, params.len(), results.len());

Expand All @@ -204,24 +204,24 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
Operator::If { ty } => {
let val = state.pop1();

let (params, results) = blocktype_params_results(module_translation_state, *ty)?;
let (destination, else_data) = if params == results {
let (params, results) = blocktype_params_results(validator, *ty)?;
let (destination, else_data) = if params.clone().eq(results.clone()) {
// It is possible there is no `else` block, so we will only
// allocate a block for it if/when we find the `else`. For now,
// we if the condition isn't true, then we jump directly to the
// destination block following the whole `if...end`. If we do end
// up discovering an `else`, then we will allocate a block for it
// and go back and patch the jump.
let destination = block_with_params(builder, results, environ)?;
let destination = block_with_params(builder, results.clone(), environ)?;
let branch_inst = builder
.ins()
.brz(val, destination, state.peekn(params.len()));
(destination, ElseData::NoElse { branch_inst })
} else {
// The `if` type signature is not valid without an `else` block,
// so we eagerly allocate the `else` block here.
let destination = block_with_params(builder, results, environ)?;
let else_block = block_with_params(builder, params, environ)?;
let destination = block_with_params(builder, results.clone(), environ)?;
let else_block = block_with_params(builder, params.clone(), environ)?;
builder
.ins()
.brz(val, else_block, state.peekn(params.len()));
Expand Down Expand Up @@ -268,9 +268,10 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
let else_block = match *else_data {
ElseData::NoElse { branch_inst } => {
let (params, _results) =
blocktype_params_results(module_translation_state, blocktype)?;
blocktype_params_results(validator, blocktype)?;
debug_assert_eq!(params.len(), num_return_values);
let else_block = block_with_params(builder, params, environ)?;
let else_block =
block_with_params(builder, params.clone(), environ)?;
builder.ins().jump(destination, state.peekn(params.len()));
state.popn(params.len());

Expand Down Expand Up @@ -384,9 +385,10 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
}
Operator::BrIf { relative_depth } => translate_br_if(*relative_depth, builder, state),
Operator::BrTable { table } => {
let (depths, default) = table.read_table()?;
let mut depths = table.targets().collect::<Result<Vec<_>, _>>()?;
let default = depths.pop().unwrap().0;
let mut min_depth = default;
for depth in &*depths {
for (depth, _) in depths.iter() {
if *depth < min_depth {
min_depth = *depth;
}
Expand All @@ -404,7 +406,7 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
let mut data = JumpTableData::with_capacity(depths.len());
if jump_args_count == 0 {
// No jump arguments
for depth in &*depths {
for (depth, _) in depths.iter() {
let block = {
let i = state.control_stack.len() - 1 - (*depth as usize);
let frame = &mut state.control_stack[i];
Expand All @@ -427,7 +429,7 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
let return_count = jump_args_count;
let mut dest_block_sequence = vec![];
let mut dest_block_map = HashMap::new();
for depth in &*depths {
for (depth, _) in depths.iter() {
let branch_block = match dest_block_map.entry(*depth as usize) {
hash_map::Entry::Occupied(entry) => *entry.get(),
hash_map::Entry::Vacant(entry) => {
Expand Down Expand Up @@ -1051,13 +1053,13 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
let index = FuncIndex::from_u32(*function_index);
state.push1(environ.translate_ref_func(builder.cursor(), index)?);
}
Operator::I32AtomicWait { .. } | Operator::I64AtomicWait { .. } => {
Operator::MemoryAtomicWait32 { .. } | Operator::MemoryAtomicWait64 { .. } => {
// The WebAssembly MVP only supports one linear memory and
// wasmparser will ensure that the memory indices specified are
// zero.
let implied_ty = match op {
Operator::I64AtomicWait { .. } => I64,
Operator::I32AtomicWait { .. } => I32,
Operator::MemoryAtomicWait64 { .. } => I64,
Operator::MemoryAtomicWait32 { .. } => I32,
_ => unreachable!(),
};
let heap_index = MemoryIndex::from_u32(0);
Expand All @@ -1078,7 +1080,7 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
)?;
state.push1(res);
}
Operator::AtomicNotify { .. } => {
Operator::MemoryAtomicNotify { .. } => {
// The WebAssembly MVP only supports one linear memory and
// wasmparser will ensure that the memory indices specified are
// zero.
Expand Down Expand Up @@ -1842,7 +1844,7 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
/// are dropped but special ones like `End` or `Else` signal the potential end of the unreachable
/// portion so the translation state must be updated accordingly.
fn translate_unreachable_operator<FE: FuncEnvironment + ?Sized>(
module_translation_state: &ModuleTranslationState,
validator: &FuncValidator<impl WasmModuleResources>,
op: &Operator,
builder: &mut FunctionBuilder,
state: &mut FuncTranslationState,
Expand Down Expand Up @@ -1886,7 +1888,7 @@ fn translate_unreachable_operator<FE: FuncEnvironment + ?Sized>(
let else_block = match *else_data {
ElseData::NoElse { branch_inst } => {
let (params, _results) =
blocktype_params_results(module_translation_state, blocktype)?;
blocktype_params_results(validator, blocktype)?;
let else_block = block_with_params(builder, params, environ)?;

// We change the target of the branch instruction.
Expand Down
29 changes: 17 additions & 12 deletions cranelift/wasm/src/environ/dummy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ use crate::environ::{
WasmFuncType, WasmResult,
};
use crate::func_translator::FuncTranslator;
use crate::state::ModuleTranslationState;
use crate::translation_utils::{
DataIndex, DefinedFuncIndex, ElemIndex, FuncIndex, Global, GlobalIndex, Memory, MemoryIndex,
SignatureIndex, Table, TableIndex,
Expand All @@ -26,6 +25,7 @@ use cranelift_frontend::FunctionBuilder;
use std::boxed::Box;
use std::string::String;
use std::vec::Vec;
use wasmparser::{FuncValidator, FunctionBody, ValidatorResources, WasmFeatures};

/// Compute a `ir::ExternalName` for a given wasm function index.
fn get_func_name(func_index: FuncIndex) -> ir::ExternalName {
Expand Down Expand Up @@ -738,10 +738,11 @@ impl<'data> ModuleEnvironment<'data> for DummyEnvironment {

fn define_function_body(
&mut self,
module_translation_state: &ModuleTranslationState,
body_bytes: &'data [u8],
body_offset: usize,
mut validator: FuncValidator<ValidatorResources>,
body: FunctionBody<'data>,
) -> WasmResult<()> {
self.func_bytecode_sizes
.push(body.get_binary_reader().bytes_remaining());
let func = {
let mut func_environ = DummyFuncEnvironment::new(&self.info, self.return_mode);
let func_index =
Expand All @@ -752,16 +753,10 @@ impl<'data> ModuleEnvironment<'data> for DummyEnvironment {
if self.debug_info {
func.collect_debug_info();
}
self.trans.translate(
module_translation_state,
body_bytes,
body_offset,
&mut func,
&mut func_environ,
)?;
self.trans
.translate_body(&mut validator, body, &mut func, &mut func_environ)?;
func
};
self.func_bytecode_sizes.push(body_bytes.len());
self.info.function_bodies.push(func);
Ok(())
}
Expand All @@ -773,4 +768,14 @@ impl<'data> ModuleEnvironment<'data> for DummyEnvironment {
fn declare_func_name(&mut self, func_index: FuncIndex, name: &'data str) {
self.function_names[func_index] = String::from(name);
}

fn wasm_features(&self) -> WasmFeatures {
WasmFeatures {
multi_value: true,
simd: true,
reference_types: true,
bulk_memory: true,
..WasmFeatures::default()
}
}
}
Loading

0 comments on commit 5570c95

Please sign in to comment.