diff --git a/CHANGELOG.md b/CHANGELOG.md index 4dcbf5438fe..8209933db42 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## **[Unreleased]** +- [#1303](https://github.com/wasmerio/wasmer/pull/1303) NaN canonicalization for singlepass backend. - [#1305](https://github.com/wasmerio/wasmer/pull/1305) Handle panics from DynamicFunc. - [#1301](https://github.com/wasmerio/wasmer/pull/1301) Update supported stable Rust version to 1.41.1. - [#1300](https://github.com/wasmerio/wasmer/pull/1300) Add support for multiple versions of WASI tests: wasitests now test all versions of WASI. diff --git a/lib/clif-backend/src/code.rs b/lib/clif-backend/src/code.rs index ada71d0a394..3b9bdf3940c 100644 --- a/lib/clif-backend/src/code.rs +++ b/lib/clif-backend/src/code.rs @@ -19,7 +19,7 @@ use std::mem; use std::sync::{Arc, RwLock}; use wasmer_runtime_core::error::CompileError; use wasmer_runtime_core::{ - backend::{CacheGen, Token}, + backend::{CacheGen, CompilerConfig, Token}, cache::{Artifact, Error as CacheError}, codegen::*, memory::MemoryType, @@ -36,7 +36,7 @@ use wasmparser::Type as WpType; static BACKEND_ID: &str = "cranelift"; pub struct CraneliftModuleCodeGenerator { - isa: Box, + isa: Option>, signatures: Option>>, pub clif_signatures: Map, function_signatures: Option>>, @@ -47,9 +47,8 @@ impl ModuleCodeGenerator for CraneliftModuleCodeGenerator { fn new() -> Self { - let isa = get_isa(); CraneliftModuleCodeGenerator { - isa, + isa: None, clif_signatures: Map::new(), functions: vec![], function_signatures: None, @@ -100,7 +99,7 @@ impl ModuleCodeGenerator position: Position::default(), func_env: FunctionEnvironment { module_info: Arc::clone(&module_info), - target_config: self.isa.frontend_config().clone(), + target_config: self.isa.as_ref().unwrap().frontend_config().clone(), clif_signatures: self.clif_signatures.clone(), }, loc, @@ -162,9 +161,9 @@ impl ModuleCodeGenerator } let (func_resolver_builder, debug_metadata, handler_data) = - 
FuncResolverBuilder::new(&*self.isa, func_bodies, module_info)?; + FuncResolverBuilder::new(&**self.isa.as_ref().unwrap(), func_bodies, module_info)?; - let trampolines = Arc::new(Trampolines::new(&*self.isa, module_info)); + let trampolines = Arc::new(Trampolines::new(&**self.isa.as_ref().unwrap(), module_info)); let signatures_empty = Map::new(); let signatures = if self.signatures.is_some() { @@ -191,9 +190,19 @@ impl ModuleCodeGenerator )) } + fn feed_compiler_config(&mut self, config: &CompilerConfig) -> Result<(), CodegenError> { + self.isa = Some(get_isa(Some(config))); + Ok(()) + } + fn feed_signatures(&mut self, signatures: Map) -> Result<(), CodegenError> { self.signatures = Some(Arc::new(signatures)); - let call_conv = self.isa.frontend_config().default_call_conv; + let call_conv = self + .isa + .as_ref() + .unwrap() + .frontend_config() + .default_call_conv; for (_sig_idx, func_sig) in self.signatures.as_ref().unwrap().iter() { self.clif_signatures .push(convert_func_sig(func_sig, call_conv)); @@ -1302,7 +1311,10 @@ fn generate_signature( } fn pointer_type(mcg: &CraneliftModuleCodeGenerator) -> ir::Type { - ir::Type::int(u16::from(mcg.isa.frontend_config().pointer_bits())).unwrap() + ir::Type::int(u16::from( + mcg.isa.as_ref().unwrap().frontend_config().pointer_bits(), + )) + .unwrap() } /// Declare local variables for the signature parameters that correspond to WebAssembly locals. 
diff --git a/lib/clif-backend/src/lib.rs b/lib/clif-backend/src/lib.rs index 95fd334a4ad..c4fb65736c3 100644 --- a/lib/clif-backend/src/lib.rs +++ b/lib/clif-backend/src/lib.rs @@ -29,6 +29,7 @@ use cranelift_codegen::{ settings::{self, Configurable}, }; use target_lexicon::Triple; +use wasmer_runtime_core::{backend::CompilerConfig, codegen::SimpleStreamingCompilerGen}; #[macro_use] extern crate serde_derive; @@ -36,7 +37,7 @@ extern crate serde_derive; extern crate rayon; extern crate serde; -fn get_isa() -> Box { +fn get_isa(config: Option<&CompilerConfig>) -> Box { let flags = { let mut builder = settings::builder(); builder.set("opt_level", "speed_and_size").unwrap(); @@ -48,6 +49,12 @@ fn get_isa() -> Box { builder.set("enable_verifier", "false").unwrap(); } + if let Some(config) = config { + if config.nan_canonicalization { + builder.set("enable_nan_canonicalization", "true").unwrap(); + } + } + let flags = settings::Flags::new(builder); debug_assert_eq!(flags.opt_level(), settings::OptLevel::SpeedAndSize); flags @@ -58,8 +65,6 @@ fn get_isa() -> Box { /// The current version of this crate pub const VERSION: &str = env!("CARGO_PKG_VERSION"); -use wasmer_runtime_core::codegen::SimpleStreamingCompilerGen; - /// Streaming compiler implementation for the Cranelift backed. Compiles web assembly binary into /// machine code. 
pub type CraneliftCompiler = SimpleStreamingCompilerGen< diff --git a/lib/clif-backend/src/trampoline.rs b/lib/clif-backend/src/trampoline.rs index 70854c1ea90..6b6c5d7ea08 100644 --- a/lib/clif-backend/src/trampoline.rs +++ b/lib/clif-backend/src/trampoline.rs @@ -212,8 +212,7 @@ fn wasm_ty_to_clif(ty: Type) -> ir::types::Type { } fn generate_trampoline_signature() -> ir::Signature { - let isa = super::get_isa(); - let call_convention = isa.default_call_conv(); + let call_convention = super::get_isa(None).default_call_conv(); let mut sig = ir::Signature::new(call_convention); let ptr_param = ir::AbiParam { @@ -229,8 +228,7 @@ fn generate_trampoline_signature() -> ir::Signature { } fn generate_export_signature(func_sig: &FuncSig) -> ir::Signature { - let isa = super::get_isa(); - let call_convention = isa.default_call_conv(); + let call_convention = super::get_isa(None).default_call_conv(); let mut export_clif_sig = ir::Signature::new(call_convention); let func_sig_iter = func_sig.params().iter().map(|wasm_ty| ir::AbiParam { diff --git a/lib/llvm-backend/src/code.rs b/lib/llvm-backend/src/code.rs index cebde5daefa..822447076d5 100644 --- a/lib/llvm-backend/src/code.rs +++ b/lib/llvm-backend/src/code.rs @@ -3701,7 +3701,7 @@ impl<'ctx> FunctionCodeGenerator for LLVMFunctionCodeGenerator<'ct .try_as_basic_value() .left() .unwrap(); - state.push1_extra(res, i); + state.push1_extra(res, i | ExtraInfo::pending_f32_nan()); } Operator::F64Trunc => { let (v, i) = state.pop1_extra()?; @@ -3714,7 +3714,7 @@ impl<'ctx> FunctionCodeGenerator for LLVMFunctionCodeGenerator<'ct .try_as_basic_value() .left() .unwrap(); - state.push1_extra(res, i); + state.push1_extra(res, i | ExtraInfo::pending_f64_nan()); } Operator::F32Nearest => { let (v, i) = state.pop1_extra()?; @@ -3727,7 +3727,7 @@ impl<'ctx> FunctionCodeGenerator for LLVMFunctionCodeGenerator<'ct .try_as_basic_value() .left() .unwrap(); - state.push1_extra(res, i); + state.push1_extra(res, i | 
ExtraInfo::pending_f32_nan()); } Operator::F64Nearest => { let (v, i) = state.pop1_extra()?; @@ -3740,7 +3740,7 @@ impl<'ctx> FunctionCodeGenerator for LLVMFunctionCodeGenerator<'ct .try_as_basic_value() .left() .unwrap(); - state.push1_extra(res, i); + state.push1_extra(res, i | ExtraInfo::pending_f64_nan()); } Operator::F32Abs => { let (v, i) = state.pop1_extra()?; diff --git a/lib/runtime-core/src/backend.rs b/lib/runtime-core/src/backend.rs index 4aca2d2a71c..eabe526d8c7 100644 --- a/lib/runtime-core/src/backend.rs +++ b/lib/runtime-core/src/backend.rs @@ -132,6 +132,10 @@ pub struct CompilerConfig { /// When enabled there can be a small amount of runtime performance overhead. pub full_preemption: bool, + /// Always choose a unique bit representation for NaN. + /// Enabling this makes execution deterministic but increases runtime overhead. + pub nan_canonicalization: bool, + pub features: Features, // Target info. Presently only supported by LLVM. diff --git a/lib/singlepass-backend/src/codegen_x64.rs b/lib/singlepass-backend/src/codegen_x64.rs index 5903508fca1..38df4e67b6b 100644 --- a/lib/singlepass-backend/src/codegen_x64.rs +++ b/lib/singlepass-backend/src/codegen_x64.rs @@ -39,8 +39,8 @@ use wasmer_runtime_core::{ structures::{Map, TypedIndex}, typed_func::{Trampoline, Wasm}, types::{ - FuncIndex, FuncSig, GlobalIndex, LocalFuncIndex, LocalOrImport, MemoryIndex, SigIndex, - TableIndex, Type, + FuncIndex, FuncSig, GlobalIndex, ImportedGlobalIndex, LocalFuncIndex, LocalGlobalIndex, + LocalOrImport, MemoryIndex, SigIndex, TableIndex, Type, }, vm::{self, LocalGlobal, LocalTable, INTERNALS_SIZE}, wasmparser::{MemoryImmediate, Operator, Type as WpType, TypeOrFuncType as WpTypeOrFuncType}, @@ -220,8 +220,12 @@ pub struct X64FunctionCode { returns: SmallVec<[WpType; 1]>, locals: Vec, num_params: usize, - num_locals: usize, + local_types: Vec, value_stack: Vec, + + /// Metadata about floating point values on the stack. 
+ fp_stack: Vec, + control_stack: Vec, machine: Machine, unreachable_depth: usize, @@ -231,6 +235,127 @@ pub struct X64FunctionCode { exception_table: Option, } +/// Metadata about a floating-point value. +#[derive(Copy, Clone, Debug)] +struct FloatValue { + /// Do we need to canonicalize the value before its bit pattern is next observed? If so, how? + canonicalization: Option, + + /// Corresponding depth in the main value stack. + depth: usize, +} + +impl FloatValue { + fn new(depth: usize) -> Self { + FloatValue { + canonicalization: None, + depth, + } + } + + fn cncl_f32(depth: usize) -> Self { + FloatValue { + canonicalization: Some(CanonicalizeType::F32), + depth, + } + } + + fn cncl_f64(depth: usize) -> Self { + FloatValue { + canonicalization: Some(CanonicalizeType::F64), + depth, + } + } + + fn promote(self, depth: usize) -> FloatValue { + FloatValue { + canonicalization: match self.canonicalization { + Some(CanonicalizeType::F32) => Some(CanonicalizeType::F64), + Some(CanonicalizeType::F64) => panic!("cannot promote F64"), + None => None, + }, + depth, + } + } + + fn demote(self, depth: usize) -> FloatValue { + FloatValue { + canonicalization: match self.canonicalization { + Some(CanonicalizeType::F64) => Some(CanonicalizeType::F32), + Some(CanonicalizeType::F32) => panic!("cannot demote F32"), + None => None, + }, + depth, + } + } +} + +/// Type of a pending canonicalization floating point value. +/// Sometimes we don't have the type information elsewhere and therefore we need to track it here. 
+#[derive(Copy, Clone, Debug)] +enum CanonicalizeType { + F32, + F64, +} + +impl CanonicalizeType { + fn to_size(&self) -> Size { + match self { + CanonicalizeType::F32 => Size::S32, + CanonicalizeType::F64 => Size::S64, + } + } +} + +trait PopMany { + fn peek1(&self) -> Result<&T, CodegenError>; + fn pop1(&mut self) -> Result; + fn pop2(&mut self) -> Result<(T, T), CodegenError>; +} + +impl PopMany for Vec { + fn peek1(&self) -> Result<&T, CodegenError> { + match self.last() { + Some(x) => Ok(x), + None => Err(CodegenError { + message: "peek1() expects at least 1 element".into(), + }), + } + } + fn pop1(&mut self) -> Result { + match self.pop() { + Some(x) => Ok(x), + None => Err(CodegenError { + message: "pop1() expects at least 1 element".into(), + }), + } + } + fn pop2(&mut self) -> Result<(T, T), CodegenError> { + if self.len() < 2 { + return Err(CodegenError { + message: "pop2() expects at least 2 elements".into(), + }); + } + + let right = self.pop().unwrap(); + let left = self.pop().unwrap(); + Ok((left, right)) + } +} + +trait WpTypeExt { + fn is_float(&self) -> bool; +} + +impl WpTypeExt for WpType { + fn is_float(&self) -> bool { + match self { + WpType::F32 | WpType::F64 => true, + _ => false, + } + } +} + enum FuncPtrInner {} #[repr(transparent)] #[derive(Copy, Clone, Debug)] @@ -282,6 +407,7 @@ pub struct ControlFrame { pub if_else: IfElseState, pub returns: SmallVec<[WpType; 1]>, pub value_stack_depth: usize, + pub fp_stack_depth: usize, pub state: MachineState, pub state_diff_id: usize, } @@ -645,6 +771,7 @@ struct CodegenConfig { enforce_stack_check: bool, track_state: bool, full_preemption: bool, + nan_canonicalization: bool, } impl ModuleCodeGenerator @@ -738,9 +865,10 @@ impl ModuleCodeGenerator breakpoints: Some(breakpoints), returns: smallvec![], locals: vec![], + local_types: vec![], num_params: 0, - num_locals: 0, value_stack: vec![], + fp_stack: vec![], control_stack: vec![], machine, unreachable_depth: 0, @@ -1021,6 +1149,7 @@ impl 
ModuleCodeGenerator enforce_stack_check: config.enforce_stack_check, track_state: config.track_state, full_preemption: config.full_preemption, + nan_canonicalization: config.nan_canonicalization, })); Ok(()) } @@ -1103,6 +1232,53 @@ impl X64FunctionCode { ret } + /// Canonicalizes the floating point value at `input` into `output`. + fn canonicalize_nan( + a: &mut Assembler, + m: &mut Machine, + sz: Size, + input: Location, + output: Location, + ) { + let tmp1 = m.acquire_temp_xmm().unwrap(); + let tmp2 = m.acquire_temp_xmm().unwrap(); + let tmp3 = m.acquire_temp_xmm().unwrap(); + let tmpg1 = m.acquire_temp_gpr().unwrap(); + + Self::emit_relaxed_binop(a, m, Assembler::emit_mov, sz, input, Location::XMM(tmp1)); + + match sz { + Size::S32 => { + a.emit_vcmpunordss(tmp1, XMMOrMemory::XMM(tmp1), tmp2); + a.emit_mov( + Size::S32, + Location::Imm32(0x7FC0_0000), // Canonical NaN + Location::GPR(tmpg1), + ); + a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(tmp3)); + a.emit_vblendvps(tmp2, XMMOrMemory::XMM(tmp3), tmp1, tmp1); + } + Size::S64 => { + a.emit_vcmpunordsd(tmp1, XMMOrMemory::XMM(tmp1), tmp2); + a.emit_mov( + Size::S64, + Location::Imm64(0x7FF8_0000_0000_0000), // Canonical NaN + Location::GPR(tmpg1), + ); + a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(tmp3)); + a.emit_vblendvpd(tmp2, XMMOrMemory::XMM(tmp3), tmp1, tmp1); + } + _ => unreachable!(), + } + + Self::emit_relaxed_binop(a, m, Assembler::emit_mov, sz, Location::XMM(tmp1), output); + + m.release_temp_gpr(tmpg1); + m.release_temp_xmm(tmp3); + m.release_temp_xmm(tmp2); + m.release_temp_xmm(tmp1); + } + /// Moves `loc` to a valid location for `div`/`idiv`. 
fn emit_relaxed_xdiv( a: &mut Assembler, @@ -2506,14 +2682,14 @@ impl FunctionCodeGenerator for X64FunctionCode { Ok(()) } - fn feed_param(&mut self, _ty: WpType) -> Result<(), CodegenError> { + fn feed_param(&mut self, ty: WpType) -> Result<(), CodegenError> { self.num_params += 1; - self.num_locals += 1; + self.local_types.push(ty); Ok(()) } - fn feed_local(&mut self, _ty: WpType, n: usize, _loc: u32) -> Result<(), CodegenError> { - self.num_locals += n; + fn feed_local(&mut self, ty: WpType, n: usize, _loc: u32) -> Result<(), CodegenError> { + self.local_types.extend(iter::repeat(ty).take(n)); Ok(()) } @@ -2550,7 +2726,7 @@ impl FunctionCodeGenerator for X64FunctionCode { self.locals = self .machine - .init_locals(a, self.num_locals, self.num_params); + .init_locals(a, self.local_types.len(), self.num_params); self.machine.state.register_values [X64Register::GPR(Machine::get_vmctx_reg()).to_index().0] = MachineValue::Vmctx; @@ -2578,6 +2754,7 @@ impl FunctionCodeGenerator for X64FunctionCode { if_else: IfElseState::None, returns: self.returns.clone(), value_stack_depth: 0, + fp_stack_depth: 0, state: self.machine.state.clone(), state_diff_id, }); @@ -2631,6 +2808,8 @@ impl FunctionCodeGenerator for X64FunctionCode { module_info: &ModuleInfo, _source_loc: u32, ) -> Result<(), CodegenError> { + assert!(self.fp_stack.len() <= self.value_stack.len()); + let a = self.assembler.as_mut().unwrap(); match ev { @@ -2797,12 +2976,14 @@ impl FunctionCodeGenerator for X64FunctionCode { Location::Memory(tmp, (local_index.index() as i32) * 8), Location::GPR(tmp), ); + let ty = type_to_wp_type(module_info.globals[local_index].desc.ty); + if ty.is_float() { + // Depth is the WasmStack(len()) slot acquired below. + self.fp_stack.push(FloatValue::new(self.value_stack.len())); + } self.machine.acquire_locations( a, - &[( - type_to_wp_type(module_info.globals[local_index].desc.ty), - MachineValue::WasmStack(self.value_stack.len()), - )], + &[(ty, MachineValue::WasmStack(self.value_stack.len()))], false, )[0] } @@ -2820,12 +3001,14 
@@ impl FunctionCodeGenerator for X64FunctionCode { Location::Memory(tmp, (import_index.index() as i32) * 8), Location::GPR(tmp), ); + let ty = type_to_wp_type(module_info.imported_globals[import_index].1.ty); + if ty.is_float() { + // Depth is the WasmStack(len()) slot acquired below. + self.fp_stack.push(FloatValue::new(self.value_stack.len())); + } self.machine.acquire_locations( a, - &[( - type_to_wp_type(module_info.imported_globals[import_index].1.ty), - MachineValue::WasmStack(self.value_stack.len()), - )], + &[(ty, MachineValue::WasmStack(self.value_stack.len()))], false, )[0] } @@ -2850,7 +3033,7 @@ impl FunctionCodeGenerator for X64FunctionCode { let tmp = self.machine.acquire_temp_gpr().unwrap(); - if global_index < module_info.imported_globals.len() { + let ty = if global_index < module_info.imported_globals.len() { a.emit_mov( Size::S64, Location::Memory( @@ -2859,6 +3042,11 @@ impl FunctionCodeGenerator for X64FunctionCode { ), Location::GPR(tmp), ); + type_to_wp_type( + module_info.imported_globals[ImportedGlobalIndex::new(global_index)] + .1 + .ty, + ) } else { global_index -= module_info.imported_globals.len(); if global_index >= module_info.globals.len() { @@ -2874,21 +3062,54 @@ impl FunctionCodeGenerator for X64FunctionCode { ), Location::GPR(tmp), ); - } + type_to_wp_type( + module_info.globals[LocalGlobalIndex::new(global_index)] + .desc + .ty, + ) + }; a.emit_mov( Size::S64, Location::Memory(tmp, (global_index as i32) * 8), Location::GPR(tmp), ); - Self::emit_relaxed_binop( - a, - &mut self.machine, - Assembler::emit_mov, - Size::S64, - loc, - Location::Memory(tmp, LocalGlobal::offset_data() as i32), - ); - + if ty.is_float() { + let fp = self.fp_stack.pop1()?; + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization + && fp.canonicalization.is_some() + { + Self::canonicalize_nan( + a, + &mut self.machine, + match ty { + WpType::F32 => Size::S32, + WpType::F64 => Size::S64, + _ => unreachable!(), + }, + loc, + Location::Memory(tmp, LocalGlobal::offset_data() as i32), + 
); + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::Memory(tmp, LocalGlobal::offset_data() as i32), + ); + } + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::Memory(tmp, LocalGlobal::offset_data() as i32), + ); + } self.machine.release_temp_gpr(tmp); } Operator::LocalGet { local_index } => { @@ -2907,33 +3128,95 @@ impl FunctionCodeGenerator for X64FunctionCode { ret, ); self.value_stack.push(ret); + if self.local_types[local_index].is_float() { + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); + } } Operator::LocalSet { local_index } => { let local_index = local_index as usize; let loc = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); - Self::emit_relaxed_binop( - a, - &mut self.machine, - Assembler::emit_mov, - Size::S64, - loc, - self.locals[local_index], - ); + if self.local_types[local_index].is_float() { + let fp = self.fp_stack.pop1()?; + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization + && fp.canonicalization.is_some() + { + Self::canonicalize_nan( + a, + &mut self.machine, + match self.local_types[local_index] { + WpType::F32 => Size::S32, + WpType::F64 => Size::S64, + _ => unreachable!(), + }, + loc, + self.locals[local_index], + ); + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + self.locals[local_index], + ); + } + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + self.locals[local_index], + ); + } } Operator::LocalTee { local_index } => { let local_index = local_index as usize; let loc = *self.value_stack.last().unwrap(); - Self::emit_relaxed_binop( - a, - &mut self.machine, - Assembler::emit_mov, - Size::S64, - loc, - self.locals[local_index], - ); + if self.local_types[local_index].is_float() { + let fp = 
self.fp_stack.peek1()?; + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization + && fp.canonicalization.is_some() + { + Self::canonicalize_nan( + a, + &mut self.machine, + match self.local_types[local_index] { + WpType::F32 => Size::S32, + WpType::F64 => Size::S64, + _ => unreachable!(), + }, + loc, + self.locals[local_index], + ); + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + self.locals[local_index], + ); + } + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + self.locals[local_index], + ); + } } Operator::I32Const { value } => { self.value_stack.push(Location::Imm32(value as u32)); @@ -3905,36 +4188,61 @@ impl FunctionCodeGenerator for X64FunctionCode { Operator::F32Const { value } => { self.value_stack.push(Location::Imm32(value.bits())); + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); self.machine .state .wasm_stack .push(WasmAbstractValue::Const(value.bits() as u64)); } - Operator::F32Add => Self::emit_fp_binop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vaddss, - )?, - Operator::F32Sub => Self::emit_fp_binop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vsubss, - )?, - Operator::F32Mul => Self::emit_fp_binop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vmulss, - )?, - Operator::F32Div => Self::emit_fp_binop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vdivss, - )?, + Operator::F32Add => { + self.fp_stack.pop2()?; + self.fp_stack + .push(FloatValue::cncl_f32(self.value_stack.len() - 2)); + Self::emit_fp_binop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vaddss, + )?; + } + Operator::F32Sub => { + self.fp_stack.pop2()?; + self.fp_stack + .push(FloatValue::cncl_f32(self.value_stack.len() - 2)); + Self::emit_fp_binop_avx( + a, + &mut self.machine, + &mut 
self.value_stack, + Assembler::emit_vsubss, + )? + } + Operator::F32Mul => { + self.fp_stack.pop2()?; + self.fp_stack + .push(FloatValue::cncl_f32(self.value_stack.len() - 2)); + Self::emit_fp_binop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vmulss, + )? + } + Operator::F32Div => { + self.fp_stack.pop2()?; + self.fp_stack + .push(FloatValue::cncl_f32(self.value_stack.len() - 2)); + Self::emit_fp_binop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vdivss, + )? + } Operator::F32Max => { + self.fp_stack.pop2()?; + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 2)); if !a.arch_supports_canonicalize_nan() { Self::emit_fp_binop_avx( a, @@ -4058,6 +4366,9 @@ impl FunctionCodeGenerator for X64FunctionCode { } } Operator::F32Min => { + self.fp_stack.pop2()?; + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 2)); if !a.arch_supports_canonicalize_nan() { Self::emit_fp_binop_avx( a, @@ -4186,72 +4497,115 @@ impl FunctionCodeGenerator for X64FunctionCode { self.machine.release_temp_xmm(tmp1); } } - Operator::F32Eq => Self::emit_fp_cmpop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vcmpeqss, - )?, - Operator::F32Ne => Self::emit_fp_cmpop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vcmpneqss, - )?, - Operator::F32Lt => Self::emit_fp_cmpop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vcmpltss, - )?, - Operator::F32Le => Self::emit_fp_cmpop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vcmpless, - )?, - Operator::F32Gt => Self::emit_fp_cmpop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vcmpgtss, - )?, - Operator::F32Ge => Self::emit_fp_cmpop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vcmpgess, - )?, - Operator::F32Nearest => Self::emit_fp_unop_avx( - a, - &mut self.machine, - &mut self.value_stack, - 
Assembler::emit_vroundss_nearest, - )?, - Operator::F32Floor => Self::emit_fp_unop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vroundss_floor, - )?, - Operator::F32Ceil => Self::emit_fp_unop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vroundss_ceil, - )?, - Operator::F32Trunc => Self::emit_fp_unop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vroundss_trunc, - )?, - Operator::F32Sqrt => Self::emit_fp_unop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vsqrtss, - )?, + Operator::F32Eq => { + self.fp_stack.pop2()?; + Self::emit_fp_cmpop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vcmpeqss, + )? + } + Operator::F32Ne => { + self.fp_stack.pop2()?; + Self::emit_fp_cmpop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vcmpneqss, + )? + } + Operator::F32Lt => { + self.fp_stack.pop2()?; + Self::emit_fp_cmpop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vcmpltss, + )? + } + Operator::F32Le => { + self.fp_stack.pop2()?; + Self::emit_fp_cmpop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vcmpless, + )? + } + Operator::F32Gt => { + self.fp_stack.pop2()?; + Self::emit_fp_cmpop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vcmpgtss, + )? + } + Operator::F32Ge => { + self.fp_stack.pop2()?; + Self::emit_fp_cmpop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vcmpgess, + )? + } + Operator::F32Nearest => { + self.fp_stack.pop1()?; + self.fp_stack + .push(FloatValue::cncl_f32(self.value_stack.len() - 1)); + Self::emit_fp_unop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vroundss_nearest, + )? 
+ } + Operator::F32Floor => { + self.fp_stack.pop1()?; + self.fp_stack + .push(FloatValue::cncl_f32(self.value_stack.len() - 1)); + Self::emit_fp_unop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vroundss_floor, + )? + } + Operator::F32Ceil => { + self.fp_stack.pop1()?; + self.fp_stack + .push(FloatValue::cncl_f32(self.value_stack.len() - 1)); + Self::emit_fp_unop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vroundss_ceil, + )? + } + Operator::F32Trunc => { + self.fp_stack.pop1()?; + self.fp_stack + .push(FloatValue::cncl_f32(self.value_stack.len() - 1)); + Self::emit_fp_unop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vroundss_trunc, + )? + } + Operator::F32Sqrt => { + self.fp_stack.pop1()?; + self.fp_stack + .push(FloatValue::cncl_f32(self.value_stack.len() - 1)); + Self::emit_fp_unop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vsqrtss, + )? + } Operator::F32Copysign => { let loc_b = @@ -4265,10 +4619,34 @@ impl FunctionCodeGenerator for X64FunctionCode { )[0]; self.value_stack.push(ret); + let (fp_src1, fp_src2) = self.fp_stack.pop2()?; + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); + let tmp1 = self.machine.acquire_temp_gpr().unwrap(); let tmp2 = self.machine.acquire_temp_gpr().unwrap(); - a.emit_mov(Size::S32, loc_a, Location::GPR(tmp1)); - a.emit_mov(Size::S32, loc_b, Location::GPR(tmp2)); + + if a.arch_supports_canonicalize_nan() && self.config.nan_canonicalization { + for (fp, loc, tmp) in [(fp_src1, loc_a, tmp1), (fp_src2, loc_b, tmp2)].iter() { + match fp.canonicalization { + Some(_) => { + Self::canonicalize_nan( + a, + &mut self.machine, + Size::S32, + *loc, + Location::GPR(*tmp), + ); + } + None => { + a.emit_mov(Size::S32, *loc, Location::GPR(*tmp)); + } + } + } + } else { + a.emit_mov(Size::S32, loc_a, Location::GPR(tmp1)); + a.emit_mov(Size::S32, loc_b, Location::GPR(tmp2)); + } a.emit_and( Size::S32, 
Location::Imm32(0x7fffffffu32), @@ -4286,6 +4664,8 @@ impl FunctionCodeGenerator for X64FunctionCode { } Operator::F32Abs => { + // Preserve canonicalization state. + let loc = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); let ret = self.machine.acquire_locations( @@ -4306,6 +4686,8 @@ impl FunctionCodeGenerator for X64FunctionCode { } Operator::F32Neg => { + // Preserve canonicalization state. + let loc = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); let ret = self.machine.acquire_locations( @@ -4346,36 +4728,62 @@ impl FunctionCodeGenerator for X64FunctionCode { Operator::F64Const { value } => { self.value_stack.push(Location::Imm64(value.bits())); + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); self.machine .state .wasm_stack .push(WasmAbstractValue::Const(value.bits())); } - Operator::F64Add => Self::emit_fp_binop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vaddsd, - )?, - Operator::F64Sub => Self::emit_fp_binop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vsubsd, - )?, - Operator::F64Mul => Self::emit_fp_binop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vmulsd, - )?, - Operator::F64Div => Self::emit_fp_binop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vdivsd, - )?, + Operator::F64Add => { + self.fp_stack.pop2()?; + self.fp_stack + .push(FloatValue::cncl_f64(self.value_stack.len() - 2)); + Self::emit_fp_binop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vaddsd, + )? + } + Operator::F64Sub => { + self.fp_stack.pop2()?; + self.fp_stack + .push(FloatValue::cncl_f64(self.value_stack.len() - 2)); + Self::emit_fp_binop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vsubsd, + )? 
+ } + Operator::F64Mul => { + self.fp_stack.pop2()?; + self.fp_stack + .push(FloatValue::cncl_f64(self.value_stack.len() - 2)); + Self::emit_fp_binop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vmulsd, + )? + } + Operator::F64Div => { + self.fp_stack.pop2()?; + self.fp_stack + .push(FloatValue::cncl_f64(self.value_stack.len() - 2)); + Self::emit_fp_binop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vdivsd, + )? + } Operator::F64Max => { + self.fp_stack.pop2()?; + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 2)); + if !a.arch_supports_canonicalize_nan() { Self::emit_fp_binop_avx( a, @@ -4499,6 +4907,10 @@ impl FunctionCodeGenerator for X64FunctionCode { } } Operator::F64Min => { + self.fp_stack.pop2()?; + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 2)); + if !a.arch_supports_canonicalize_nan() { Self::emit_fp_binop_avx( a, @@ -4627,72 +5039,115 @@ impl FunctionCodeGenerator for X64FunctionCode { self.machine.release_temp_xmm(tmp1); } } - Operator::F64Eq => Self::emit_fp_cmpop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vcmpeqsd, - )?, - Operator::F64Ne => Self::emit_fp_cmpop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vcmpneqsd, - )?, - Operator::F64Lt => Self::emit_fp_cmpop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vcmpltsd, - )?, - Operator::F64Le => Self::emit_fp_cmpop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vcmplesd, - )?, - Operator::F64Gt => Self::emit_fp_cmpop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vcmpgtsd, - )?, - Operator::F64Ge => Self::emit_fp_cmpop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vcmpgesd, - )?, - Operator::F64Nearest => Self::emit_fp_unop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vroundsd_nearest, - )?, - Operator::F64Floor => 
Self::emit_fp_unop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vroundsd_floor, - )?, - Operator::F64Ceil => Self::emit_fp_unop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vroundsd_ceil, - )?, - Operator::F64Trunc => Self::emit_fp_unop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vroundsd_trunc, - )?, - Operator::F64Sqrt => Self::emit_fp_unop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vsqrtsd, - )?, + Operator::F64Eq => { + self.fp_stack.pop2()?; + Self::emit_fp_cmpop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vcmpeqsd, + )? + } + Operator::F64Ne => { + self.fp_stack.pop2()?; + Self::emit_fp_cmpop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vcmpneqsd, + )? + } + Operator::F64Lt => { + self.fp_stack.pop2()?; + Self::emit_fp_cmpop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vcmpltsd, + )? + } + Operator::F64Le => { + self.fp_stack.pop2()?; + Self::emit_fp_cmpop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vcmplesd, + )? + } + Operator::F64Gt => { + self.fp_stack.pop2()?; + Self::emit_fp_cmpop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vcmpgtsd, + )? + } + Operator::F64Ge => { + self.fp_stack.pop2()?; + Self::emit_fp_cmpop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vcmpgesd, + )? + } + Operator::F64Nearest => { + self.fp_stack.pop1()?; + self.fp_stack + .push(FloatValue::cncl_f64(self.value_stack.len() - 1)); + Self::emit_fp_unop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vroundsd_nearest, + )? + } + Operator::F64Floor => { + self.fp_stack.pop1()?; + self.fp_stack + .push(FloatValue::cncl_f64(self.value_stack.len() - 1)); + Self::emit_fp_unop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vroundsd_floor, + )? 
+ } + Operator::F64Ceil => { + self.fp_stack.pop1()?; + self.fp_stack + .push(FloatValue::cncl_f64(self.value_stack.len() - 1)); + Self::emit_fp_unop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vroundsd_ceil, + )? + } + Operator::F64Trunc => { + self.fp_stack.pop1()?; + self.fp_stack + .push(FloatValue::cncl_f64(self.value_stack.len() - 1)); + Self::emit_fp_unop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vroundsd_trunc, + )? + } + Operator::F64Sqrt => { + self.fp_stack.pop1()?; + self.fp_stack + .push(FloatValue::cncl_f64(self.value_stack.len() - 1)); + Self::emit_fp_unop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vsqrtsd, + )? + } Operator::F64Copysign => { let loc_b = @@ -4706,12 +5161,36 @@ impl FunctionCodeGenerator for X64FunctionCode { )[0]; self.value_stack.push(ret); + let (fp_src1, fp_src2) = self.fp_stack.pop2()?; + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); + let tmp1 = self.machine.acquire_temp_gpr().unwrap(); let tmp2 = self.machine.acquire_temp_gpr().unwrap(); - let c = self.machine.acquire_temp_gpr().unwrap(); - a.emit_mov(Size::S64, loc_a, Location::GPR(tmp1)); - a.emit_mov(Size::S64, loc_b, Location::GPR(tmp2)); + if a.arch_supports_canonicalize_nan() && self.config.nan_canonicalization { + for (fp, loc, tmp) in [(fp_src1, loc_a, tmp1), (fp_src2, loc_b, tmp2)].iter() { + match fp.canonicalization { + Some(_) => { + Self::canonicalize_nan( + a, + &mut self.machine, + Size::S64, + *loc, + Location::GPR(*tmp), + ); + } + None => { + a.emit_mov(Size::S64, *loc, Location::GPR(*tmp)); + } + } + } + } else { + a.emit_mov(Size::S64, loc_a, Location::GPR(tmp1)); + a.emit_mov(Size::S64, loc_b, Location::GPR(tmp2)); + } + + let c = self.machine.acquire_temp_gpr().unwrap(); a.emit_mov( Size::S64, @@ -4736,6 +5215,8 @@ impl FunctionCodeGenerator for X64FunctionCode { } Operator::F64Abs => { + // Preserve canonicalization state. 
+ let loc = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); let ret = self.machine.acquire_locations( @@ -4762,6 +5243,8 @@ impl FunctionCodeGenerator for X64FunctionCode { } Operator::F64Neg => { + // Preserve canonicalization state. + let loc = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); let ret = self.machine.acquire_locations( @@ -4799,18 +5282,26 @@ impl FunctionCodeGenerator for X64FunctionCode { } } - Operator::F64PromoteF32 => Self::emit_fp_unop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vcvtss2sd, - )?, - Operator::F32DemoteF64 => Self::emit_fp_unop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vcvtsd2ss, - )?, + Operator::F64PromoteF32 => { + let fp = self.fp_stack.pop1()?; + self.fp_stack.push(fp.promote(self.value_stack.len() - 1)); + Self::emit_fp_unop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vcvtss2sd, + )? + } + Operator::F32DemoteF64 => { + let fp = self.fp_stack.pop1()?; + self.fp_stack.push(fp.demote(self.value_stack.len() - 1)); + Self::emit_fp_unop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vcvtsd2ss, + )? 
+ } Operator::I32ReinterpretF32 => { let loc = @@ -4821,16 +5312,24 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + let fp = self.fp_stack.pop1()?; - if loc != ret { - Self::emit_relaxed_binop( - a, - &mut self.machine, - Assembler::emit_mov, - Size::S32, - loc, - ret, - ); + if !a.arch_supports_canonicalize_nan() + || !self.config.nan_canonicalization + || fp.canonicalization.is_none() + { + if loc != ret { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + ret, + ); + } + } else { + Self::canonicalize_nan(a, &mut self.machine, Size::S32, loc, ret); } } Operator::F32ReinterpretI32 => { @@ -4842,6 +5341,8 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); if loc != ret { Self::emit_relaxed_binop( @@ -4864,16 +5365,24 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + let fp = self.fp_stack.pop1()?; - if loc != ret { - Self::emit_relaxed_binop( - a, - &mut self.machine, - Assembler::emit_mov, - Size::S64, - loc, - ret, - ); + if !a.arch_supports_canonicalize_nan() + || !self.config.nan_canonicalization + || fp.canonicalization.is_none() + { + if loc != ret { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + ret, + ); + } + } else { + Self::canonicalize_nan(a, &mut self.machine, Size::S64, loc, ret); } } Operator::F64ReinterpretI64 => { @@ -4885,6 +5394,8 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); if loc != ret { Self::emit_relaxed_binop( @@ -4907,6 +5418,7 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack.pop1()?; if a.arch_has_itruncf() { let tmp_out = self.machine.acquire_temp_gpr().unwrap(); 
@@ -4967,6 +5479,7 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack.pop1()?; let tmp_out = self.machine.acquire_temp_gpr().unwrap(); let tmp_in = self.machine.acquire_temp_xmm().unwrap(); @@ -5018,6 +5531,7 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack.pop1()?; if a.arch_has_itruncf() { let tmp_out = self.machine.acquire_temp_gpr().unwrap(); @@ -5078,6 +5592,7 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack.pop1()?; let tmp_out = self.machine.acquire_temp_gpr().unwrap(); let tmp_in = self.machine.acquire_temp_xmm().unwrap(); @@ -5136,6 +5651,7 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack.pop1()?; if a.arch_has_itruncf() { let tmp_out = self.machine.acquire_temp_gpr().unwrap(); @@ -5196,6 +5712,7 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack.pop1()?; let tmp_out = self.machine.acquire_temp_gpr().unwrap(); let tmp_in = self.machine.acquire_temp_xmm().unwrap(); @@ -5254,6 +5771,7 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack.pop1()?; if a.arch_has_itruncf() { let tmp_out = self.machine.acquire_temp_gpr().unwrap(); @@ -5338,6 +5856,7 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack.pop1()?; let tmp_out = self.machine.acquire_temp_gpr().unwrap(); let tmp_in = self.machine.acquire_temp_xmm().unwrap(); @@ -5415,6 +5934,7 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack.pop1()?; if a.arch_has_itruncf() { let tmp_out = self.machine.acquire_temp_gpr().unwrap(); @@ -5476,6 +5996,7 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + 
self.fp_stack.pop1()?; let tmp_out = self.machine.acquire_temp_gpr().unwrap(); let tmp_in = self.machine.acquire_temp_xmm().unwrap(); @@ -5528,6 +6049,7 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack.pop1()?; if a.arch_has_itruncf() { let tmp_out = self.machine.acquire_temp_gpr().unwrap(); @@ -5594,6 +6116,7 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack.pop1()?; let tmp_out = self.machine.acquire_temp_gpr().unwrap(); let tmp_in = self.machine.acquire_temp_xmm().unwrap(); @@ -5657,6 +6180,7 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack.pop1()?; if a.arch_has_itruncf() { let tmp_out = self.machine.acquire_temp_gpr().unwrap(); @@ -5718,6 +6242,7 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack.pop1()?; let tmp_out = self.machine.acquire_temp_gpr().unwrap(); let tmp_in = self.machine.acquire_temp_xmm().unwrap(); @@ -5776,6 +6301,7 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack.pop1()?; if a.arch_has_itruncf() { let tmp_out = self.machine.acquire_temp_gpr().unwrap(); @@ -5861,6 +6387,7 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack.pop1()?; let tmp_out = self.machine.acquire_temp_gpr().unwrap(); let tmp_in = self.machine.acquire_temp_xmm().unwrap(); @@ -5938,6 +6465,8 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); // Converting i32 to f32 never results in NaN. 
if a.arch_has_fconverti() { let tmp_out = self.machine.acquire_temp_xmm().unwrap(); @@ -5982,6 +6511,9 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); // Converting i32 to f32 never results in NaN. + if a.arch_has_fconverti() { let tmp_out = self.machine.acquire_temp_xmm().unwrap(); let tmp_in = self.machine.acquire_temp_gpr().unwrap(); @@ -6025,6 +6557,9 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); // Converting i64 to f32 never results in NaN. + if a.arch_has_fconverti() { let tmp_out = self.machine.acquire_temp_xmm().unwrap(); let tmp_in = self.machine.acquire_temp_gpr().unwrap(); @@ -6068,6 +6603,9 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); // Converting i64 to f32 never results in NaN. + if a.arch_has_fconverti() { let tmp_out = self.machine.acquire_temp_xmm().unwrap(); let tmp_in = self.machine.acquire_temp_gpr().unwrap(); @@ -6128,6 +6666,8 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); // Converting i32 to f64 never results in NaN. if a.arch_has_fconverti() { let tmp_out = self.machine.acquire_temp_xmm().unwrap(); @@ -6172,6 +6712,8 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); // Converting i32 to f64 never results in NaN. 
if a.arch_has_fconverti() { let tmp_out = self.machine.acquire_temp_xmm().unwrap(); @@ -6216,6 +6758,8 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); // Converting i64 to f64 never results in NaN. if a.arch_has_fconverti() { let tmp_out = self.machine.acquire_temp_xmm().unwrap(); @@ -6260,6 +6804,8 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); // Converting i64 to f64 never results in NaN. if a.arch_has_fconverti() { let tmp_out = self.machine.acquire_temp_xmm().unwrap(); @@ -6339,6 +6885,31 @@ impl FunctionCodeGenerator for X64FunctionCode { self.machine.release_locations_only_osr_state(params.len()); + // Pop arguments off the FP stack and canonicalize them if needed. + // + // Canonicalization state will be lost across function calls, so early canonicalization + // is necessary here. 
+ while let Some(fp) = self.fp_stack.last() { + if fp.depth >= self.value_stack.len() { + let index = fp.depth - self.value_stack.len(); + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization + && fp.canonicalization.is_some() + { + Self::canonicalize_nan( + a, + &mut self.machine, + fp.canonicalization.unwrap().to_size(), + params[index], + params[index], + ); + } + self.fp_stack.pop().unwrap(); + } else { + break; + } + } + Self::emit_call_sysv_label( a, &mut self.machine, @@ -6359,13 +6930,12 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - match return_types[0] { - WpType::F32 | WpType::F64 => { - a.emit_mov(Size::S64, Location::XMM(XMM::XMM0), ret); - } - _ => { - a.emit_mov(Size::S64, Location::GPR(GPR::RAX), ret); - } + if return_types[0].is_float() { + a.emit_mov(Size::S64, Location::XMM(XMM::XMM0), ret); + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); + } else { + a.emit_mov(Size::S64, Location::GPR(GPR::RAX), ret); } } } @@ -6390,6 +6960,31 @@ impl FunctionCodeGenerator for X64FunctionCode { .collect(); self.machine.release_locations_only_regs(¶ms); + // Pop arguments off the FP stack and canonicalize them if needed. + // + // Canonicalization state will be lost across function calls, so early canonicalization + // is necessary here. 
+ while let Some(fp) = self.fp_stack.last() { + if fp.depth >= self.value_stack.len() { + let index = fp.depth - self.value_stack.len(); + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization + && fp.canonicalization.is_some() + { + Self::canonicalize_nan( + a, + &mut self.machine, + fp.canonicalization.unwrap().to_size(), + params[index], + params[index], + ); + } + self.fp_stack.pop().unwrap(); + } else { + break; + } + } + let table_base = self.machine.acquire_temp_gpr().unwrap(); let table_count = self.machine.acquire_temp_gpr().unwrap(); let sigidx = self.machine.acquire_temp_gpr().unwrap(); @@ -6505,13 +7100,12 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - match return_types[0] { - WpType::F32 | WpType::F64 => { - a.emit_mov(Size::S64, Location::XMM(XMM::XMM0), ret); - } - _ => { - a.emit_mov(Size::S64, Location::GPR(GPR::RAX), ret); - } + if return_types[0].is_float() { + a.emit_mov(Size::S64, Location::XMM(XMM::XMM0), ret); + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); + } else { + a.emit_mov(Size::S64, Location::GPR(GPR::RAX), ret); } } } @@ -6536,6 +7130,7 @@ impl FunctionCodeGenerator for X64FunctionCode { } }, value_stack_depth: self.value_stack.len(), + fp_stack_depth: self.fp_stack.len(), state: self.machine.state.clone(), state_diff_id: Self::get_state_diff( &self.machine, @@ -6559,19 +7154,49 @@ impl FunctionCodeGenerator for X64FunctionCode { if !was_unreachable && frame.returns.len() > 0 { let loc = *self.value_stack.last().unwrap(); - Self::emit_relaxed_binop( - a, - &mut self.machine, - Assembler::emit_mov, - Size::S64, - loc, - Location::GPR(GPR::RAX), - ); + if frame.returns[0].is_float() { + let fp = self.fp_stack.peek1()?; + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization + && fp.canonicalization.is_some() + { + Self::canonicalize_nan( + a, + &mut self.machine, + match frame.returns[0] { + WpType::F32 => Size::S32, + 
WpType::F64 => Size::S64, + _ => unreachable!(), + }, + loc, + Location::GPR(GPR::RAX), + ); + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::GPR(GPR::RAX), + ); + } + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::GPR(GPR::RAX), + ); + } } let released: &[Location] = &self.value_stack[frame.value_stack_depth..]; self.machine.release_locations(a, released); self.value_stack.truncate(frame.value_stack_depth); + self.fp_stack.truncate(frame.fp_stack_depth); match frame.if_else { IfElseState::If(label) => { @@ -6593,6 +7218,18 @@ impl FunctionCodeGenerator for X64FunctionCode { get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); let v_a = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let cncl: Option<(Option, Option)> = + if self.fp_stack.len() >= 2 + && self.fp_stack[self.fp_stack.len() - 2].depth == self.value_stack.len() + && self.fp_stack[self.fp_stack.len() - 1].depth + == self.value_stack.len() + 1 + { + let (left, right) = self.fp_stack.pop2()?; + self.fp_stack.push(FloatValue::new(self.value_stack.len())); + Some((left.canonicalization, right.canonicalization)) + } else { + None + }; let ret = self.machine.acquire_locations( a, &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], @@ -6612,27 +7249,47 @@ impl FunctionCodeGenerator for X64FunctionCode { cond, ); a.emit_jmp(Condition::Equal, zero_label); - if v_a != ret { - Self::emit_relaxed_binop( - a, - &mut self.machine, - Assembler::emit_mov, - Size::S64, - v_a, - ret, - ); + match cncl { + Some((Some(fp), _)) + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization => + { + Self::canonicalize_nan(a, &mut self.machine, fp.to_size(), v_a, ret); + } + _ => { + if v_a != ret { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + v_a, + ret, 
+ ); + } + } } a.emit_jmp(Condition::None, end_label); a.emit_label(zero_label); - if v_b != ret { - Self::emit_relaxed_binop( - a, - &mut self.machine, - Assembler::emit_mov, - Size::S64, - v_b, - ret, - ); + match cncl { + Some((_, Some(fp))) + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization => + { + Self::canonicalize_nan(a, &mut self.machine, fp.to_size(), v_b, ret); + } + _ => { + if v_b != ret { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + v_b, + ret, + ); + } + } } a.emit_label(end_label); } @@ -6651,6 +7308,7 @@ impl FunctionCodeGenerator for X64FunctionCode { } }, value_stack_depth: self.value_stack.len(), + fp_stack_depth: self.fp_stack.len(), state: self.machine.state.clone(), state_diff_id: Self::get_state_diff( &self.machine, @@ -6680,6 +7338,7 @@ impl FunctionCodeGenerator for X64FunctionCode { } }, value_stack_depth: self.value_stack.len(), + fp_stack_depth: self.fp_stack.len(), state: self.machine.state.clone(), state_diff_id, }); @@ -6844,6 +7503,8 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); Self::emit_memory_op( module_info, @@ -7038,6 +7699,8 @@ impl FunctionCodeGenerator for X64FunctionCode { get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); let target_addr = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let fp = self.fp_stack.pop1()?; + let config_nan_canonicalization = self.config.nan_canonicalization; Self::emit_memory_op( module_info, @@ -7050,14 +7713,28 @@ impl FunctionCodeGenerator for X64FunctionCode { false, 4, |a, m, addr| { - Self::emit_relaxed_binop( - a, - m, - Assembler::emit_mov, - Size::S32, - target_value, - Location::Memory(addr, 0), - ); + if !a.arch_supports_canonicalize_nan() + || !config_nan_canonicalization + || fp.canonicalization.is_none() + { + 
Self::emit_relaxed_binop( + a, + m, + Assembler::emit_mov, + Size::S32, + target_value, + Location::Memory(addr, 0), + ); + } else { + Self::canonicalize_nan( + a, + m, + Size::S32, + target_value, + Location::Memory(addr, 0), + ); + } + Ok(()) }, )?; @@ -7162,6 +7839,8 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); Self::emit_memory_op( module_info, @@ -7438,6 +8117,8 @@ impl FunctionCodeGenerator for X64FunctionCode { get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); let target_addr = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let fp = self.fp_stack.pop1()?; + let config_nan_canonicalization = self.config.nan_canonicalization; Self::emit_memory_op( module_info, @@ -7450,14 +8131,27 @@ impl FunctionCodeGenerator for X64FunctionCode { false, 8, |a, m, addr| { - Self::emit_relaxed_binop( - a, - m, - Assembler::emit_mov, - Size::S64, - target_value, - Location::Memory(addr, 0), - ); + if !a.arch_supports_canonicalize_nan() + || !config_nan_canonicalization + || fp.canonicalization.is_none() + { + Self::emit_relaxed_binop( + a, + m, + Assembler::emit_mov, + Size::S64, + target_value, + Location::Memory(addr, 0), + ); + } else { + Self::canonicalize_nan( + a, + m, + Size::S64, + target_value, + Location::Memory(addr, 0), + ); + } Ok(()) }, )?; @@ -7568,14 +8262,43 @@ impl FunctionCodeGenerator for X64FunctionCode { }); } let loc = *self.value_stack.last().unwrap(); - Self::emit_relaxed_binop( - a, - &mut self.machine, - Assembler::emit_mov, - Size::S64, - loc, - Location::GPR(GPR::RAX), - ); + if frame.returns[0].is_float() { + let fp = self.fp_stack.peek1()?; + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization + && fp.canonicalization.is_some() + { + Self::canonicalize_nan( + a, + &mut self.machine, + match frame.returns[0] { + WpType::F32 => Size::S32, + 
WpType::F64 => Size::S64, + _ => unreachable!(), + }, + loc, + Location::GPR(GPR::RAX), + ); + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::GPR(GPR::RAX), + ); + } + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::GPR(GPR::RAX), + ); + } } let released = &self.value_stack[frame.value_stack_depth..]; self.machine.release_locations_keep_state(a, released); @@ -7592,8 +8315,39 @@ impl FunctionCodeGenerator for X64FunctionCode { }); } let loc = *self.value_stack.last().unwrap(); - a.emit_mov(Size::S64, loc, Location::GPR(GPR::RAX)); + + if frame.returns[0].is_float() { + let fp = self.fp_stack.peek1()?; + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization + && fp.canonicalization.is_some() + { + Self::canonicalize_nan( + a, + &mut self.machine, + match frame.returns[0] { + WpType::F32 => Size::S32, + WpType::F64 => Size::S64, + _ => unreachable!(), + }, + loc, + Location::GPR(GPR::RAX), + ); + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::GPR(GPR::RAX), + ); + } + } else { + a.emit_mov(Size::S64, loc, Location::GPR(GPR::RAX)); + } } + let released = &self.value_stack[frame.value_stack_depth..]; self.machine.release_locations_keep_state(a, released); a.emit_jmp(Condition::None, frame.label); @@ -7622,7 +8376,36 @@ impl FunctionCodeGenerator for X64FunctionCode { }); } let loc = *self.value_stack.last().unwrap(); - a.emit_mov(Size::S64, loc, Location::GPR(GPR::RAX)); + if frame.returns[0].is_float() { + let fp = self.fp_stack.peek1()?; + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization + && fp.canonicalization.is_some() + { + Self::canonicalize_nan( + a, + &mut self.machine, + match frame.returns[0] { + WpType::F32 => Size::S32, + WpType::F64 => Size::S64, + _ => unreachable!(), + }, + loc, + 
Location::GPR(GPR::RAX), + ); + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::GPR(GPR::RAX), + ); + } + } else { + a.emit_mov(Size::S64, loc, Location::GPR(GPR::RAX)); + } } let released = &self.value_stack[frame.value_stack_depth..]; self.machine.release_locations_keep_state(a, released); @@ -7673,7 +8456,36 @@ impl FunctionCodeGenerator for X64FunctionCode { }); } let loc = *self.value_stack.last().unwrap(); - a.emit_mov(Size::S64, loc, Location::GPR(GPR::RAX)); + if frame.returns[0].is_float() { + let fp = self.fp_stack.peek1()?; + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization + && fp.canonicalization.is_some() + { + Self::canonicalize_nan( + a, + &mut self.machine, + match frame.returns[0] { + WpType::F32 => Size::S32, + WpType::F64 => Size::S64, + _ => unreachable!(), + }, + loc, + Location::GPR(GPR::RAX), + ); + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::GPR(GPR::RAX), + ); + } + } else { + a.emit_mov(Size::S64, loc, Location::GPR(GPR::RAX)); + } } let released = &self.value_stack[frame.value_stack_depth..]; self.machine.release_locations_keep_state(a, released); @@ -7691,7 +8503,36 @@ impl FunctionCodeGenerator for X64FunctionCode { }); } let loc = *self.value_stack.last().unwrap(); - a.emit_mov(Size::S64, loc, Location::GPR(GPR::RAX)); + if frame.returns[0].is_float() { + let fp = self.fp_stack.peek1()?; + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization + && fp.canonicalization.is_some() + { + Self::canonicalize_nan( + a, + &mut self.machine, + match frame.returns[0] { + WpType::F32 => Size::S32, + WpType::F64 => Size::S64, + _ => unreachable!(), + }, + loc, + Location::GPR(GPR::RAX), + ); + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::GPR(GPR::RAX), + ); + } + } else { + 
a.emit_mov(Size::S64, loc, Location::GPR(GPR::RAX)); + } } let released = &self.value_stack[frame.value_stack_depth..]; self.machine.release_locations_keep_state(a, released); @@ -7706,20 +8547,54 @@ impl FunctionCodeGenerator for X64FunctionCode { } Operator::Drop => { get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + if let Some(x) = self.fp_stack.last() { + if x.depth == self.value_stack.len() { + self.fp_stack.pop1()?; + } + } } Operator::End => { let frame = self.control_stack.pop().unwrap(); if !was_unreachable && frame.returns.len() > 0 { let loc = *self.value_stack.last().unwrap(); - Self::emit_relaxed_binop( - a, - &mut self.machine, - Assembler::emit_mov, - Size::S64, - loc, - Location::GPR(GPR::RAX), - ); + if frame.returns[0].is_float() { + let fp = self.fp_stack.peek1()?; + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization + && fp.canonicalization.is_some() + { + Self::canonicalize_nan( + a, + &mut self.machine, + match frame.returns[0] { + WpType::F32 => Size::S32, + WpType::F64 => Size::S64, + _ => unreachable!(), + }, + loc, + Location::GPR(GPR::RAX), + ); + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::GPR(GPR::RAX), + ); + } + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::GPR(GPR::RAX), + ); + } } if self.control_stack.len() == 0 { @@ -7744,6 +8619,7 @@ impl FunctionCodeGenerator for X64FunctionCode { let released = &self.value_stack[frame.value_stack_depth..]; self.machine.release_locations(a, released); self.value_stack.truncate(frame.value_stack_depth); + self.fp_stack.truncate(frame.fp_stack_depth); if !frame.loop_like { a.emit_label(frame.label); @@ -7769,6 +8645,11 @@ impl FunctionCodeGenerator for X64FunctionCode { )[0]; a.emit_mov(Size::S64, Location::GPR(GPR::RAX), loc); self.value_stack.push(loc); + if frame.returns[0].is_float() { + 
self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); + // we already canonicalized at the `Br*` instruction or here previously. + } } } } diff --git a/lib/spectests/spectests/wasmer.wast b/lib/spectests/spectests/wasmer.wast index cf3841f9bfc..1a7647ee1e7 100644 --- a/lib/spectests/spectests/wasmer.wast +++ b/lib/spectests/spectests/wasmer.wast @@ -3,15 +3,21 @@ (module ;; Auxiliary definitions (type $out-i32 (func (result i32))) + (type $f32-id (func (param f32) (result f32))) + (type $f64-id (func (param f64) (result f64))) (func $const-i32 (type $out-i32) (i32.const 0x132)) (table funcref (elem $const-i32 + $nan-canonicalization-f32-func-call-target + $nan-canonicalization-f64-func-call-target ) ) + (memory 1) + ;; https://github.com/wasmerio/wasmer/pull/1191 (func (export "call-indirect-from-spilled-stack") (result i32) (i64.add (i64.const 0) (i64.const 0)) @@ -28,6 +34,182 @@ (call_indirect (type $out-i32)) (return) ) + + ;; NaN canonicalization tests. + ;; Things that are covered by spectests canonicalization (`fabs`, `fneg`, `fcopysign`, `reinterpret`, `const`) won't be duplicated here. 
+ + (func (export "nan-canonicalization-f32-add") (param i32) (result i32) + (i32.reinterpret_f32 (f32.add (f32.reinterpret_i32 (get_local 0)) (f32.const 0))) + ) + (func (export "nan-canonicalization-f32-sub") (param i32) (result i32) + (i32.reinterpret_f32 (f32.sub (f32.reinterpret_i32 (get_local 0)) (f32.const 0))) + ) + (func (export "nan-canonicalization-f32-mul") (param i32) (result i32) + (i32.reinterpret_f32 (f32.mul (f32.reinterpret_i32 (get_local 0)) (f32.const 0))) + ) + (func (export "nan-canonicalization-f32-div") (param i32) (result i32) + (i32.reinterpret_f32 (f32.div (f32.reinterpret_i32 (get_local 0)) (f32.const 1))) + ) + (func (export "nan-canonicalization-f32-max") (param i32) (result i32) + (i32.reinterpret_f32 (f32.max (f32.reinterpret_i32 (get_local 0)) (f32.const 1))) + ) + (func (export "nan-canonicalization-f32-min") (param i32) (result i32) + (i32.reinterpret_f32 (f32.min (f32.reinterpret_i32 (get_local 0)) (f32.const 1))) + ) + (func (export "nan-canonicalization-f32-nearest") (param i32) (result i32) + (i32.reinterpret_f32 (f32.nearest (f32.reinterpret_i32 (get_local 0)))) + ) + (func (export "nan-canonicalization-f32-floor") (param i32) (result i32) + (i32.reinterpret_f32 (f32.floor (f32.reinterpret_i32 (get_local 0)))) + ) + (func (export "nan-canonicalization-f32-ceil") (param i32) (result i32) + (i32.reinterpret_f32 (f32.ceil (f32.reinterpret_i32 (get_local 0)))) + ) + (func (export "nan-canonicalization-f32-trunc") (param i32) (result i32) + (i32.reinterpret_f32 (f32.trunc (f32.reinterpret_i32 (get_local 0)))) + ) + (func (export "nan-canonicalization-f32-sqrt") (param i32) (result i32) + (i32.reinterpret_f32 (f32.sqrt (f32.reinterpret_i32 (get_local 0)))) + ) + (func (export "nan-canonicalization-f32-mem") (param i32) (result i32) + (f32.store (i32.const 0) (f32.reinterpret_i32 (get_local 0))) + (i32.reinterpret_f32 (f32.load (i32.const 0))) + ) + (func (export "nan-canonicalization-f32-mem-cncl") (param i32) (result i32) + 
(f32.store (i32.const 0) (f32.add (f32.reinterpret_i32 (get_local 0)) (f32.const 0))) + (i32.reinterpret_f32 (f32.load (i32.const 0))) + ) + (func (export "nan-canonicalization-f32-local") (param i32) (result i32) + (local f32) + (set_local 1 (f32.reinterpret_i32 (get_local 0))) + (i32.reinterpret_f32 (get_local 1)) + ) + (func (export "nan-canonicalization-f32-local-cncl") (param i32) (result i32) + (local f32) + (set_local 1 (f32.add (f32.reinterpret_i32 (get_local 0)) (f32.const 0))) + (i32.reinterpret_f32 (get_local 1)) + ) + (func $nan-canonicalization-f32-func-call-target (param f32) (result f32) + (get_local 0) + ) + (func (export "nan-canonicalization-f32-func-call") (param i32) (result i32) + (i32.reinterpret_f32 (call $nan-canonicalization-f32-func-call-target (f32.reinterpret_i32 (get_local 0)))) + ) + (func (export "nan-canonicalization-f32-func-call-cncl") (param i32) (result i32) + (i32.reinterpret_f32 (call $nan-canonicalization-f32-func-call-target (f32.add (f32.reinterpret_i32 (get_local 0)) (f32.const 0)))) + ) + (func (export "nan-canonicalization-f32-func-call-indirect") (param i32) (result i32) + (i32.reinterpret_f32 (call_indirect (type $f32-id) (f32.reinterpret_i32 (get_local 0)) (i32.const 1))) + ) + (func (export "nan-canonicalization-f32-func-call-indirect-cncl") (param i32) (result i32) + (i32.reinterpret_f32 (call_indirect (type $f32-id) (f32.add (f32.reinterpret_i32 (get_local 0)) (f32.const 0)) (i32.const 1))) + ) + + (func (export "nan-canonicalization-f64-add") (param i64) (result i64) + (i64.reinterpret_f64 (f64.add (f64.reinterpret_i64 (get_local 0)) (f64.const 0))) + ) + (func (export "nan-canonicalization-f64-sub") (param i64) (result i64) + (i64.reinterpret_f64 (f64.sub (f64.reinterpret_i64 (get_local 0)) (f64.const 0))) + ) + (func (export "nan-canonicalization-f64-mul") (param i64) (result i64) + (i64.reinterpret_f64 (f64.mul (f64.reinterpret_i64 (get_local 0)) (f64.const 0))) + ) + (func (export 
"nan-canonicalization-f64-div") (param i64) (result i64) + (i64.reinterpret_f64 (f64.div (f64.reinterpret_i64 (get_local 0)) (f64.const 1))) + ) + (func (export "nan-canonicalization-f64-max") (param i64) (result i64) + (i64.reinterpret_f64 (f64.max (f64.reinterpret_i64 (get_local 0)) (f64.const 1))) + ) + (func (export "nan-canonicalization-f64-min") (param i64) (result i64) + (i64.reinterpret_f64 (f64.min (f64.reinterpret_i64 (get_local 0)) (f64.const 1))) + ) + (func (export "nan-canonicalization-f64-nearest") (param i64) (result i64) + (i64.reinterpret_f64 (f64.nearest (f64.reinterpret_i64 (get_local 0)))) + ) + (func (export "nan-canonicalization-f64-floor") (param i64) (result i64) + (i64.reinterpret_f64 (f64.floor (f64.reinterpret_i64 (get_local 0)))) + ) + (func (export "nan-canonicalization-f64-ceil") (param i64) (result i64) + (i64.reinterpret_f64 (f64.ceil (f64.reinterpret_i64 (get_local 0)))) + ) + (func (export "nan-canonicalization-f64-trunc") (param i64) (result i64) + (i64.reinterpret_f64 (f64.trunc (f64.reinterpret_i64 (get_local 0)))) + ) + (func (export "nan-canonicalization-f64-sqrt") (param i64) (result i64) + (i64.reinterpret_f64 (f64.sqrt (f64.reinterpret_i64 (get_local 0)))) + ) + (func (export "nan-canonicalization-f64-mem") (param i64) (result i64) + (f64.store (i32.const 0) (f64.reinterpret_i64 (get_local 0))) + (i64.reinterpret_f64 (f64.load (i32.const 0))) + ) + (func (export "nan-canonicalization-f64-mem-cncl") (param i64) (result i64) + (f64.store (i32.const 0) (f64.add (f64.reinterpret_i64 (get_local 0)) (f64.const 0))) + (i64.reinterpret_f64 (f64.load (i32.const 0))) + ) + (func (export "nan-canonicalization-f64-local") (param i64) (result i64) + (local f64) + (set_local 1 (f64.reinterpret_i64 (get_local 0))) + (i64.reinterpret_f64 (get_local 1)) + ) + (func (export "nan-canonicalization-f64-local-cncl") (param i64) (result i64) + (local f64) + (set_local 1 (f64.add (f64.reinterpret_i64 (get_local 0)) (f64.const 0))) + 
(i64.reinterpret_f64 (get_local 1)) + ) + (func $nan-canonicalization-f64-func-call-target (param f64) (result f64) + (get_local 0) + ) + (func (export "nan-canonicalization-f64-func-call") (param i64) (result i64) + (i64.reinterpret_f64 (call $nan-canonicalization-f64-func-call-target (f64.reinterpret_i64 (get_local 0)))) + ) + (func (export "nan-canonicalization-f64-func-call-cncl") (param i64) (result i64) + (i64.reinterpret_f64 (call $nan-canonicalization-f64-func-call-target (f64.add (f64.reinterpret_i64 (get_local 0)) (f64.const 0)))) + ) + (func (export "nan-canonicalization-f64-func-call-indirect") (param i64) (result i64) + (i64.reinterpret_f64 (call_indirect (type $f64-id) (f64.reinterpret_i64 (get_local 0)) (i32.const 2))) + ) + (func (export "nan-canonicalization-f64-func-call-indirect-cncl") (param i64) (result i64) + (i64.reinterpret_f64 (call_indirect (type $f64-id) (f64.add (f64.reinterpret_i64 (get_local 0)) (f64.const 0)) (i32.const 2))) + ) ) -(assert_return (invoke "call-indirect-from-spilled-stack") (i32.const 0x132)) \ No newline at end of file +(assert_return (invoke "call-indirect-from-spilled-stack") (i32.const 0x132)) +(assert_return (invoke "nan-canonicalization-f32-add" (i32.const 0x7fc00001)) (i32.const 0x7fc00000)) +(assert_return (invoke "nan-canonicalization-f32-sub" (i32.const 0x7fc00001)) (i32.const 0x7fc00000)) +(assert_return (invoke "nan-canonicalization-f32-mul" (i32.const 0x7fc00001)) (i32.const 0x7fc00000)) +(assert_return (invoke "nan-canonicalization-f32-div" (i32.const 0x7fc00001)) (i32.const 0x7fc00000)) +(assert_return (invoke "nan-canonicalization-f32-max" (i32.const 0x7fc00001)) (i32.const 0x7fc00000)) +(assert_return (invoke "nan-canonicalization-f32-min" (i32.const 0x7fc00001)) (i32.const 0x7fc00000)) +(assert_return (invoke "nan-canonicalization-f32-nearest" (i32.const 0x7fc00001)) (i32.const 0x7fc00000)) +(assert_return (invoke "nan-canonicalization-f32-floor" (i32.const 0x7fc00001)) (i32.const 0x7fc00000)) 
+(assert_return (invoke "nan-canonicalization-f32-ceil" (i32.const 0x7fc00001)) (i32.const 0x7fc00000)) +(assert_return (invoke "nan-canonicalization-f32-trunc" (i32.const 0x7fc00001)) (i32.const 0x7fc00000)) +(assert_return (invoke "nan-canonicalization-f32-sqrt" (i32.const 0x7fc00001)) (i32.const 0x7fc00000)) +(assert_return (invoke "nan-canonicalization-f32-mem" (i32.const 0x7fc00001)) (i32.const 0x7fc00001)) +(assert_return (invoke "nan-canonicalization-f32-mem-cncl" (i32.const 0x7fc00001)) (i32.const 0x7fc00000)) +(assert_return (invoke "nan-canonicalization-f32-local" (i32.const 0x7fc00001)) (i32.const 0x7fc00001)) +(assert_return (invoke "nan-canonicalization-f32-local-cncl" (i32.const 0x7fc00001)) (i32.const 0x7fc00000)) +(assert_return (invoke "nan-canonicalization-f32-func-call" (i32.const 0x7fc00001)) (i32.const 0x7fc00001)) +(assert_return (invoke "nan-canonicalization-f32-func-call-cncl" (i32.const 0x7fc00001)) (i32.const 0x7fc00000)) +(assert_return (invoke "nan-canonicalization-f32-func-call-indirect" (i32.const 0x7fc00001)) (i32.const 0x7fc00001)) +(assert_return (invoke "nan-canonicalization-f32-func-call-indirect-cncl" (i32.const 0x7fc00001)) (i32.const 0x7fc00000)) + +(assert_return (invoke "nan-canonicalization-f64-add" (i64.const 0x7ff8000000000001)) (i64.const 0x7ff8000000000000)) +(assert_return (invoke "nan-canonicalization-f64-sub" (i64.const 0x7ff8000000000001)) (i64.const 0x7ff8000000000000)) +(assert_return (invoke "nan-canonicalization-f64-mul" (i64.const 0x7ff8000000000001)) (i64.const 0x7ff8000000000000)) +(assert_return (invoke "nan-canonicalization-f64-div" (i64.const 0x7ff8000000000001)) (i64.const 0x7ff8000000000000)) +(assert_return (invoke "nan-canonicalization-f64-max" (i64.const 0x7ff8000000000001)) (i64.const 0x7ff8000000000000)) +(assert_return (invoke "nan-canonicalization-f64-min" (i64.const 0x7ff8000000000001)) (i64.const 0x7ff8000000000000)) +(assert_return (invoke "nan-canonicalization-f64-nearest" (i64.const 
0x7ff8000000000001)) (i64.const 0x7ff8000000000000)) +(assert_return (invoke "nan-canonicalization-f64-floor" (i64.const 0x7ff8000000000001)) (i64.const 0x7ff8000000000000)) +(assert_return (invoke "nan-canonicalization-f64-ceil" (i64.const 0x7ff8000000000001)) (i64.const 0x7ff8000000000000)) +(assert_return (invoke "nan-canonicalization-f64-trunc" (i64.const 0x7ff8000000000001)) (i64.const 0x7ff8000000000000)) +(assert_return (invoke "nan-canonicalization-f64-sqrt" (i64.const 0x7ff8000000000001)) (i64.const 0x7ff8000000000000)) +(assert_return (invoke "nan-canonicalization-f64-mem" (i64.const 0x7ff8000000000001)) (i64.const 0x7ff8000000000001)) +(assert_return (invoke "nan-canonicalization-f64-mem-cncl" (i64.const 0x7ff8000000000001)) (i64.const 0x7ff8000000000000)) +(assert_return (invoke "nan-canonicalization-f64-local" (i64.const 0x7ff8000000000001)) (i64.const 0x7ff8000000000001)) +(assert_return (invoke "nan-canonicalization-f64-local-cncl" (i64.const 0x7ff8000000000001)) (i64.const 0x7ff8000000000000)) +(assert_return (invoke "nan-canonicalization-f64-func-call" (i64.const 0x7ff8000000000001)) (i64.const 0x7ff8000000000001)) +(assert_return (invoke "nan-canonicalization-f64-func-call-cncl" (i64.const 0x7ff8000000000001)) (i64.const 0x7ff8000000000000)) +(assert_return (invoke "nan-canonicalization-f64-func-call-indirect" (i64.const 0x7ff8000000000001)) (i64.const 0x7ff8000000000001)) +(assert_return (invoke "nan-canonicalization-f64-func-call-indirect-cncl" (i64.const 0x7ff8000000000001)) (i64.const 0x7ff8000000000000)) diff --git a/lib/spectests/tests/excludes.txt b/lib/spectests/tests/excludes.txt index a64d96b488a..54c886a89a7 100644 --- a/lib/spectests/tests/excludes.txt +++ b/lib/spectests/tests/excludes.txt @@ -244,10 +244,6 @@ clif:fail:exports.wast:167:windows # Module - caught panic Any clif:fail:exports.wast:168:windows # Module - caught panic Any clif:fail:exports.wast:169:windows # Module - caught panic Any clif:fail:exports.wast:170:windows # 
Module - caught panic Any -clif:fail:f32.wast:2496:windows # "AssertReturnArithmeticNan" - value is not arithmetic nan F32(NaN) -clif:fail:f32.wast:2498:windows # "AssertReturnArithmeticNan" - value is not arithmetic nan F32(NaN) -clif:fail:f64.wast:2496:windows # "AssertReturnArithmeticNan" - value is not arithmetic nan F64(NaN) -clif:fail:f64.wast:2498:windows # "AssertReturnArithmeticNan" - value is not arithmetic nan F64(NaN) clif:fail:func.wast:289:windows # Module - caught panic Any clif:fail:memory.wast:3:windows # Module - caught panic Any clif:fail:memory.wast:4:windows # Module - caught panic Any @@ -523,4 +519,36 @@ singlepass:fail:traps.wast:53:*:aarch64 # AssertTrap - expected trap, got [] singlepass:fail:traps.wast:54:*:aarch64 # AssertTrap - expected trap, got [] singlepass:fail:traps.wast:55:*:aarch64 # AssertTrap - expected trap, got [] singlepass:fail:traps.wast:56:*:aarch64 # AssertTrap - expected trap, got [] -singlepass:fail:traps.wast:57:*:aarch64 # AssertTrap - expected trap, got [] \ No newline at end of file +singlepass:fail:traps.wast:57:*:aarch64 # AssertTrap - expected trap, got [] + +# NaN canonicalization is not yet implemented for aarch64. 
+singlepass:fail:wasmer.wast:177:*:aarch64 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") +singlepass:fail:wasmer.wast:178:*:aarch64 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") +singlepass:fail:wasmer.wast:179:*:aarch64 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") +singlepass:fail:wasmer.wast:180:*:aarch64 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") +singlepass:fail:wasmer.wast:181:*:aarch64 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") +singlepass:fail:wasmer.wast:182:*:aarch64 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") +singlepass:fail:wasmer.wast:183:*:aarch64 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") +singlepass:fail:wasmer.wast:184:*:aarch64 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") +singlepass:fail:wasmer.wast:185:*:aarch64 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") +singlepass:fail:wasmer.wast:186:*:aarch64 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") +singlepass:fail:wasmer.wast:187:*:aarch64 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") +singlepass:fail:wasmer.wast:189:*:aarch64 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") +singlepass:fail:wasmer.wast:191:*:aarch64 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") 
+singlepass:fail:wasmer.wast:193:*:aarch64 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") +singlepass:fail:wasmer.wast:195:*:aarch64 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") +singlepass:fail:wasmer.wast:197:*:aarch64 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") +singlepass:fail:wasmer.wast:198:*:aarch64 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") +singlepass:fail:wasmer.wast:199:*:aarch64 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") +singlepass:fail:wasmer.wast:200:*:aarch64 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") +singlepass:fail:wasmer.wast:201:*:aarch64 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") +singlepass:fail:wasmer.wast:202:*:aarch64 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") +singlepass:fail:wasmer.wast:203:*:aarch64 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") +singlepass:fail:wasmer.wast:204:*:aarch64 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") +singlepass:fail:wasmer.wast:205:*:aarch64 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") 
+singlepass:fail:wasmer.wast:206:*:aarch64 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") +singlepass:fail:wasmer.wast:207:*:aarch64 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") +singlepass:fail:wasmer.wast:209:*:aarch64 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") +singlepass:fail:wasmer.wast:211:*:aarch64 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") +singlepass:fail:wasmer.wast:213:*:aarch64 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") +singlepass:fail:wasmer.wast:215:*:aarch64 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") \ No newline at end of file diff --git a/lib/spectests/tests/spectest.rs b/lib/spectests/tests/spectest.rs index 409a9d90829..c3d1ecce723 100644 --- a/lib/spectests/tests/spectest.rs +++ b/lib/spectests/tests/spectest.rs @@ -336,6 +336,7 @@ mod tests { simd: true, threads: true, }, + nan_canonicalization: true, ..Default::default() }; let module = compile_with_config(&module.into_vec(), config) @@ -774,6 +775,7 @@ mod tests { simd: true, threads: true, }, + nan_canonicalization: true, ..Default::default() }; compile_with_config(&module.into_vec(), config) @@ -826,6 +828,7 @@ mod tests { simd: true, threads: true, }, + nan_canonicalization: true, ..Default::default() }; compile_with_config(&module.into_vec(), config) @@ -877,6 +880,7 @@ mod tests { simd: true, threads: true, }, + nan_canonicalization: true, ..Default::default() }; let module = 
compile_with_config(&module.into_vec(), config) @@ -972,6 +976,7 @@ mod tests { simd: true, threads: true, }, + nan_canonicalization: true, ..Default::default() }; let module = compile_with_config(&module.into_vec(), config)