From d990dd4c9a82ea3c924b228fe4f0bc2a74b24c3a Mon Sep 17 00:00:00 2001 From: Andrew Brown Date: Tue, 13 Oct 2020 10:02:12 -0700 Subject: [PATCH] [machinst x64]: add source locations to more instruction formats In order to register traps for `load_splat`, several instruction formats need knowledge of `SourceLoc`s; however, because the x64 backend does not yet correctly and completely register traps for `RegMem::Mem` variants, I opened https://github.com/bytecodealliance/wasmtime/issues/2290 to discuss and resolve this issue. In the meantime, the current behavior (i.e. remaining largely unaware of `SourceLoc`s) is retained. --- cranelift/codegen/src/isa/x64/inst/emit.rs | 42 +++- .../codegen/src/isa/x64/inst/emit_tests.rs | 104 ++++----- cranelift/codegen/src/isa/x64/inst/mod.rs | 86 ++++--- cranelift/codegen/src/isa/x64/lower.rs | 216 +++++++++++++----- 4 files changed, 303 insertions(+), 145 deletions(-) diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 688e620d8302..397b21f69d3c 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1728,6 +1728,7 @@ pub(crate) fn emit( op, src: src_e, dst: reg_g, + srcloc, } => { let rex = RexFlags::clear_w(); let (prefix, opcode, length) = match op { @@ -1819,6 +1820,10 @@ pub(crate) fn emit( emit_std_reg_reg(sink, prefix, opcode, length, reg_g.to_reg(), *reg_e, rex); } RegMem::Mem { addr } => { + if let Some(srcloc) = *srcloc { + // Register the offset at which the actual load instruction starts. + sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); + } let addr = &addr.finalize(state); emit_std_reg_mem(sink, prefix, opcode, length, reg_g.to_reg(), addr, rex); } @@ -1889,7 +1894,7 @@ pub(crate) fn emit( // and negative zero. These instructions merge the sign bits in that // case, and are no-ops otherwise.
let op = if *is_min { or_op } else { and_op }; - let inst = Inst::xmm_rm_r(op, RegMem::reg(*lhs), *rhs_dst); + let inst = Inst::xmm_rm_r(op, RegMem::reg(*lhs), *rhs_dst, None); inst.emit(sink, info, state); let inst = Inst::jmp_known(done); @@ -1899,13 +1904,13 @@ pub(crate) fn emit( // read-only operand: perform an addition between the two operands, which has the // desired NaN propagation effects. sink.bind_label(propagate_nan); - let inst = Inst::xmm_rm_r(add_op, RegMem::reg(*lhs), *rhs_dst); + let inst = Inst::xmm_rm_r(add_op, RegMem::reg(*lhs), *rhs_dst, None); inst.emit(sink, info, state); one_way_jmp(sink, CC::P, done); sink.bind_label(do_min_max); - let inst = Inst::xmm_rm_r(min_max_op, RegMem::reg(*lhs), *rhs_dst); + let inst = Inst::xmm_rm_r(min_max_op, RegMem::reg(*lhs), *rhs_dst, None); inst.emit(sink, info, state); sink.bind_label(done); @@ -1916,7 +1921,8 @@ pub(crate) fn emit( src, dst, imm, - is64: w, + is64, + srcloc, } => { let (prefix, opcode, len) = match op { SseOpcode::Cmpps => (LegacyPrefixes::None, 0x0FC2, 2), @@ -1933,7 +1939,7 @@ pub(crate) fn emit( SseOpcode::Pshufd => (LegacyPrefixes::_66, 0x0F70, 2), _ => unimplemented!("Opcode {:?} not implemented", op), }; - let rex = if *w { + let rex = if *is64 { RexFlags::set_w() } else { RexFlags::clear_w() @@ -1955,6 +1961,10 @@ pub(crate) fn emit( } } RegMem::Mem { addr } => { + if let Some(srcloc) = *srcloc { + // Register the offset at which the actual load instruction starts. 
+ sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); + } let addr = &addr.finalize(state); assert!( !regs_swapped, @@ -1963,7 +1973,7 @@ pub(crate) fn emit( emit_std_reg_mem(sink, prefix, opcode, len, dst.to_reg(), addr, rex); } } - sink.put1(*imm) + sink.put1(*imm); } Inst::XmmLoadConstSeq { val, dst, ty } => { @@ -2188,7 +2198,7 @@ pub(crate) fn emit( } else { SseOpcode::Addss }; - let inst = Inst::xmm_rm_r(add_op, RegMem::reg(dst.to_reg()), *dst); + let inst = Inst::xmm_rm_r(add_op, RegMem::reg(dst.to_reg()), *dst, None); inst.emit(sink, info, state); sink.bind_label(done); @@ -2295,8 +2305,12 @@ pub(crate) fn emit( // If the input was positive, saturate to INT_MAX. // Zero out tmp_xmm. - let inst = - Inst::xmm_rm_r(SseOpcode::Xorpd, RegMem::reg(tmp_xmm.to_reg()), *tmp_xmm); + let inst = Inst::xmm_rm_r( + SseOpcode::Xorpd, + RegMem::reg(tmp_xmm.to_reg()), + *tmp_xmm, + None, + ); inst.emit(sink, info, state); let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(src), tmp_xmm.to_reg()); @@ -2367,8 +2381,12 @@ pub(crate) fn emit( sink.bind_label(check_positive); // Zero out the tmp_xmm register. 
- let inst = - Inst::xmm_rm_r(SseOpcode::Xorpd, RegMem::reg(tmp_xmm.to_reg()), *tmp_xmm); + let inst = Inst::xmm_rm_r( + SseOpcode::Xorpd, + RegMem::reg(tmp_xmm.to_reg()), + *tmp_xmm, + None, + ); inst.emit(sink, info, state); let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(src), tmp_xmm.to_reg()); @@ -2522,7 +2540,7 @@ pub(crate) fn emit( sink.bind_label(handle_large); - let inst = Inst::xmm_rm_r(sub_op, RegMem::reg(tmp_xmm.to_reg()), *src); + let inst = Inst::xmm_rm_r(sub_op, RegMem::reg(tmp_xmm.to_reg()), *src, None); inst.emit(sink, info, state); let inst = Inst::xmm_to_gpr(trunc_op, src.to_reg(), *dst, *dst_size); diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index 62992be2bd49..71120a101df1 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -2983,12 +2983,12 @@ fn test_x64_emit() { // XMM_RM_R: float binary ops insns.push(( - Inst::xmm_rm_r(SseOpcode::Addss, RegMem::reg(xmm1), w_xmm0), + Inst::xmm_rm_r(SseOpcode::Addss, RegMem::reg(xmm1), w_xmm0, None), "F30F58C1", "addss %xmm1, %xmm0", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Addss, RegMem::reg(xmm11), w_xmm13), + Inst::xmm_rm_r(SseOpcode::Addss, RegMem::reg(xmm11), w_xmm13, None), "F3450F58EB", "addss %xmm11, %xmm13", )); @@ -2997,23 +2997,24 @@ fn test_x64_emit() { SseOpcode::Addss, RegMem::mem(Amode::imm_reg_reg_shift(123, r10, rdx, 2)), w_xmm0, + None, ), "F3410F5844927B", "addss 123(%r10,%rdx,4), %xmm0", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Addsd, RegMem::reg(xmm15), w_xmm4), + Inst::xmm_rm_r(SseOpcode::Addsd, RegMem::reg(xmm15), w_xmm4, None), "F2410F58E7", "addsd %xmm15, %xmm4", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Subss, RegMem::reg(xmm0), w_xmm1), + Inst::xmm_rm_r(SseOpcode::Subss, RegMem::reg(xmm0), w_xmm1, None), "F30F5CC8", "subss %xmm0, %xmm1", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Subss, RegMem::reg(xmm12), w_xmm1), + 
Inst::xmm_rm_r(SseOpcode::Subss, RegMem::reg(xmm12), w_xmm1, None), "F3410F5CCC", "subss %xmm12, %xmm1", )); @@ -3022,57 +3023,58 @@ fn test_x64_emit() { SseOpcode::Subss, RegMem::mem(Amode::imm_reg_reg_shift(321, r10, rax, 3)), w_xmm10, + None, ), "F3450F5C94C241010000", "subss 321(%r10,%rax,8), %xmm10", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Subsd, RegMem::reg(xmm5), w_xmm14), + Inst::xmm_rm_r(SseOpcode::Subsd, RegMem::reg(xmm5), w_xmm14, None), "F2440F5CF5", "subsd %xmm5, %xmm14", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Mulss, RegMem::reg(xmm5), w_xmm4), + Inst::xmm_rm_r(SseOpcode::Mulss, RegMem::reg(xmm5), w_xmm4, None), "F30F59E5", "mulss %xmm5, %xmm4", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Mulsd, RegMem::reg(xmm5), w_xmm4), + Inst::xmm_rm_r(SseOpcode::Mulsd, RegMem::reg(xmm5), w_xmm4, None), "F20F59E5", "mulsd %xmm5, %xmm4", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Divss, RegMem::reg(xmm8), w_xmm7), + Inst::xmm_rm_r(SseOpcode::Divss, RegMem::reg(xmm8), w_xmm7, None), "F3410F5EF8", "divss %xmm8, %xmm7", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Divsd, RegMem::reg(xmm5), w_xmm4), + Inst::xmm_rm_r(SseOpcode::Divsd, RegMem::reg(xmm5), w_xmm4, None), "F20F5EE5", "divsd %xmm5, %xmm4", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Andps, RegMem::reg(xmm3), w_xmm12), + Inst::xmm_rm_r(SseOpcode::Andps, RegMem::reg(xmm3), w_xmm12, None), "440F54E3", "andps %xmm3, %xmm12", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Andnps, RegMem::reg(xmm4), w_xmm11), + Inst::xmm_rm_r(SseOpcode::Andnps, RegMem::reg(xmm4), w_xmm11, None), "440F55DC", "andnps %xmm4, %xmm11", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Orps, RegMem::reg(xmm1), w_xmm15), + Inst::xmm_rm_r(SseOpcode::Orps, RegMem::reg(xmm1), w_xmm15, None), "440F56F9", "orps %xmm1, %xmm15", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Orps, RegMem::reg(xmm5), w_xmm4), + Inst::xmm_rm_r(SseOpcode::Orps, RegMem::reg(xmm5), w_xmm4, None), "0F56E5", "orps %xmm5, %xmm4", )); @@ -3081,211 +3083,211 @@ fn 
test_x64_emit() { // XMM_RM_R: Integer Packed insns.push(( - Inst::xmm_rm_r(SseOpcode::Paddb, RegMem::reg(xmm9), w_xmm5), + Inst::xmm_rm_r(SseOpcode::Paddb, RegMem::reg(xmm9), w_xmm5, None), "66410FFCE9", "paddb %xmm9, %xmm5", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Paddw, RegMem::reg(xmm7), w_xmm6), + Inst::xmm_rm_r(SseOpcode::Paddw, RegMem::reg(xmm7), w_xmm6, None), "660FFDF7", "paddw %xmm7, %xmm6", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Paddd, RegMem::reg(xmm12), w_xmm13), + Inst::xmm_rm_r(SseOpcode::Paddd, RegMem::reg(xmm12), w_xmm13, None), "66450FFEEC", "paddd %xmm12, %xmm13", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Paddq, RegMem::reg(xmm1), w_xmm8), + Inst::xmm_rm_r(SseOpcode::Paddq, RegMem::reg(xmm1), w_xmm8, None), "66440FD4C1", "paddq %xmm1, %xmm8", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Paddsb, RegMem::reg(xmm9), w_xmm5), + Inst::xmm_rm_r(SseOpcode::Paddsb, RegMem::reg(xmm9), w_xmm5, None), "66410FECE9", "paddsb %xmm9, %xmm5", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Paddsw, RegMem::reg(xmm7), w_xmm6), + Inst::xmm_rm_r(SseOpcode::Paddsw, RegMem::reg(xmm7), w_xmm6, None), "660FEDF7", "paddsw %xmm7, %xmm6", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Paddusb, RegMem::reg(xmm12), w_xmm13), + Inst::xmm_rm_r(SseOpcode::Paddusb, RegMem::reg(xmm12), w_xmm13, None), "66450FDCEC", "paddusb %xmm12, %xmm13", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Paddusw, RegMem::reg(xmm1), w_xmm8), + Inst::xmm_rm_r(SseOpcode::Paddusw, RegMem::reg(xmm1), w_xmm8, None), "66440FDDC1", "paddusw %xmm1, %xmm8", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Psubsb, RegMem::reg(xmm9), w_xmm5), + Inst::xmm_rm_r(SseOpcode::Psubsb, RegMem::reg(xmm9), w_xmm5, None), "66410FE8E9", "psubsb %xmm9, %xmm5", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Psubsw, RegMem::reg(xmm7), w_xmm6), + Inst::xmm_rm_r(SseOpcode::Psubsw, RegMem::reg(xmm7), w_xmm6, None), "660FE9F7", "psubsw %xmm7, %xmm6", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Psubusb, RegMem::reg(xmm12), 
w_xmm13), + Inst::xmm_rm_r(SseOpcode::Psubusb, RegMem::reg(xmm12), w_xmm13, None), "66450FD8EC", "psubusb %xmm12, %xmm13", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Psubusw, RegMem::reg(xmm1), w_xmm8), + Inst::xmm_rm_r(SseOpcode::Psubusw, RegMem::reg(xmm1), w_xmm8, None), "66440FD9C1", "psubusw %xmm1, %xmm8", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Pavgb, RegMem::reg(xmm12), w_xmm13), + Inst::xmm_rm_r(SseOpcode::Pavgb, RegMem::reg(xmm12), w_xmm13, None), "66450FE0EC", "pavgb %xmm12, %xmm13", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Pavgw, RegMem::reg(xmm1), w_xmm8), + Inst::xmm_rm_r(SseOpcode::Pavgw, RegMem::reg(xmm1), w_xmm8, None), "66440FE3C1", "pavgw %xmm1, %xmm8", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Psubb, RegMem::reg(xmm5), w_xmm9), + Inst::xmm_rm_r(SseOpcode::Psubb, RegMem::reg(xmm5), w_xmm9, None), "66440FF8CD", "psubb %xmm5, %xmm9", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Psubw, RegMem::reg(xmm6), w_xmm7), + Inst::xmm_rm_r(SseOpcode::Psubw, RegMem::reg(xmm6), w_xmm7, None), "660FF9FE", "psubw %xmm6, %xmm7", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Psubd, RegMem::reg(xmm13), w_xmm12), + Inst::xmm_rm_r(SseOpcode::Psubd, RegMem::reg(xmm13), w_xmm12, None), "66450FFAE5", "psubd %xmm13, %xmm12", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Psubq, RegMem::reg(xmm8), w_xmm1), + Inst::xmm_rm_r(SseOpcode::Psubq, RegMem::reg(xmm8), w_xmm1, None), "66410FFBC8", "psubq %xmm8, %xmm1", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Pmulld, RegMem::reg(xmm15), w_xmm6), + Inst::xmm_rm_r(SseOpcode::Pmulld, RegMem::reg(xmm15), w_xmm6, None), "66410F3840F7", "pmulld %xmm15, %xmm6", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(xmm14), w_xmm1), + Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(xmm14), w_xmm1, None), "66410FD5CE", "pmullw %xmm14, %xmm1", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Pmuludq, RegMem::reg(xmm8), w_xmm9), + Inst::xmm_rm_r(SseOpcode::Pmuludq, RegMem::reg(xmm8), w_xmm9, None), "66450FF4C8", "pmuludq 
%xmm8, %xmm9", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Pmaxsb, RegMem::reg(xmm15), w_xmm6), + Inst::xmm_rm_r(SseOpcode::Pmaxsb, RegMem::reg(xmm15), w_xmm6, None), "66410F383CF7", "pmaxsb %xmm15, %xmm6", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Pmaxsw, RegMem::reg(xmm15), w_xmm6), + Inst::xmm_rm_r(SseOpcode::Pmaxsw, RegMem::reg(xmm15), w_xmm6, None), "66410FEEF7", "pmaxsw %xmm15, %xmm6", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Pmaxsd, RegMem::reg(xmm15), w_xmm6), + Inst::xmm_rm_r(SseOpcode::Pmaxsd, RegMem::reg(xmm15), w_xmm6, None), "66410F383DF7", "pmaxsd %xmm15, %xmm6", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Pmaxub, RegMem::reg(xmm14), w_xmm1), + Inst::xmm_rm_r(SseOpcode::Pmaxub, RegMem::reg(xmm14), w_xmm1, None), "66410FDECE", "pmaxub %xmm14, %xmm1", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Pmaxuw, RegMem::reg(xmm14), w_xmm1), + Inst::xmm_rm_r(SseOpcode::Pmaxuw, RegMem::reg(xmm14), w_xmm1, None), "66410F383ECE", "pmaxuw %xmm14, %xmm1", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Pmaxud, RegMem::reg(xmm14), w_xmm1), + Inst::xmm_rm_r(SseOpcode::Pmaxud, RegMem::reg(xmm14), w_xmm1, None), "66410F383FCE", "pmaxud %xmm14, %xmm1", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Pminsb, RegMem::reg(xmm8), w_xmm9), + Inst::xmm_rm_r(SseOpcode::Pminsb, RegMem::reg(xmm8), w_xmm9, None), "66450F3838C8", "pminsb %xmm8, %xmm9", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Pminsw, RegMem::reg(xmm8), w_xmm9), + Inst::xmm_rm_r(SseOpcode::Pminsw, RegMem::reg(xmm8), w_xmm9, None), "66450FEAC8", "pminsw %xmm8, %xmm9", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Pminsd, RegMem::reg(xmm8), w_xmm9), + Inst::xmm_rm_r(SseOpcode::Pminsd, RegMem::reg(xmm8), w_xmm9, None), "66450F3839C8", "pminsd %xmm8, %xmm9", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Pminub, RegMem::reg(xmm3), w_xmm2), + Inst::xmm_rm_r(SseOpcode::Pminub, RegMem::reg(xmm3), w_xmm2, None), "660FDAD3", "pminub %xmm3, %xmm2", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Pminuw, RegMem::reg(xmm3), 
w_xmm2), + Inst::xmm_rm_r(SseOpcode::Pminuw, RegMem::reg(xmm3), w_xmm2, None), "660F383AD3", "pminuw %xmm3, %xmm2", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Pminud, RegMem::reg(xmm3), w_xmm2), + Inst::xmm_rm_r(SseOpcode::Pminud, RegMem::reg(xmm3), w_xmm2, None), "660F383BD3", "pminud %xmm3, %xmm2", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::reg(xmm11), w_xmm2), + Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::reg(xmm11), w_xmm2, None), "66410FEFD3", "pxor %xmm11, %xmm2", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Pshufb, RegMem::reg(xmm11), w_xmm2), + Inst::xmm_rm_r(SseOpcode::Pshufb, RegMem::reg(xmm11), w_xmm2, None), "66410F3800D3", "pshufb %xmm11, %xmm2", )); @@ -3488,12 +3490,12 @@ fn test_x64_emit() { // ======================================================== // XmmRmRImm insns.push(( - Inst::xmm_rm_r_imm(SseOpcode::Cmppd, RegMem::reg(xmm5), w_xmm1, 2, false), + Inst::xmm_rm_r_imm(SseOpcode::Cmppd, RegMem::reg(xmm5), w_xmm1, 2, false, None), "660FC2CD02", "cmppd $2, %xmm5, %xmm1", )); insns.push(( - Inst::xmm_rm_r_imm(SseOpcode::Cmpps, RegMem::reg(xmm15), w_xmm7, 0, false), + Inst::xmm_rm_r_imm(SseOpcode::Cmpps, RegMem::reg(xmm15), w_xmm7, 0, false, None), "410FC2FF00", "cmpps $0, %xmm15, %xmm7", )); diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 1fe0de694183..aac925db626f 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -212,6 +212,7 @@ pub enum Inst { op: SseOpcode, src: RegMem, dst: Writable, + srcloc: Option, }, /// XMM (scalar or vector) unary op: mov between XMM registers (32 64) (reg addr) reg, sqrt, @@ -338,6 +339,7 @@ pub enum Inst { dst: Writable, imm: u8, is64: bool, + srcloc: Option, }, // ===================================== @@ -711,10 +713,20 @@ impl Inst { } } - pub(crate) fn xmm_rm_r(op: SseOpcode, src: RegMem, dst: Writable) -> Self { + pub(crate) fn xmm_rm_r( + op: SseOpcode, + src: RegMem, + dst: Writable, + 
srcloc: Option, + ) -> Self { src.assert_regclass_is(RegClass::V128); debug_assert!(dst.to_reg().get_class() == RegClass::V128); - Inst::XmmRmR { op, src, dst } + Inst::XmmRmR { + op, + src, + dst, + srcloc, + } } pub(crate) fn xmm_uninit_value(dst: Writable) -> Self { @@ -869,6 +881,7 @@ impl Inst { dst: Writable, imm: u8, is64: bool, + srcloc: Option, ) -> Inst { Inst::XmmRmRImm { op, @@ -876,6 +889,7 @@ impl Inst { dst, imm, is64, + srcloc, } } @@ -1233,16 +1247,26 @@ impl Inst { /// Choose which instruction to use for comparing two values for equality. pub(crate) fn equals(ty: Type, from: RegMem, to: Writable) -> Inst { match ty { - types::I8X16 | types::B8X16 => Inst::xmm_rm_r(SseOpcode::Pcmpeqb, from, to), - types::I16X8 | types::B16X8 => Inst::xmm_rm_r(SseOpcode::Pcmpeqw, from, to), - types::I32X4 | types::B32X4 => Inst::xmm_rm_r(SseOpcode::Pcmpeqd, from, to), - types::I64X2 | types::B64X2 => Inst::xmm_rm_r(SseOpcode::Pcmpeqq, from, to), - types::F32X4 => { - Inst::xmm_rm_r_imm(SseOpcode::Cmpps, from, to, FcmpImm::Equal.encode(), false) - } - types::F64X2 => { - Inst::xmm_rm_r_imm(SseOpcode::Cmppd, from, to, FcmpImm::Equal.encode(), false) - } + types::I8X16 | types::B8X16 => Inst::xmm_rm_r(SseOpcode::Pcmpeqb, from, to, None), + types::I16X8 | types::B16X8 => Inst::xmm_rm_r(SseOpcode::Pcmpeqw, from, to, None), + types::I32X4 | types::B32X4 => Inst::xmm_rm_r(SseOpcode::Pcmpeqd, from, to, None), + types::I64X2 | types::B64X2 => Inst::xmm_rm_r(SseOpcode::Pcmpeqq, from, to, None), + types::F32X4 => Inst::xmm_rm_r_imm( + SseOpcode::Cmpps, + from, + to, + FcmpImm::Equal.encode(), + false, + None, + ), + types::F64X2 => Inst::xmm_rm_r_imm( + SseOpcode::Cmppd, + from, + to, + FcmpImm::Equal.encode(), + false, + None, + ), _ => unimplemented!("unimplemented type for Inst::equals: {}", ty), } } @@ -1250,9 +1274,11 @@ impl Inst { /// Choose which instruction to use for computing a bitwise AND on two values. 
pub(crate) fn and(ty: Type, from: RegMem, to: Writable) -> Inst { match ty { - types::F32X4 => Inst::xmm_rm_r(SseOpcode::Andps, from, to), - types::F64X2 => Inst::xmm_rm_r(SseOpcode::Andpd, from, to), - _ if ty.is_vector() && ty.bits() == 128 => Inst::xmm_rm_r(SseOpcode::Pand, from, to), + types::F32X4 => Inst::xmm_rm_r(SseOpcode::Andps, from, to, None), + types::F64X2 => Inst::xmm_rm_r(SseOpcode::Andpd, from, to, None), + _ if ty.is_vector() && ty.bits() == 128 => { + Inst::xmm_rm_r(SseOpcode::Pand, from, to, None) + } _ => unimplemented!("unimplemented type for Inst::and: {}", ty), } } @@ -1260,9 +1286,11 @@ impl Inst { /// Choose which instruction to use for computing a bitwise AND NOT on two values. pub(crate) fn and_not(ty: Type, from: RegMem, to: Writable) -> Inst { match ty { - types::F32X4 => Inst::xmm_rm_r(SseOpcode::Andnps, from, to), - types::F64X2 => Inst::xmm_rm_r(SseOpcode::Andnpd, from, to), - _ if ty.is_vector() && ty.bits() == 128 => Inst::xmm_rm_r(SseOpcode::Pandn, from, to), + types::F32X4 => Inst::xmm_rm_r(SseOpcode::Andnps, from, to, None), + types::F64X2 => Inst::xmm_rm_r(SseOpcode::Andnpd, from, to, None), + _ if ty.is_vector() && ty.bits() == 128 => { + Inst::xmm_rm_r(SseOpcode::Pandn, from, to, None) + } _ => unimplemented!("unimplemented type for Inst::and_not: {}", ty), } } @@ -1270,9 +1298,11 @@ impl Inst { /// Choose which instruction to use for computing a bitwise OR on two values. 
pub(crate) fn or(ty: Type, from: RegMem, to: Writable) -> Inst { match ty { - types::F32X4 => Inst::xmm_rm_r(SseOpcode::Orps, from, to), - types::F64X2 => Inst::xmm_rm_r(SseOpcode::Orpd, from, to), - _ if ty.is_vector() && ty.bits() == 128 => Inst::xmm_rm_r(SseOpcode::Por, from, to), + types::F32X4 => Inst::xmm_rm_r(SseOpcode::Orps, from, to, None), + types::F64X2 => Inst::xmm_rm_r(SseOpcode::Orpd, from, to, None), + _ if ty.is_vector() && ty.bits() == 128 => { + Inst::xmm_rm_r(SseOpcode::Por, from, to, None) + } _ => unimplemented!("unimplemented type for Inst::or: {}", ty), } } @@ -1280,9 +1310,11 @@ impl Inst { /// Choose which instruction to use for computing a bitwise XOR on two values. pub(crate) fn xor(ty: Type, from: RegMem, to: Writable) -> Inst { match ty { - types::F32X4 => Inst::xmm_rm_r(SseOpcode::Xorps, from, to), - types::F64X2 => Inst::xmm_rm_r(SseOpcode::Xorpd, from, to), - _ if ty.is_vector() && ty.bits() == 128 => Inst::xmm_rm_r(SseOpcode::Pxor, from, to), + types::F32X4 => Inst::xmm_rm_r(SseOpcode::Xorps, from, to, None), + types::F64X2 => Inst::xmm_rm_r(SseOpcode::Xorpd, from, to, None), + _ if ty.is_vector() && ty.bits() == 128 => { + Inst::xmm_rm_r(SseOpcode::Pxor, from, to, None) + } _ => unimplemented!("unimplemented type for Inst::xor: {}", ty), } } @@ -1429,7 +1461,7 @@ impl PrettyPrint for Inst { dst.show_rru(mb_rru), ), - Inst::XmmRmR { op, src, dst } => format!( + Inst::XmmRmR { op, src, dst, .. } => format!( "{} {}, {}", ljustify(op.to_string()), src.show_rru_sized(mb_rru, 8), @@ -1459,7 +1491,7 @@ impl PrettyPrint for Inst { show_ireg_sized(rhs_dst.to_reg(), mb_rru, 8), ), - Inst::XmmRmRImm { op, src, dst, imm, is64 } => format!( + Inst::XmmRmRImm { op, src, dst, imm, is64, .. 
} => format!( "{} ${}, {}, {}", ljustify(format!("{}{}", op.to_string(), if *is64 { ".w" } else { "" })), imm, @@ -2595,6 +2627,7 @@ impl MachInst for Inst { SseOpcode::Xorps, RegMem::reg(to_reg.to_reg()), to_reg, + None, )); } else { let tmp = alloc_tmp(RegClass::I64, types::I32); @@ -2613,6 +2646,7 @@ impl MachInst for Inst { SseOpcode::Xorpd, RegMem::reg(to_reg.to_reg()), to_reg, + None, )); } else { let tmp = alloc_tmp(RegClass::I64, types::I64); diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 108072b97cce..576b87551537 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -3,7 +3,7 @@ use crate::data_value::DataValue; use crate::ir::{ condcodes::FloatCC, condcodes::IntCC, types, AbiParam, ArgumentPurpose, ExternalName, - Inst as IRInst, InstructionData, LibCall, Opcode, Signature, Type, + Inst as IRInst, InstructionData, LibCall, Opcode, Signature, SourceLoc, Type, }; use crate::isa::x64::abi::*; use crate::isa::x64::inst::args::*; @@ -227,6 +227,7 @@ fn emit_insert_lane>( dst: Writable, lane: u8, ty: Type, + srcloc: Option, ) { if !ty.is_float() { let (sse_op, is64) = match ty.lane_bits() { @@ -236,13 +237,13 @@ fn emit_insert_lane>( 64 => (SseOpcode::Pinsrd, true), _ => panic!("Unable to insertlane for lane size: {}", ty.lane_bits()), }; - ctx.emit(Inst::xmm_rm_r_imm(sse_op, src, dst, lane, is64)); + ctx.emit(Inst::xmm_rm_r_imm(sse_op, src, dst, lane, is64, srcloc)); } else if ty == types::F32 { let sse_op = SseOpcode::Insertps; // Insert 32-bits from replacement (at index 00, bits 7:8) to vector (lane // shifted into bits 5:6). 
let lane = 0b00_00_00_00 | lane << 4; - ctx.emit(Inst::xmm_rm_r_imm(sse_op, src, dst, lane, false)); + ctx.emit(Inst::xmm_rm_r_imm(sse_op, src, dst, lane, false, srcloc)); } else if ty == types::F64 { let sse_op = match lane { // Move the lowest quadword in replacement to vector without changing @@ -256,7 +257,7 @@ fn emit_insert_lane>( // Here we use the `xmm_rm_r` encoding because it correctly tells the register // allocator how we are using `dst`: we are using `dst` as a `mod` whereas other // encoding formats like `xmm_unary_rm_r` treat it as a `def`. - ctx.emit(Inst::xmm_rm_r(sse_op, src, dst)); + ctx.emit(Inst::xmm_rm_r(sse_op, src, dst, srcloc)); } else { panic!("unable to emit insertlane for type: {}", ty) } @@ -694,6 +695,7 @@ fn lower_insn_to_regs>( SseOpcode::Pmuludq, RegMem::reg(lhs.clone()), rhs_1, + None, )); // B' = B @@ -707,7 +709,12 @@ fn lower_insn_to_regs>( RegMemImm::imm(32), lhs_1, )); - ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmuludq, RegMem::reg(rhs), lhs_1)); + ctx.emit(Inst::xmm_rm_r( + SseOpcode::Pmuludq, + RegMem::reg(rhs), + lhs_1, + None, + )); // B' = B' + A' // B' = B' << 32 @@ -715,6 +722,7 @@ fn lower_insn_to_regs>( SseOpcode::Paddq, RegMem::reg(rhs_1.to_reg()), lhs_1, + None, )); ctx.emit(Inst::xmm_rmi_reg( SseOpcode::Psllq, @@ -731,11 +739,13 @@ fn lower_insn_to_regs>( SseOpcode::Pmuludq, RegMem::reg(lhs.clone()), rhs_1, + None, )); ctx.emit(Inst::xmm_rm_r( SseOpcode::Paddq, RegMem::reg(lhs_1.to_reg()), rhs_1, + None, )); ctx.emit(Inst::gen_move(dst, rhs_1.to_reg(), ty)); return Ok(()); @@ -770,7 +780,7 @@ fn lower_insn_to_regs>( // Move the `lhs` to the same register as `dst`. 
ctx.emit(Inst::gen_move(dst, lhs, ty)); - ctx.emit(Inst::xmm_rm_r(sse_op, rhs, dst)); + ctx.emit(Inst::xmm_rm_r(sse_op, rhs, dst, None)); } else { let is_64 = ty == types::I64; let alu_op = match op { @@ -828,7 +838,7 @@ fn lower_insn_to_regs>( // Note the flipping of operands: the `rhs` operand is used as the destination instead // of the `lhs` as in the other bit operations above (e.g. `band`). ctx.emit(Inst::gen_move(dst, rhs, ty)); - ctx.emit(Inst::xmm_rm_r(sse_op, lhs, dst)); + ctx.emit(Inst::xmm_rm_r(sse_op, lhs, dst, None)); } Opcode::Iabs => { @@ -884,7 +894,7 @@ fn lower_insn_to_regs>( // Move the `lhs` to the same register as `dst`. ctx.emit(Inst::gen_move(dst, lhs, ty)); - ctx.emit(Inst::xmm_rm_r(sse_op, rhs, dst)); + ctx.emit(Inst::xmm_rm_r(sse_op, rhs, dst, None)); } else { panic!("Unsupported type for {} instruction: {}", op, ty); } @@ -1007,8 +1017,9 @@ fn lower_insn_to_regs>( SseOpcode::Pxor, RegMem::reg(tmp.to_reg()), tmp, + None, )); - ctx.emit(Inst::xmm_rm_r(subtract_opcode, src, tmp)); + ctx.emit(Inst::xmm_rm_r(subtract_opcode, src, tmp, None)); ctx.emit(Inst::xmm_unary_rm_r( SseOpcode::Movapd, RegMem::reg(tmp.to_reg()), @@ -1561,34 +1572,44 @@ fn lower_insn_to_regs>( }; match condcode { - IntCC::Equal => ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst)), + IntCC::Equal => ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst, None)), IntCC::NotEqual => { - ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst)); + ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst, None)); // Emit all 1s into the `tmp` register. let tmp = ctx.alloc_tmp(RegClass::V128, ty); - ctx.emit(Inst::xmm_rm_r(eq(ty), RegMem::from(tmp), tmp)); + ctx.emit(Inst::xmm_rm_r(eq(ty), RegMem::from(tmp), tmp, None)); // Invert the result of the `PCMPEQ*`. 
- ctx.emit(Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::from(tmp), dst)); + ctx.emit(Inst::xmm_rm_r( + SseOpcode::Pxor, + RegMem::from(tmp), + dst, + None, + )); } IntCC::SignedGreaterThan | IntCC::SignedLessThan => { - ctx.emit(Inst::xmm_rm_r(gt(ty), input, dst)) + ctx.emit(Inst::xmm_rm_r(gt(ty), input, dst, None)) } IntCC::SignedGreaterThanOrEqual | IntCC::SignedLessThanOrEqual => { - ctx.emit(Inst::xmm_rm_r(mins(ty), input.clone(), dst)); - ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst)) + ctx.emit(Inst::xmm_rm_r(mins(ty), input.clone(), dst, None)); + ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst, None)) } IntCC::UnsignedGreaterThan | IntCC::UnsignedLessThan => { - ctx.emit(Inst::xmm_rm_r(maxu(ty), input.clone(), dst)); - ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst)); + ctx.emit(Inst::xmm_rm_r(maxu(ty), input.clone(), dst, None)); + ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst, None)); // Emit all 1s into the `tmp` register. let tmp = ctx.alloc_tmp(RegClass::V128, ty); - ctx.emit(Inst::xmm_rm_r(eq(ty), RegMem::from(tmp), tmp)); + ctx.emit(Inst::xmm_rm_r(eq(ty), RegMem::from(tmp), tmp, None)); // Invert the result of the `PCMPEQ*`. - ctx.emit(Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::from(tmp), dst)); + ctx.emit(Inst::xmm_rm_r( + SseOpcode::Pxor, + RegMem::from(tmp), + dst, + None, + )); } IntCC::UnsignedGreaterThanOrEqual | IntCC::UnsignedLessThanOrEqual => { - ctx.emit(Inst::xmm_rm_r(minu(ty), input.clone(), dst)); - ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst)) + ctx.emit(Inst::xmm_rm_r(minu(ty), input.clone(), dst, None)); + ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst, None)) } _ => unimplemented!("Unimplemented comparison code for icmp: {}", condcode), } @@ -1686,7 +1707,7 @@ fn lower_insn_to_regs>( ctx.emit(Inst::gen_move(dst, lhs, input_ty)); // Emit the comparison. 
- ctx.emit(Inst::xmm_rm_r_imm(op, rhs, dst, imm.encode(), false)); + ctx.emit(Inst::xmm_rm_r_imm(op, rhs, dst, imm.encode(), false, None)); } } @@ -1899,7 +1920,7 @@ fn lower_insn_to_regs>( ty ), }; - ctx.emit(Inst::xmm_rm_r(sse_op, rhs, dst)); + ctx.emit(Inst::xmm_rm_r(sse_op, rhs, dst, None)); } Opcode::Fmin | Opcode::Fmax => { @@ -1988,15 +2009,15 @@ fn lower_insn_to_regs>( ctx.emit(Inst::xmm_mov(mov_op, RegMem::reg(lhs), tmp_xmm1, None)); // Perform min in reverse direction - ctx.emit(Inst::xmm_rm_r(min_op, RegMem::from(dst), tmp_xmm1)); + ctx.emit(Inst::xmm_rm_r(min_op, RegMem::from(dst), tmp_xmm1, None)); // Perform min in original direction - ctx.emit(Inst::xmm_rm_r(min_op, RegMem::reg(lhs), dst)); + ctx.emit(Inst::xmm_rm_r(min_op, RegMem::reg(lhs), dst, None)); // X64 handles propagation of -0's and Nans differently between left and right // operands. After doing the min in both directions, this OR will // guarrentee capture of -0's and Nan in our tmp register - ctx.emit(Inst::xmm_rm_r(or_op, RegMem::from(dst), tmp_xmm1)); + ctx.emit(Inst::xmm_rm_r(or_op, RegMem::from(dst), tmp_xmm1, None)); // Compare unordered to create mask for lanes containing NaNs and then use // that mask to saturate the NaN containing lanes in the tmp register with 1s. @@ -2009,8 +2030,14 @@ fn lower_insn_to_regs>( dst, cond.encode(), false, + None, + )); + ctx.emit(Inst::xmm_rm_r( + or_op, + RegMem::reg(dst.to_reg()), + tmp_xmm1, + None, )); - ctx.emit(Inst::xmm_rm_r(or_op, RegMem::reg(dst.to_reg()), tmp_xmm1)); // The dst register holds a mask for lanes containing NaNs. // We take that mask and shift in preparation for creating a different mask @@ -2022,7 +2049,12 @@ fn lower_insn_to_regs>( // Finally we do a nand with the tmp register to produce the final results // in the dst. 
- ctx.emit(Inst::xmm_rm_r(andn_op, RegMem::reg(tmp_xmm1.to_reg()), dst)); + ctx.emit(Inst::xmm_rm_r( + andn_op, + RegMem::reg(tmp_xmm1.to_reg()), + dst, + None, + )); } else { let ( mov_op, @@ -2065,23 +2097,43 @@ fn lower_insn_to_regs>( ctx.emit(Inst::xmm_mov(mov_op, RegMem::reg(lhs), tmp_xmm1, None)); // Perform max in reverse direction. - ctx.emit(Inst::xmm_rm_r(max_op, RegMem::reg(dst.to_reg()), tmp_xmm1)); + ctx.emit(Inst::xmm_rm_r( + max_op, + RegMem::reg(dst.to_reg()), + tmp_xmm1, + None, + )); // Perform max in original direction. - ctx.emit(Inst::xmm_rm_r(max_op, RegMem::reg(lhs), dst)); + ctx.emit(Inst::xmm_rm_r(max_op, RegMem::reg(lhs), dst, None)); // Get the difference between the two results and store in tmp. // Max uses a different approach than min to account for potential // discrepancies with plus/minus 0. - ctx.emit(Inst::xmm_rm_r(xor_op, RegMem::reg(tmp_xmm1.to_reg()), dst)); + ctx.emit(Inst::xmm_rm_r( + xor_op, + RegMem::reg(tmp_xmm1.to_reg()), + dst, + None, + )); // X64 handles propagation of -0's and Nans differently between left and right // operands. After doing the max in both directions, this OR will // guarentee capture of 0's and Nan in our tmp register. - ctx.emit(Inst::xmm_rm_r(or_op, RegMem::reg(dst.to_reg()), tmp_xmm1)); + ctx.emit(Inst::xmm_rm_r( + or_op, + RegMem::reg(dst.to_reg()), + tmp_xmm1, + None, + )); // Capture NaNs and sign discrepancies. - ctx.emit(Inst::xmm_rm_r(sub_op, RegMem::reg(dst.to_reg()), tmp_xmm1)); + ctx.emit(Inst::xmm_rm_r( + sub_op, + RegMem::reg(dst.to_reg()), + tmp_xmm1, + None, + )); // Compare unordered to create mask for lanes containing NaNs and then use // that mask to saturate the NaN containing lanes in the tmp register with 1s. @@ -2092,6 +2144,7 @@ fn lower_insn_to_regs>( dst, cond.encode(), false, + None, )); // The dst register holds a mask for lanes containing NaNs. 
@@ -2104,7 +2157,12 @@ fn lower_insn_to_regs>( // Finally we do a nand with the tmp register to produce the final results // in the dst. - ctx.emit(Inst::xmm_rm_r(andn_op, RegMem::reg(tmp_xmm1.to_reg()), dst)); + ctx.emit(Inst::xmm_rm_r( + andn_op, + RegMem::reg(tmp_xmm1.to_reg()), + dst, + None, + )); } } } @@ -2327,7 +2385,7 @@ fn lower_insn_to_regs>( ctx.emit(inst); } - ctx.emit(Inst::xmm_rm_r(opcode, src, dst)); + ctx.emit(Inst::xmm_rm_r(opcode, src, dst, None)); } else { // Eventually vector constants should be available in `gen_constant` and this block // can be merged with the one above (TODO). @@ -2348,6 +2406,7 @@ fn lower_insn_to_regs>( tmp, cond.encode(), false, + None, ); ctx.emit(cmpps); @@ -2367,7 +2426,7 @@ fn lower_insn_to_regs>( ctx.emit(shift); // Apply shifted mask (XOR or AND). - let mask = Inst::xmm_rm_r(opcode, RegMem::reg(tmp.to_reg()), dst); + let mask = Inst::xmm_rm_r(opcode, RegMem::reg(tmp.to_reg()), dst, None); ctx.emit(mask); } else { panic!("unexpected type {:?} for Fabs", output_ty); @@ -2426,14 +2485,20 @@ fn lower_insn_to_regs>( dst, None, )); - ctx.emit(Inst::xmm_rm_r(and_not_op, RegMem::reg(lhs), dst)); + ctx.emit(Inst::xmm_rm_r(and_not_op, RegMem::reg(lhs), dst, None)); ctx.emit(Inst::xmm_mov(mov_op, RegMem::reg(rhs), tmp_xmm2, None)); ctx.emit(Inst::xmm_rm_r( and_op, RegMem::reg(tmp_xmm1.to_reg()), tmp_xmm2, + None, + )); + ctx.emit(Inst::xmm_rm_r( + or_op, + RegMem::reg(tmp_xmm2.to_reg()), + dst, + None, )); - ctx.emit(Inst::xmm_rm_r(or_op, RegMem::reg(tmp_xmm2.to_reg()), dst)); } Opcode::Ceil | Opcode::Floor | Opcode::Nearest | Opcode::Trunc => { @@ -3154,7 +3219,12 @@ fn lower_insn_to_regs>( // After loading the constructed mask in a temporary register, we use this to // shuffle the `dst` register (remember that, in this case, it is the same as // `src` so we disregard this register). 
- ctx.emit(Inst::xmm_rm_r(SseOpcode::Pshufb, RegMem::from(tmp), dst)); + ctx.emit(Inst::xmm_rm_r( + SseOpcode::Pshufb, + RegMem::from(tmp), + dst, + None, + )); } else { // If `lhs` and `rhs` are different, we must shuffle each separately and then OR // them together. This is necessary due to PSHUFB semantics. As in the case above, @@ -3166,7 +3236,12 @@ fn lower_insn_to_regs>( let constructed_mask = mask.iter().cloned().map(zero_unknown_lane_index).collect(); let tmp1 = ctx.alloc_tmp(RegClass::V128, types::I8X16); ctx.emit(Inst::xmm_load_const_seq(constructed_mask, tmp1, ty)); - ctx.emit(Inst::xmm_rm_r(SseOpcode::Pshufb, RegMem::from(tmp1), tmp0)); + ctx.emit(Inst::xmm_rm_r( + SseOpcode::Pshufb, + RegMem::from(tmp1), + tmp0, + None, + )); // PSHUFB the second argument, placing zeroes for unused lanes. let constructed_mask = mask @@ -3176,11 +3251,21 @@ fn lower_insn_to_regs>( .collect(); let tmp2 = ctx.alloc_tmp(RegClass::V128, types::I8X16); ctx.emit(Inst::xmm_load_const_seq(constructed_mask, tmp2, ty)); - ctx.emit(Inst::xmm_rm_r(SseOpcode::Pshufb, RegMem::from(tmp2), dst)); + ctx.emit(Inst::xmm_rm_r( + SseOpcode::Pshufb, + RegMem::from(tmp2), + dst, + None, + )); // OR the shuffled registers (the mechanism and lane-size for OR-ing the registers // is not important). - ctx.emit(Inst::xmm_rm_r(SseOpcode::Orps, RegMem::from(tmp0), dst)); + ctx.emit(Inst::xmm_rm_r( + SseOpcode::Orps, + RegMem::from(tmp0), + dst, + None, + )); // TODO when AVX512 is enabled we should replace this sequence with a single VPERMB } @@ -3214,6 +3299,7 @@ fn lower_insn_to_regs>( SseOpcode::Paddusb, RegMem::from(zero_mask), swizzle_mask, + None, )); // Shuffle `dst` using the fixed-up `swizzle_mask`. 
@@ -3221,6 +3307,7 @@ fn lower_insn_to_regs>( SseOpcode::Pshufb, RegMem::from(swizzle_mask), dst, + None, )); } @@ -3240,7 +3327,7 @@ fn lower_insn_to_regs>( debug_assert!(lane < ty.lane_count() as u8); ctx.emit(Inst::gen_move(dst, in_vec, ty)); - emit_insert_lane(ctx, src, dst, lane, ty.lane_type()); + emit_insert_lane(ctx, src, dst, lane, ty.lane_type(), None); } Opcode::Extractlane => { @@ -3266,7 +3353,7 @@ fn lower_insn_to_regs>( _ => panic!("Unable to extractlane for lane size: {}", ty.lane_bits()), }; let src = RegMem::reg(src); - ctx.emit(Inst::xmm_rm_r_imm(sse_op, src, dst, lane, w_bit)); + ctx.emit(Inst::xmm_rm_r_imm(sse_op, src, dst, lane, w_bit, None)); } else { if lane == 0 { // Remove the extractlane instruction, leaving the float where it is. The upper @@ -3288,7 +3375,7 @@ fn lower_insn_to_regs>( _ => unreachable!(), }; let src = RegMem::reg(src); - ctx.emit(Inst::xmm_rm_r_imm(sse_op, src, dst, mask, false)); + ctx.emit(Inst::xmm_rm_r_imm(sse_op, src, dst, mask, false, None)); } } } @@ -3307,16 +3394,26 @@ fn lower_insn_to_regs>( ctx.emit(Inst::xmm_uninit_value(dst)); match ty.lane_bits() { 8 => { - emit_insert_lane(ctx, src, dst, 0, ty.lane_type()); + emit_insert_lane(ctx, src, dst, 0, ty.lane_type(), srcloc); // Initialize a register with all 0s. let tmp = ctx.alloc_tmp(RegClass::V128, ty); - ctx.emit(Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::from(tmp), tmp)); + ctx.emit(Inst::xmm_rm_r( + SseOpcode::Pxor, + RegMem::from(tmp), + tmp, + srcloc, + )); // Shuffle the lowest byte lane to all other lanes. 
- ctx.emit(Inst::xmm_rm_r(SseOpcode::Pshufb, RegMem::from(tmp), dst)) + ctx.emit(Inst::xmm_rm_r( + SseOpcode::Pshufb, + RegMem::from(tmp), + dst, + srcloc, + )) } 16 => { - emit_insert_lane(ctx, src.clone(), dst, 0, ty.lane_type()); - emit_insert_lane(ctx, src, dst, 1, ty.lane_type()); + emit_insert_lane(ctx, src.clone(), dst, 0, ty.lane_type(), srcloc); + emit_insert_lane(ctx, src, dst, 1, ty.lane_type(), srcloc); // Shuffle the lowest two lanes to all other lanes. ctx.emit(Inst::xmm_rm_r_imm( SseOpcode::Pshufd, @@ -3324,10 +3421,11 @@ fn lower_insn_to_regs>( dst, 0, false, + srcloc, )) } 32 => { - emit_insert_lane(ctx, src, dst, 0, ty.lane_type()); + emit_insert_lane(ctx, src, dst, 0, ty.lane_type(), srcloc); // Shuffle the lowest lane to all other lanes. ctx.emit(Inst::xmm_rm_r_imm( SseOpcode::Pshufd, @@ -3335,11 +3433,12 @@ fn lower_insn_to_regs>( dst, 0, false, + srcloc, )) } 64 => { - emit_insert_lane(ctx, src.clone(), dst, 0, ty.lane_type()); - emit_insert_lane(ctx, src, dst, 1, ty.lane_type()); + emit_insert_lane(ctx, src.clone(), dst, 0, ty.lane_type(), srcloc); + emit_insert_lane(ctx, src, dst, 1, ty.lane_type(), srcloc); } _ => panic!("Invalid type to splat: {}", ty), } @@ -3373,9 +3472,14 @@ fn lower_insn_to_regs>( // Initialize a register with all 0s. let tmp = ctx.alloc_tmp(RegClass::V128, ty); - ctx.emit(Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::from(tmp), tmp)); + ctx.emit(Inst::xmm_rm_r( + SseOpcode::Pxor, + RegMem::from(tmp), + tmp, + None, + )); // Compare to see what lanes are filled with all 1s. - ctx.emit(Inst::xmm_rm_r(eq(src_ty), src, tmp)); + ctx.emit(Inst::xmm_rm_r(eq(src_ty), src, tmp, None)); // Set the ZF if the result is all zeroes. ctx.emit(Inst::xmm_cmp_rm_r( SseOpcode::Ptest,