diff --git a/cranelift-codegen/meta/src/isa/x86/encodings.rs b/cranelift-codegen/meta/src/isa/x86/encodings.rs
index 686701229..05c15cd43 100644
--- a/cranelift-codegen/meta/src/isa/x86/encodings.rs
+++ b/cranelift-codegen/meta/src/isa/x86/encodings.rs
@@ -2012,6 +2012,20 @@ pub(crate) fn define(
         e.enc_32_64(isub, rec_fa.opcodes(opcodes));
     }
 
+    // SIMD integer multiplication: the x86 ISA does not have instructions for multiplying I8x16
+    // and I64x2. The necessary encodings are avoided for now (TODO) until we finalize whether they
+    // are essential to the Wasm SIMD spec (I64x2 has already been removed).
+    for (ty, opcodes, isap) in [
+        (I16, vec![0x66, 0x0f, 0xd5], None), // PMULLW from SSE2
+        (I32, vec![0x66, 0x0f, 0x38, 0x40], Some(use_sse41_simd)), // PMULLD from SSE4.1
+    ]
+    .iter()
+    .cloned()
+    {
+        let imul = imul.bind_vector_from_lane(ty, sse_vector_size);
+        e.enc_32_64_maybe_isap(imul, rec_fa.opcodes(opcodes), isap);
+    }
+
     // Reference type instructions
 
     // Null references implemented as iconst 0.
diff --git a/cranelift-codegen/meta/src/shared/instructions.rs b/cranelift-codegen/meta/src/shared/instructions.rs
index 2b340910d..51e303f0d 100644
--- a/cranelift-codegen/meta/src/shared/instructions.rs
+++ b/cranelift-codegen/meta/src/shared/instructions.rs
@@ -1682,8 +1682,7 @@ pub(crate) fn define(
         Wrapping integer multiplication: `a := x y \pmod{2^B}`.
 
         This instruction does not depend on the signed/unsigned interpretation
-        of the
-        operands.
+        of the operands.
 
         Polymorphic over all integer types (vector and scalar).
         "#,
diff --git a/cranelift-wasm/src/code_translator.rs b/cranelift-wasm/src/code_translator.rs
index d0516fbb5..70afb1b0d 100644
--- a/cranelift-wasm/src/code_translator.rs
+++ b/cranelift-wasm/src/code_translator.rs
@@ -986,6 +986,10 @@ pub fn translate_operator(
             let a = state.pop1();
             state.push1(builder.ins().ineg(a))
         }
+        Operator::I16x8Mul | Operator::I32x4Mul => {
+            let (a, b) = state.pop2();
+            state.push1(builder.ins().imul(a, b))
+        }
         Operator::V128Load { .. }
         | Operator::V128Store { .. }
         | Operator::V8x16Shuffle { .. }
@@ -1055,13 +1059,11 @@ pub fn translate_operator(
         | Operator::I16x8AddSaturateU
         | Operator::I16x8SubSaturateS
         | Operator::I16x8SubSaturateU
-        | Operator::I16x8Mul
         | Operator::I32x4AnyTrue
         | Operator::I32x4AllTrue
         | Operator::I32x4Shl
         | Operator::I32x4ShrS
         | Operator::I32x4ShrU
-        | Operator::I32x4Mul
         | Operator::I64x2AnyTrue
         | Operator::I64x2AllTrue
         | Operator::I64x2Shl
diff --git a/filetests/isa/x86/simd-arithmetic.clif b/filetests/isa/x86/simd-arithmetic.clif
index 824417772..e2714a91d 100644
--- a/filetests/isa/x86/simd-arithmetic.clif
+++ b/filetests/isa/x86/simd-arithmetic.clif
@@ -120,3 +120,47 @@ ebb0:
 
             return ; bin: c3
 }
+
+function %imul_i32x4() -> b1 {
+ebb0:
+[-, %xmm0]  v0 = vconst.i32x4 [-1 0 1 -2147483647] ; e.g. -2147483647 == 0x80_00_00_01
+[-, %xmm1]  v1 = vconst.i32x4 [2 2 2 2]
+[-, %xmm0]  v2 = imul v0, v1 ; bin: 66 0f 38 40 c1
+
+            v3 = extractlane v2, 0
+            v4 = icmp_imm eq v3, -2
+
+            v5 = extractlane v2, 1
+            v6 = icmp_imm eq v5, 0
+
+            v7 = extractlane v2, 3
+            v8 = icmp_imm eq v7, 2 ; 0x80_00_00_01 * 2 == 0x1_00_00_00_02 (and the 1 is dropped)
+
+            v9 = band v4, v6
+            v10 = band v8, v9
+            return v10 ; true only if the checks on lanes 0, 1 and 3 all passed
+}
+; run
+
+function %imul_i16x8() -> b1 {
+ebb0:
+[-, %xmm1]  v0 = vconst.i16x8 [-1 0 1 32767 0 0 0 0] ; e.g. 32767 == 0x7f_ff
+[-, %xmm2]  v1 = vconst.i16x8 [2 2 2 2 0 0 0 0]
+[-, %xmm1]  v2 = imul v0, v1 ; bin: 66 0f d5 ca
+
+            v3 = extractlane v2, 0
+            v4 = icmp_imm eq v3, 0xfffe ; TODO -2 will not work here and below because v3 is being
+                                        ; uextend-ed, not sextend-ed
+
+            v5 = extractlane v2, 1
+            v6 = icmp_imm eq v5, 0
+
+            v7 = extractlane v2, 3
+            v8 = icmp_imm eq v7, 0xfffe ; 0x7f_ff * 2 == 0xff_fe
+
+            v9 = band v4, v6
+            v10 = band v8, v9
+
+            return v10 ; combine all three lane checks (v4 alone would ignore lanes 1 and 3)
+}
+; run