Skip to content

Commit

Permalink
Add x86 encoding for SIMD imul
Browse files Browse the repository at this point in the history
Only i16x8 and i32x4 are encoded in this commit mainly because i8x16 and i64x2 do not have simple encodings in x86. i64x2 is not required by the SIMD spec and there is discussion (WebAssembly/simd#98 (comment)) about removing i8x16.
  • Loading branch information
abrown committed Sep 18, 2019
1 parent c33cb52 commit 2c9dad7
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 4 deletions.
14 changes: 14 additions & 0 deletions cranelift-codegen/meta/src/isa/x86/encodings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2012,6 +2012,20 @@ pub(crate) fn define(
e.enc_32_64(isub, rec_fa.opcodes(opcodes));
}

// SIMD integer multiplication: the x86 ISA does not have instructions for multiplying I8x16
// and I64x2. The necessary encodings are avoided for now (TODO) until we finalize whether they
// are essential to the Wasm SIMD spec (I64x2 has already been removed).
for (ty, opcodes, isap) in [
(I16, vec![0x66, 0x0f, 0xd5], None), // PMULLW from SSE2
(I32, vec![0x66, 0x0f, 0x38, 0x40], Some(use_sse41_simd)), // PMULLD from SSE4.1
]
.iter()
.cloned()
{
let imul = imul.bind_vector_from_lane(ty, sse_vector_size);
e.enc_32_64_maybe_isap(imul, rec_fa.opcodes(opcodes), isap);
}

// Reference type instructions

// Null references implemented as iconst 0.
Expand Down
3 changes: 1 addition & 2 deletions cranelift-codegen/meta/src/shared/instructions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1682,8 +1682,7 @@ pub(crate) fn define(
Wrapping integer multiplication: `a := x y \pmod{2^B}`.
This instruction does not depend on the signed/unsigned interpretation
of the
operands.
of the operands.
Polymorphic over all integer types (vector and scalar).
"#,
Expand Down
6 changes: 4 additions & 2 deletions cranelift-wasm/src/code_translator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -986,6 +986,10 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
let a = state.pop1();
state.push1(builder.ins().ineg(a))
}
Operator::I16x8Mul | Operator::I32x4Mul => {
let (a, b) = state.pop2();
state.push1(builder.ins().imul(a, b))
}
Operator::V128Load { .. }
| Operator::V128Store { .. }
| Operator::V8x16Shuffle { .. }
Expand Down Expand Up @@ -1055,13 +1059,11 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
| Operator::I16x8AddSaturateU
| Operator::I16x8SubSaturateS
| Operator::I16x8SubSaturateU
| Operator::I16x8Mul
| Operator::I32x4AnyTrue
| Operator::I32x4AllTrue
| Operator::I32x4Shl
| Operator::I32x4ShrS
| Operator::I32x4ShrU
| Operator::I32x4Mul
| Operator::I64x2AnyTrue
| Operator::I64x2AllTrue
| Operator::I64x2Shl
Expand Down
44 changes: 44 additions & 0 deletions filetests/isa/x86/simd-arithmetic.clif
Original file line number Diff line number Diff line change
Expand Up @@ -120,3 +120,47 @@ ebb0:

return ; bin: c3
}

function %imul_i32x4() -> b1 {
; Multiplies an i32x4 constant lane-wise by 2 with `imul`, verifies the emitted bytes via the
; encoding annotation on the `imul` line, and returns the conjunction of the lane checks below.
; Lanes 0, 1 and 3 are checked; lane 2 (1 * 2) is left unchecked.
ebb0:
[-, %xmm0] v0 = vconst.i32x4 [-1 0 1 -2147483647] ; e.g. -2147483647 == 0x80_00_00_01
[-, %xmm1] v1 = vconst.i32x4 [2 2 2 2]
[-, %xmm0] v2 = imul v0, v1 ; bin: 66 0f 38 40 c1

; lane 0: -1 * 2 == -2
v3 = extractlane v2, 0
v4 = icmp_imm eq v3, -2

; lane 1: 0 * 2 == 0
v5 = extractlane v2, 1
v6 = icmp_imm eq v5, 0

; lane 3: the product wraps around to 2
v7 = extractlane v2, 3
v8 = icmp_imm eq v7, 2 ; 0x80_00_00_01 * 2 == 0x1_00_00_00_02 (and the 1 is dropped)

; all three lane checks must hold
v9 = band v4, v6
v10 = band v8, v9
return v10
}
; run

function %imul_i16x8() -> b1 {
; Multiplies an i16x8 constant lane-wise by 2 with `imul`, verifies the emitted bytes via the
; encoding annotation on the `imul` line, and returns the conjunction of the lane checks below.
; Lanes 0, 1 and 3 are checked.
ebb0:
[-, %xmm1] v0 = vconst.i16x8 [-1 0 1 32767 0 0 0 0] ; e.g. 32767 == 0x7f_ff
[-, %xmm2] v1 = vconst.i16x8 [2 2 2 2 0 0 0 0]
[-, %xmm1] v2 = imul v0, v1 ; bin: 66 0f d5 ca

; lane 0: -1 * 2 == -2, seen here as 0xfffe
v3 = extractlane v2, 0
v4 = icmp_imm eq v3, 0xfffe ; TODO -2 will not work here and below because v3 is being
; uextend-ed, not sextend-ed

; lane 1: 0 * 2 == 0
v5 = extractlane v2, 1
v6 = icmp_imm eq v5, 0

; lane 3: 32767 * 2 == 65534
v7 = extractlane v2, 3
v8 = icmp_imm eq v7, 0xfffe ; 0x7f_ff * 2 == 0xff_fe

; all three lane checks must hold
v9 = band v4, v6
v10 = band v8, v9

; Return the combined result; returning only v4 would leave lanes 1 and 3 unverified.
return v10
}
; run

0 comments on commit 2c9dad7

Please sign in to comment.