Skip to content

Commit

Permalink
Add x86 encoding for SIMD imul
Browse files Browse the repository at this point in the history
Only i16x8 and i32x4 are encoded in this commit mainly because i8x16 and i64x2 do not have simple encodings in x86. i64x2 is not required by the SIMD spec and there is discussion (WebAssembly/simd#98 (comment)) about removing i8x16.
  • Loading branch information
abrown committed Sep 25, 2019
1 parent 757eac4 commit dcab6cb
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 4 deletions.
13 changes: 13 additions & 0 deletions cranelift-codegen/meta/src/isa/x86/encodings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2074,6 +2074,19 @@ pub(crate) fn define(
e.enc_32_64(isub, rec_fa.opcodes(opcodes));
}

// SIMD integer multiplication: the x86 ISA does not have instructions for multiplying I8x16
// and I64x2 and these are (at the time of writing) not necessary for WASM SIMD.
//
// Each table entry is `(lane type, opcode bytes, optional ISA predicate)`. Only 16-bit and
// 32-bit lanes are listed, so this loop adds `imul` encodings for those two lane types only.
for (ty, opcodes, isap) in [
(I16, vec![0x66, 0x0f, 0xd5], None), // PMULLW from SSE2
(I32, vec![0x66, 0x0f, 0x38, 0x40], Some(use_sse41_simd)), // PMULLD from SSE4.1
]
.iter()
.cloned()
{
// Shadow the polymorphic `imul` with a version bound to the vector type derived from the
// lane type `ty` and `sse_vector_size`.
let imul = imul.bind_vector_from_lane(ty, sse_vector_size);
// NOTE(review): presumably an `isap` of `None` emits the encoding without any extra ISA
// predicate, while `Some(..)` gates it on that setting -- confirm against
// `enc_32_64_maybe_isap`.
e.enc_32_64_maybe_isap(imul, rec_fa.opcodes(opcodes), isap);
}

// SIMD icmp using PCMPEQ*
let mut pcmpeq_mapping: HashMap<u64, (Vec<u8>, Option<SettingPredicateNumber>)> =
HashMap::new();
Expand Down
3 changes: 1 addition & 2 deletions cranelift-codegen/meta/src/shared/instructions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1719,8 +1719,7 @@ pub(crate) fn define(
Wrapping integer multiplication: `a := x y \pmod{2^B}`.
This instruction does not depend on the signed/unsigned interpretation
of the
operands.
of the operands.
Polymorphic over all integer types (vector and scalar).
"#,
Expand Down
6 changes: 4 additions & 2 deletions cranelift-wasm/src/code_translator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -998,6 +998,10 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
let a = state.pop1();
state.push1(builder.ins().ineg(a))
}
Operator::I16x8Mul | Operator::I32x4Mul => {
let (a, b) = state.pop2();
state.push1(builder.ins().imul(a, b))
}
Operator::V128Load { .. }
| Operator::V128Store { .. }
| Operator::I8x16Eq
Expand Down Expand Up @@ -1066,13 +1070,11 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
| Operator::I16x8AddSaturateU
| Operator::I16x8SubSaturateS
| Operator::I16x8SubSaturateU
| Operator::I16x8Mul
| Operator::I32x4AnyTrue
| Operator::I32x4AllTrue
| Operator::I32x4Shl
| Operator::I32x4ShrS
| Operator::I32x4ShrU
| Operator::I32x4Mul
| Operator::I64x2AnyTrue
| Operator::I64x2AllTrue
| Operator::I64x2Shl
Expand Down
44 changes: 44 additions & 0 deletions filetests/isa/x86/simd-arithmetic.clif
Original file line number Diff line number Diff line change
Expand Up @@ -120,3 +120,47 @@ ebb0:

return ; bin: c3
}

function %imul_i32x4() -> b1 {
; Multiplies two constant i32x4 vectors, pins the encoding of the `imul`, and returns true only
; if every inspected lane holds the expected wrapped product.
ebb0:
[-, %xmm0] v0 = vconst.i32x4 [-1 0 1 -2147483647] ; i.e. -2147483647 == 0x80_00_00_01
[-, %xmm1] v1 = vconst.i32x4 [2 2 2 2]
[-, %xmm0] v2 = imul v0, v1 ; bin: 66 0f 38 40 c1

; lane 0: -1 * 2 == -2
v3 = extractlane v2, 0
v4 = icmp_imm eq v3, -2

; lane 1: 0 * 2 == 0
v5 = extractlane v2, 1
v6 = icmp_imm eq v5, 0

; lane 3 exercises wrap-around on overflow; note that lane 2 (1 * 2) is not inspected
v7 = extractlane v2, 3
v8 = icmp_imm eq v7, 2 ; 0x80_00_00_01 * 2 == 0x1_00_00_00_02 (and the 1 is dropped)

; combine the three lane checks: v10 is true only when v4, v6 and v8 are all true
v9 = band v4, v6
v10 = band v8, v9
return v10
}
; run

function %imul_i16x8() -> b1 {
; Multiplies two constant i16x8 vectors, pins the encoding of the `imul`, and returns true only
; if every inspected lane holds the expected wrapped product.
ebb0:
[-, %xmm1] v0 = vconst.i16x8 [-1 0 1 32767 0 0 0 0] ; i.e. 32767 == 0x7f_ff
[-, %xmm2] v1 = vconst.i16x8 [2 2 2 2 0 0 0 0]
[-, %xmm1] v2 = imul v0, v1 ; bin: 66 0f d5 ca

; lane 0: -1 * 2 == -2, which is 0xff_fe as a 16-bit pattern
v3 = extractlane v2, 0
v4 = icmp_imm eq v3, 0xfffe ; TODO -2 will not work here and below because v3 is being
; uextend-ed, not sextend-ed

; lane 1: 0 * 2 == 0
v5 = extractlane v2, 1
v6 = icmp_imm eq v5, 0

; lane 3: largest positive i16 doubled; note that lane 2 (1 * 2) is not inspected
v7 = extractlane v2, 3
v8 = icmp_imm eq v7, 0xfffe ; 0x7f_ff * 2 == 0xff_fe

; combine the three lane checks: v10 is true only when v4, v6 and v8 are all true
v9 = band v4, v6
v10 = band v8, v9

; return the combined result (not just the lane 0 check in v4) so that a wrong product in
; lane 1 or lane 3 fails the run, matching %imul_i32x4
return v10
}
; run

0 comments on commit dcab6cb

Please sign in to comment.