diff --git a/cranelift-codegen/meta/src/isa/x86/encodings.rs b/cranelift-codegen/meta/src/isa/x86/encodings.rs
index 686701229..05c15cd43 100644
--- a/cranelift-codegen/meta/src/isa/x86/encodings.rs
+++ b/cranelift-codegen/meta/src/isa/x86/encodings.rs
@@ -2012,6 +2012,20 @@ pub(crate) fn define(
         e.enc_32_64(isub, rec_fa.opcodes(opcodes));
     }
 
+    // SIMD integer multiplication: the x86 ISA does not have instructions for multiplying I8x16
+    // and I64x2 vectors. Encodings for those types are omitted for now (TODO) until we finalize
+    // whether they are essential to the Wasm SIMD spec (I64x2 has already been removed).
+    for (ty, opcodes, isap) in [
+        (I16, vec![0x66, 0x0f, 0xd5], None), // PMULLW from SSE2
+        (I32, vec![0x66, 0x0f, 0x38, 0x40], Some(use_sse41_simd)), // PMULLD from SSE4.1
+    ]
+    .iter()
+    .cloned()
+    {
+        let imul = imul.bind_vector_from_lane(ty, sse_vector_size);
+        e.enc_32_64_maybe_isap(imul, rec_fa.opcodes(opcodes), isap);
+    }
+
     // Reference type instructions
 
     // Null references implemented as iconst 0.
diff --git a/cranelift-codegen/meta/src/shared/instructions.rs b/cranelift-codegen/meta/src/shared/instructions.rs
index 2b340910d..51e303f0d 100644
--- a/cranelift-codegen/meta/src/shared/instructions.rs
+++ b/cranelift-codegen/meta/src/shared/instructions.rs
@@ -1682,8 +1682,7 @@ pub(crate) fn define(
         Wrapping integer multiplication: `a := x y \pmod{2^B}`.
 
         This instruction does not depend on the signed/unsigned interpretation
-        of the
-        operands.
+        of the operands.
 
         Polymorphic over all integer types (vector and scalar).
         "#,
diff --git a/cranelift-wasm/src/code_translator.rs b/cranelift-wasm/src/code_translator.rs
index d0516fbb5..70afb1b0d 100644
--- a/cranelift-wasm/src/code_translator.rs
+++ b/cranelift-wasm/src/code_translator.rs
@@ -986,6 +986,10 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
             let a = state.pop1();
             state.push1(builder.ins().ineg(a))
         }
+        Operator::I16x8Mul | Operator::I32x4Mul => {
+            let (a, b) = state.pop2();
+            state.push1(builder.ins().imul(a, b))
+        }
         Operator::V128Load { .. }
         | Operator::V128Store { .. }
         | Operator::V8x16Shuffle { .. }
@@ -1055,13 +1059,11 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
         | Operator::I16x8AddSaturateU
         | Operator::I16x8SubSaturateS
         | Operator::I16x8SubSaturateU
-        | Operator::I16x8Mul
         | Operator::I32x4AnyTrue
         | Operator::I32x4AllTrue
         | Operator::I32x4Shl
         | Operator::I32x4ShrS
         | Operator::I32x4ShrU
-        | Operator::I32x4Mul
         | Operator::I64x2AnyTrue
         | Operator::I64x2AllTrue
         | Operator::I64x2Shl
diff --git a/filetests/isa/x86/simd-arithmetic.clif b/filetests/isa/x86/simd-arithmetic.clif
index 824417772..e2714a91d 100644
--- a/filetests/isa/x86/simd-arithmetic.clif
+++ b/filetests/isa/x86/simd-arithmetic.clif
@@ -120,3 +120,47 @@ ebb0:
 
     return ; bin: c3
 }
+
+function %imul_i32x4() -> b1 {
+ebb0:
+[-, %xmm0]    v0 = vconst.i32x4 [-1 0 1 -2147483647] ; e.g. -2147483647 == 0x80_00_00_01
+[-, %xmm1]    v1 = vconst.i32x4 [2 2 2 2] ; every lane of v0 is multiplied by 2
+[-, %xmm0]    v2 = imul v0, v1 ; bin: 66 0f 38 40 c1
+
+    v3 = extractlane v2, 0 ; lane 0 is -1 * 2
+    v4 = icmp_imm eq v3, -2
+
+    v5 = extractlane v2, 1 ; lane 1 is 0 * 2
+    v6 = icmp_imm eq v5, 0
+
+    v7 = extractlane v2, 3 ; lane 3 exercises wrap-around; lane 2 (1 * 2) is left unchecked
+    v8 = icmp_imm eq v7, 2 ; 0x80_00_00_01 * 2 == 0x1_00_00_00_02 (and the 1 is dropped)
+
+    v9 = band v4, v6 ; combine the lane 0 and lane 1 results
+    v10 = band v8, v9 ; true only when all three checked lanes match
+    return v10
+}
+; run
+
+function %imul_i16x8() -> b1 {
+ebb0:
+[-, %xmm1]    v0 = vconst.i16x8 [-1 0 1 32767 0 0 0 0] ; e.g. 32767 == 0x7f_ff
+[-, %xmm2]    v1 = vconst.i16x8 [2 2 2 2 0 0 0 0] ; lanes 0 to 3 are multiplied by 2
+[-, %xmm1]    v2 = imul v0, v1 ; bin: 66 0f d5 ca
+
+    v3 = extractlane v2, 0 ; lane 0 is -1 * 2, i.e. 0xff_fe as a 16-bit pattern
+    v4 = icmp_imm eq v3, 0xfffe ; TODO -2 will not work here and below because v3 is being
+    ; uextend-ed, not sextend-ed
+
+    v5 = extractlane v2, 1 ; lane 1 is 0 * 2
+    v6 = icmp_imm eq v5, 0
+
+    v7 = extractlane v2, 3 ; lane 3 is the largest positive i16 doubled; lane 2 is unchecked
+    v8 = icmp_imm eq v7, 0xfffe ; 0x7f_ff * 2 == 0xff_fe
+
+    v9 = band v4, v6 ; combine the lane 0 and lane 1 results
+    v10 = band v8, v9 ; true only when all three checked lanes match
+
+    return v10
+}
+; run