Skip to content

Commit

Permalink
[WebAssembly] Prototype extending multiplication SIMD instructions
Browse files Browse the repository at this point in the history
As proposed in WebAssembly/simd#376. This commit
implements new builtin functions and intrinsics for these instructions, but does
not yet add them to wasm_simd128.h because they have not yet been merged to the
proposal. These are the first instructions with opcodes greater than 0xff, so
this commit updates the MC layer and disassembler to handle that correctly.

Differential Revision: https://reviews.llvm.org/D90253
  • Loading branch information
tlively authored and arichardson committed Mar 25, 2021
2 parents eac6537 + 31e9445 commit 1948015
Show file tree
Hide file tree
Showing 9 changed files with 410 additions and 7 deletions.
15 changes: 15 additions & 0 deletions clang/include/clang/Basic/BuiltinsWebAssembly.def
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,21 @@ TARGET_BUILTIN(__builtin_wasm_popcnt_i8x16, "V16ScV16Sc", "nc", "simd128")

TARGET_BUILTIN(__builtin_wasm_q15mulr_saturate_s_i8x16, "V8sV8sV8s", "nc", "simd128")

// Prototype extending-multiplication builtins (proposed in
// WebAssembly/simd#376). Every entry takes two vectors of the narrower lane
// type and yields one vector of the doubled-width lane type. Names follow
// extmul_<low|high>_<source shape>_<s|u>_<result shape>; the _s entries use
// signed vector types and the _u entries use unsigned ones. All require the
// "simd128" target feature.

// i8x16 operands -> i16x8 result.
TARGET_BUILTIN(__builtin_wasm_extmul_low_i8x16_s_i16x8, "V8sV16ScV16Sc", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_extmul_high_i8x16_s_i16x8, "V8sV16ScV16Sc", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_extmul_low_i8x16_u_i16x8, "V8UsV16UcV16Uc", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_extmul_high_i8x16_u_i16x8, "V8UsV16UcV16Uc", "nc", "simd128")

// i16x8 operands -> i32x4 result.
TARGET_BUILTIN(__builtin_wasm_extmul_low_i16x8_s_i32x4, "V4iV8sV8s", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_extmul_high_i16x8_s_i32x4, "V4iV8sV8s", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_extmul_low_i16x8_u_i32x4, "V4UiV8UsV8Us", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_extmul_high_i16x8_u_i32x4, "V4UiV8UsV8Us", "nc", "simd128")

// i32x4 operands -> i64x2 result.
TARGET_BUILTIN(__builtin_wasm_extmul_low_i32x4_s_i64x2, "V2LLiV4iV4i", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_extmul_high_i32x4_s_i64x2, "V2LLiV4iV4i", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_extmul_low_i32x4_u_i64x2, "V2ULLiV4UiV4Ui", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_extmul_high_i32x4_u_i64x2, "V2ULLiV4UiV4Ui", "nc", "simd128")

TARGET_BUILTIN(__builtin_wasm_bitselect, "V4iV4iV4iV4i", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_shuffle_v8x16, "V16ScV16ScV16ScIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIi", "nc", "simd128")

Expand Down
43 changes: 43 additions & 0 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16959,6 +16959,49 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
CGM.getIntrinsic(Intrinsic::wasm_q15mulr_saturate_signed);
return Builder.CreateCall(Callee, {LHS, RHS});
}
// Extending multiplication builtins. All twelve variants share one lowering:
// the low/high and signed/unsigned parts of the builtin name pick the
// intrinsic, and the builtin's result type picks the overload. Lane width
// therefore never appears in the inner switch.
case WebAssembly::BI__builtin_wasm_extmul_low_i8x16_s_i16x8:
case WebAssembly::BI__builtin_wasm_extmul_high_i8x16_s_i16x8:
case WebAssembly::BI__builtin_wasm_extmul_low_i8x16_u_i16x8:
case WebAssembly::BI__builtin_wasm_extmul_high_i8x16_u_i16x8:
case WebAssembly::BI__builtin_wasm_extmul_low_i16x8_s_i32x4:
case WebAssembly::BI__builtin_wasm_extmul_high_i16x8_s_i32x4:
case WebAssembly::BI__builtin_wasm_extmul_low_i16x8_u_i32x4:
case WebAssembly::BI__builtin_wasm_extmul_high_i16x8_u_i32x4:
case WebAssembly::BI__builtin_wasm_extmul_low_i32x4_s_i64x2:
case WebAssembly::BI__builtin_wasm_extmul_high_i32x4_s_i64x2:
case WebAssembly::BI__builtin_wasm_extmul_low_i32x4_u_i64x2:
case WebAssembly::BI__builtin_wasm_extmul_high_i32x4_u_i64x2: {
  Value *LHS = EmitScalarExpr(E->getArg(0));
  Value *RHS = EmitScalarExpr(E->getArg(1));
  unsigned IntNo;
  switch (BuiltinID) {
  case WebAssembly::BI__builtin_wasm_extmul_low_i8x16_s_i16x8:
  case WebAssembly::BI__builtin_wasm_extmul_low_i16x8_s_i32x4:
  case WebAssembly::BI__builtin_wasm_extmul_low_i32x4_s_i64x2:
    IntNo = Intrinsic::wasm_extmul_low_signed;
    break;
  case WebAssembly::BI__builtin_wasm_extmul_low_i8x16_u_i16x8:
  case WebAssembly::BI__builtin_wasm_extmul_low_i16x8_u_i32x4:
  case WebAssembly::BI__builtin_wasm_extmul_low_i32x4_u_i64x2:
    IntNo = Intrinsic::wasm_extmul_low_unsigned;
    break;
  case WebAssembly::BI__builtin_wasm_extmul_high_i8x16_s_i16x8:
  case WebAssembly::BI__builtin_wasm_extmul_high_i16x8_s_i32x4:
  case WebAssembly::BI__builtin_wasm_extmul_high_i32x4_s_i64x2:
    IntNo = Intrinsic::wasm_extmul_high_signed;
    break;
  case WebAssembly::BI__builtin_wasm_extmul_high_i8x16_u_i16x8:
  case WebAssembly::BI__builtin_wasm_extmul_high_i16x8_u_i32x4:
  case WebAssembly::BI__builtin_wasm_extmul_high_i32x4_u_i64x2:
    IntNo = Intrinsic::wasm_extmul_high_unsigned;
    break;
  default:
    // Only reachable if a label is added to the outer case list without a
    // matching entry above.
    llvm_unreachable("unexpected builtin ID");
  }

  // The converted result type of the builtin call is the only overload type
  // handed to getIntrinsic.
  Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
  return Builder.CreateCall(Callee, {LHS, RHS});
}
case WebAssembly::BI__builtin_wasm_bitselect: {
Value *V1 = EmitScalarExpr(E->getArg(0));
Value *V2 = EmitScalarExpr(E->getArg(1));
Expand Down
84 changes: 84 additions & 0 deletions clang/test/CodeGen/builtins-wasm.c
Original file line number Diff line number Diff line change
Expand Up @@ -525,6 +525,90 @@ i16x8 q15mulr_saturate_s_i16x8(i16x8 x, i16x8 y) {
// WEBASSEMBLY-NEXT: ret
}

// extmul_low, signed, i8x16 -> i16x8: the builtin must lower to a single call
// of llvm.wasm.extmul.low.signed.v8i16 on the untouched arguments, followed
// directly by the return.
i16x8 extmul_low_i8x16_s_i16x8(i8x16 x, i8x16 y) {
return __builtin_wasm_extmul_low_i8x16_s_i16x8(x, y);
// WEBASSEMBLY: call <8 x i16> @llvm.wasm.extmul.low.signed.v8i16(
// WEBASSEMBLY-SAME: <16 x i8> %x, <16 x i8> %y)
// WEBASSEMBLY-NEXT: ret
}

// extmul_high, signed, i8x16 -> i16x8: the builtin must lower to a single call
// of llvm.wasm.extmul.high.signed.v8i16 on the untouched arguments, followed
// directly by the return.
i16x8 extmul_high_i8x16_s_i16x8(i8x16 x, i8x16 y) {
return __builtin_wasm_extmul_high_i8x16_s_i16x8(x, y);
// WEBASSEMBLY: call <8 x i16> @llvm.wasm.extmul.high.signed.v8i16(
// WEBASSEMBLY-SAME: <16 x i8> %x, <16 x i8> %y)
// WEBASSEMBLY-NEXT: ret
}

// extmul_low, unsigned, i8x16 -> i16x8: the builtin must lower to a single
// call of llvm.wasm.extmul.low.unsigned.v8i16 on the untouched arguments,
// followed directly by the return.
u16x8 extmul_low_i8x16_u_i16x8(u8x16 x, u8x16 y) {
return __builtin_wasm_extmul_low_i8x16_u_i16x8(x, y);
// WEBASSEMBLY: call <8 x i16> @llvm.wasm.extmul.low.unsigned.v8i16(
// WEBASSEMBLY-SAME: <16 x i8> %x, <16 x i8> %y)
// WEBASSEMBLY-NEXT: ret
}

// extmul_high, unsigned, i8x16 -> i16x8: the builtin must lower to a single
// call of llvm.wasm.extmul.high.unsigned.v8i16 on the untouched arguments,
// followed directly by the return.
u16x8 extmul_high_i8x16_u_i16x8(u8x16 x, u8x16 y) {
return __builtin_wasm_extmul_high_i8x16_u_i16x8(x, y);
// WEBASSEMBLY: call <8 x i16> @llvm.wasm.extmul.high.unsigned.v8i16(
// WEBASSEMBLY-SAME: <16 x i8> %x, <16 x i8> %y)
// WEBASSEMBLY-NEXT: ret
}

// extmul_low, signed, i16x8 -> i32x4: the builtin must lower to a single call
// of llvm.wasm.extmul.low.signed.v4i32 on the untouched arguments, followed
// directly by the return.
i32x4 extmul_low_i16x8_s_i32x4(i16x8 x, i16x8 y) {
return __builtin_wasm_extmul_low_i16x8_s_i32x4(x, y);
// WEBASSEMBLY: call <4 x i32> @llvm.wasm.extmul.low.signed.v4i32(
// WEBASSEMBLY-SAME: <8 x i16> %x, <8 x i16> %y)
// WEBASSEMBLY-NEXT: ret
}

// extmul_high, signed, i16x8 -> i32x4: the builtin must lower to a single call
// of llvm.wasm.extmul.high.signed.v4i32 on the untouched arguments, followed
// directly by the return.
i32x4 extmul_high_i16x8_s_i32x4(i16x8 x, i16x8 y) {
return __builtin_wasm_extmul_high_i16x8_s_i32x4(x, y);
// WEBASSEMBLY: call <4 x i32> @llvm.wasm.extmul.high.signed.v4i32(
// WEBASSEMBLY-SAME: <8 x i16> %x, <8 x i16> %y)
// WEBASSEMBLY-NEXT: ret
}

// extmul_low, unsigned, i16x8 -> i32x4: the builtin must lower to a single
// call of llvm.wasm.extmul.low.unsigned.v4i32 on the untouched arguments,
// followed directly by the return.
u32x4 extmul_low_i16x8_u_i32x4(u16x8 x, u16x8 y) {
return __builtin_wasm_extmul_low_i16x8_u_i32x4(x, y);
// WEBASSEMBLY: call <4 x i32> @llvm.wasm.extmul.low.unsigned.v4i32(
// WEBASSEMBLY-SAME: <8 x i16> %x, <8 x i16> %y)
// WEBASSEMBLY-NEXT: ret
}

// extmul_high, unsigned, i16x8 -> i32x4: the builtin must lower to a single
// call of llvm.wasm.extmul.high.unsigned.v4i32 on the untouched arguments,
// followed directly by the return.
u32x4 extmul_high_i16x8_u_i32x4(u16x8 x, u16x8 y) {
return __builtin_wasm_extmul_high_i16x8_u_i32x4(x, y);
// WEBASSEMBLY: call <4 x i32> @llvm.wasm.extmul.high.unsigned.v4i32(
// WEBASSEMBLY-SAME: <8 x i16> %x, <8 x i16> %y)
// WEBASSEMBLY-NEXT: ret
}

// extmul_low, signed, i32x4 -> i64x2: the builtin must lower to a single call
// of llvm.wasm.extmul.low.signed.v2i64 on the untouched arguments, followed
// directly by the return.
i64x2 extmul_low_i32x4_s_i64x2(i32x4 x, i32x4 y) {
return __builtin_wasm_extmul_low_i32x4_s_i64x2(x, y);
// WEBASSEMBLY: call <2 x i64> @llvm.wasm.extmul.low.signed.v2i64(
// WEBASSEMBLY-SAME: <4 x i32> %x, <4 x i32> %y)
// WEBASSEMBLY-NEXT: ret
}

// extmul_high, signed, i32x4 -> i64x2: the builtin must lower to a single call
// of llvm.wasm.extmul.high.signed.v2i64 on the untouched arguments, followed
// directly by the return.
i64x2 extmul_high_i32x4_s_i64x2(i32x4 x, i32x4 y) {
return __builtin_wasm_extmul_high_i32x4_s_i64x2(x, y);
// WEBASSEMBLY: call <2 x i64> @llvm.wasm.extmul.high.signed.v2i64(
// WEBASSEMBLY-SAME: <4 x i32> %x, <4 x i32> %y)
// WEBASSEMBLY-NEXT: ret
}

// extmul_low, unsigned, i32x4 -> i64x2: the builtin must lower to a single
// call of llvm.wasm.extmul.low.unsigned.v2i64 on the untouched arguments,
// followed directly by the return.
u64x2 extmul_low_i32x4_u_i64x2(u32x4 x, u32x4 y) {
return __builtin_wasm_extmul_low_i32x4_u_i64x2(x, y);
// WEBASSEMBLY: call <2 x i64> @llvm.wasm.extmul.low.unsigned.v2i64(
// WEBASSEMBLY-SAME: <4 x i32> %x, <4 x i32> %y)
// WEBASSEMBLY-NEXT: ret
}

// extmul_high, unsigned, i32x4 -> i64x2: the builtin must lower to a single
// call of llvm.wasm.extmul.high.unsigned.v2i64 on the untouched arguments,
// followed directly by the return.
u64x2 extmul_high_i32x4_u_i64x2(u32x4 x, u32x4 y) {
return __builtin_wasm_extmul_high_i32x4_u_i64x2(x, y);
// WEBASSEMBLY: call <2 x i64> @llvm.wasm.extmul.high.unsigned.v2i64(
// WEBASSEMBLY-SAME: <4 x i32> %x, <4 x i32> %y)
// WEBASSEMBLY-NEXT: ret
}

i32x4 dot_i16x8_s(i16x8 x, i16x8 y) {
return __builtin_wasm_dot_s_i32x4_i16x8(x, y);
// WEBASSEMBLY: call <4 x i32> @llvm.wasm.dot(<8 x i16> %x, <8 x i16> %y)
Expand Down
17 changes: 17 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsWebAssembly.td
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,23 @@ def int_wasm_store64_lane :
def int_wasm_popcnt :
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem, IntrSpeculatable]>;

// Extending multiplication intrinsics. Each one is overloaded on its result
// vector type (llvm_anyvector_ty, overload index 0); both operands are
// LLVMSubdivide2VectorType<0>, i.e. derived from that result type. The
// instantiations used by this change are <16 x i8> -> <8 x i16>,
// <8 x i16> -> <4 x i32> and <4 x i32> -> <2 x i64>. The low/high and
// signed/unsigned variants are separate intrinsics with identical signatures
// and attributes (no memory access, speculatable).
def int_wasm_extmul_low_signed :
Intrinsic<[llvm_anyvector_ty],
[LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>],
[IntrNoMem, IntrSpeculatable]>;
def int_wasm_extmul_high_signed :
Intrinsic<[llvm_anyvector_ty],
[LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>],
[IntrNoMem, IntrSpeculatable]>;
def int_wasm_extmul_low_unsigned :
Intrinsic<[llvm_anyvector_ty],
[LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>],
[IntrNoMem, IntrSpeculatable]>;
def int_wasm_extmul_high_unsigned :
Intrinsic<[llvm_anyvector_ty],
[LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>],
[IntrNoMem, IntrSpeculatable]>;

//===----------------------------------------------------------------------===//
// Thread-local storage intrinsics
//===----------------------------------------------------------------------===//
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,12 +62,16 @@ void WebAssemblyMCCodeEmitter::encodeInstruction(
uint64_t Start = OS.tell();

uint64_t Binary = getBinaryCodeForInstr(MI, Fixups, STI);
if (Binary <= UINT8_MAX) {
if (Binary < (1 << 8)) {
OS << uint8_t(Binary);
} else {
assert(Binary <= UINT16_MAX && "Several-byte opcodes not supported yet");
} else if (Binary < (1 << 16)) {
OS << uint8_t(Binary >> 8);
encodeULEB128(uint8_t(Binary), OS);
} else if (Binary < (1 << 24)) {
OS << uint8_t(Binary >> 16);
encodeULEB128(uint16_t(Binary), OS);
} else {
llvm_unreachable("Very large (prefix + 3 byte) opcodes not supported");
}

// For br_table instructions, encode the size of the table. In the MCInst,
Expand Down
55 changes: 54 additions & 1 deletion llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ multiclass SIMD_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
list<dag> pattern_r, string asmstr_r = "",
string asmstr_s = "", bits<32> simdop = -1> {
defm "" : I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, asmstr_s,
!or(0xfd00, !and(0xff, simdop))>,
!if(!ge(simdop, 0x100),
!or(0xfd0000, !and(0xffff, simdop)),
!or(0xfd00, !and(0xff, simdop)))>,
Requires<[HasSIMD128]>;
}

Expand Down Expand Up @@ -935,6 +937,57 @@ defm DOT : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins),
"i32x4.dot_i16x8_s\t$dst, $lhs, $rhs", "i32x4.dot_i16x8_s",
186>;

// Extending multiplication: extmul_{low,high}_{s,u}
//
// Defines one two-operand SIMD instruction whose result value type differs
// from its operand value type.
//   vec_t  - result value type; also appended to the record name (_#vec_t)
//            so the same defm name can be reused for several result types.
//   arg_t  - value type of both operands.
//   vec    - shape prefix of the assembly mnemonic (printed as vec.name).
//   node   - node matched by the selection pattern, applied to both operands.
//   name   - mnemonic suffix following the shape prefix.
//   simdop - opcode value forwarded to SIMD_I.
multiclass SIMDExtBinary<ValueType vec_t, ValueType arg_t, string vec,
SDNode node, string name, bits<32> simdop> {
defm _#vec_t : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
(outs), (ins),
[(set (vec_t V128:$dst),
(node (arg_t V128:$lhs), (arg_t V128:$rhs))
)],
vec#"."#name#"\t$dst, $lhs, $rhs", vec#"."#name,
simdop>;
}

// Instantiations of SIMDExtBinary for each operand/result shape pair. The
// four defm names repeat in every group; the records stay distinct because
// SIMDExtBinary appends the result value type to the name.

// i8x16 operands -> i16x8 result.
defm EXTMUL_LOW_S :
SIMDExtBinary<v8i16, v16i8, "i16x8", int_wasm_extmul_low_signed,
"extmul_low_i8x16_s", 154>;
defm EXTMUL_HIGH_S :
SIMDExtBinary<v8i16, v16i8, "i16x8", int_wasm_extmul_high_signed,
"extmul_high_i8x16_s", 157>;
defm EXTMUL_LOW_U :
SIMDExtBinary<v8i16, v16i8, "i16x8", int_wasm_extmul_low_unsigned,
"extmul_low_i8x16_u", 158>;
defm EXTMUL_HIGH_U :
SIMDExtBinary<v8i16, v16i8, "i16x8", int_wasm_extmul_high_unsigned,
"extmul_high_i8x16_u", 159>;

// i16x8 operands -> i32x4 result.
defm EXTMUL_LOW_S :
SIMDExtBinary<v4i32, v8i16, "i32x4", int_wasm_extmul_low_signed,
"extmul_low_i16x8_s", 187>;
defm EXTMUL_HIGH_S :
SIMDExtBinary<v4i32, v8i16, "i32x4", int_wasm_extmul_high_signed,
"extmul_high_i16x8_s", 189>;
defm EXTMUL_LOW_U :
SIMDExtBinary<v4i32, v8i16, "i32x4", int_wasm_extmul_low_unsigned,
"extmul_low_i16x8_u", 190>;
defm EXTMUL_HIGH_U :
SIMDExtBinary<v4i32, v8i16, "i32x4", int_wasm_extmul_high_unsigned,
"extmul_high_i16x8_u", 191>;

// i32x4 operands -> i64x2 result.
defm EXTMUL_LOW_S :
SIMDExtBinary<v2i64, v4i32, "i64x2", int_wasm_extmul_low_signed,
"extmul_low_i32x4_s", 210>;
defm EXTMUL_HIGH_S :
SIMDExtBinary<v2i64, v4i32, "i64x2", int_wasm_extmul_high_signed,
"extmul_high_i32x4_s", 211>;
defm EXTMUL_LOW_U :
SIMDExtBinary<v2i64, v4i32, "i64x2", int_wasm_extmul_low_unsigned,
"extmul_low_i32x4_u", 214>;
defm EXTMUL_HIGH_U :
SIMDExtBinary<v2i64, v4i32, "i64x2", int_wasm_extmul_high_unsigned,
"extmul_high_i32x4_u", 215>;

//===----------------------------------------------------------------------===//
// Floating-point unary arithmetic
//===----------------------------------------------------------------------===//
Expand Down
Loading

0 comments on commit 1948015

Please sign in to comment.