Skip to content

Commit

Permalink
Bug 1656226 - Implement the experimental opcodes. r=jseward
Browse files Browse the repository at this point in the history
Implement some of the experimental SIMD opcodes that are supported by
all of V8, LLVM, and Binaryen, for maximum compatibility with test
content we might be exposed to.  Most/all of these will probably make
it into the spec, as they lead to substantial speedups in some
programs, and they are deterministic.

For spec and cpu mapping details, see:

WebAssembly/simd#122 (pmax/pmin)
WebAssembly/simd#232 (rounding)
WebAssembly/simd#127 (dot product)
WebAssembly/simd#237 (load zero)

The wasm bytecode values used here come from the binaryen changes that
are linked from those tickets; that's the best documentation available
right now.  Current binaryen opcode mappings are here:
https://github.com/WebAssembly/binaryen/blob/master/src/wasm-binary.h

Also: Drive-by fix for the signatures of vroundss and vroundsd: these are
unary operations and should follow the convention for such operations,
with src/dest arguments, not src0/src1/dest.

Also: Drive-by fix to add variants of vmovss and vmovsd on x64 that
take Operand source and FloatRegister destination.

Differential Revision: https://phabricator.services.mozilla.com/D85982
  • Loading branch information
Lars T Hansen committed Aug 12, 2020
1 parent 5b91ef7 commit 9c46265
Show file tree
Hide file tree
Showing 21 changed files with 766 additions and 45 deletions.
34 changes: 32 additions & 2 deletions js/src/jit-test/lib/wasm-binary.js
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ const I32Code = 0x7f;
const I64Code = 0x7e;
const F32Code = 0x7d;
const F64Code = 0x7c;
const V128Code = 0x7b;
const AnyFuncCode = 0x70;
const AnyrefCode = 0x6f;
const OptRefCode = 0x6c;
Expand All @@ -53,6 +54,7 @@ const CallCode = 0x10;
const CallIndirectCode = 0x11;
const DropCode = 0x1a;
const SelectCode = 0x1b;
const LocalGetCode = 0x20;
const I32Load = 0x28;
const I64Load = 0x29;
const F32Load = 0x2a;
Expand Down Expand Up @@ -102,6 +104,27 @@ const RefNullCode = 0xd0;
const RefIsNullCode = 0xd1;
const RefFuncCode = 0xd2;

// SIMD opcodes
//
// These values are emitted after the SimdPrefix byte, not as stand-alone
// opcodes; see js/src/jit-test/tests/wasm/simd/experimental.js, which emits
// them as `SimdPrefix, varU32(opcode)`.
// NOTE(review): values >= 0x80 presumably need the varU32 wrapper because
// they no longer fit in a single LEB128 byte - confirm against varU32.
const V128LoadCode = 0x00;
const V128StoreCode = 0x0b;

// Experimental SIMD opcodes as of August, 2020.
// Grouped below as: dot product, f32x4/f64x2 rounding, f32x4/f64x2
// pseudo-min/max, and zero-extending loads.
const I32x4DotSI16x8Code = 0xba;
const F32x4CeilCode = 0xd8;
const F32x4FloorCode = 0xd9;
const F32x4TruncCode = 0xda;
const F32x4NearestCode = 0xdb;
const F64x2CeilCode = 0xdc;
const F64x2FloorCode = 0xdd;
const F64x2TruncCode = 0xde;
const F64x2NearestCode = 0xdf;
const F32x4PMinCode = 0xea;
const F32x4PMaxCode = 0xeb;
const F64x2PMinCode = 0xf6;
const F64x2PMaxCode = 0xf7;
const V128Load32ZeroCode = 0xfc;
const V128Load64ZeroCode = 0xfd;

const FirstInvalidOpcode = 0xc5;
const LastInvalidOpcode = 0xfa;
const GcPrefix = 0xfb;
Expand Down Expand Up @@ -300,8 +323,15 @@ function exportSection(exports) {
body.push(...varU32(exports.length));
for (let exp of exports) {
body.push(...string(exp.name));
body.push(...varU32(FunctionCode));
body.push(...varU32(exp.funcIndex));
if (exp.hasOwnProperty("funcIndex")) {
body.push(...varU32(FunctionCode));
body.push(...varU32(exp.funcIndex));
} else if (exp.hasOwnProperty("memIndex")) {
body.push(...varU32(MemoryCode));
body.push(...varU32(exp.memIndex));
} else {
throw "Bad export " + exp;
}
}
return { name: exportId, body };
}
Expand Down
200 changes: 200 additions & 0 deletions js/src/jit-test/tests/wasm/simd/experimental.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
// Experimental opcodes. We have no text parsing support for these yet. The
// tests will be cleaned up and moved into ad-hack.js if the opcodes are
// adopted.

// When simd is enabled by default in release builds we will flip the value of
// SimdExperimentalEnabled to false in RELEASE_OR_BETA builds. At that point,
// these tests will start failing in release or beta builds, and a guard
// asserting !RELEASE_OR_BETA will have to be added above. That is how it
// should be.

load(libdir + "wasm-binary.js");

// Compile `bytes` into a WebAssembly module and instantiate it with the given
// import object, returning the resulting instance.
function wasmEval(bytes, imports) {
    const compiled = new WebAssembly.Module(bytes);
    return new WebAssembly.Instance(compiled, imports);
}

// Copy `len` consecutive elements of `arr`, starting at index `loc`, into a
// fresh plain array and return it.
function get(arr, loc, len) {
    const picked = [];
    while (picked.length < len)
        picked.push(arr[loc + picked.length]);
    return picked;
}

// Store the elements of `vals` into `arr` at consecutive indices starting at
// `loc`.  When the destination is a BigInt64Array each value is converted
// with BigInt() first; otherwise values are stored unchanged.
function set(arr, loc, vals) {
    const toElement = (arr instanceof BigInt64Array) ? BigInt : (v) => v;
    for ( let offset = 0; offset < vals.length; offset++ )
        arr[loc + offset] = toElement(vals[offset]);
}

// Assert that `got` and `expected` have the same length and pairwise equal
// elements (using assertEq).  If exactly one element of a pair is a bigint,
// the other one is converted with BigInt() before the comparison, so that
// number-valued expectations can be checked against BigInt64Array contents.
function assertSame(got, expected) {
    assertEq(got.length, expected.length);
    for ( let idx = 0; idx < got.length; idx++ ) {
        let actual = got[idx];
        let wanted = expected[idx];
        const actualType = typeof actual;
        const wantedType = typeof wanted;
        if (actualType != wantedType) {
            if (actualType == "bigint") {
                wanted = BigInt(wanted);
            } else if (wantedType == "bigint") {
                actual = BigInt(actual);
            }
        }
        assertEq(actual, wanted);
    }
}

// Return the array [0, 1, ..., len-1].
function iota(len) {
    const seq = [];
    while (seq.length < len)
        seq.push(seq.length);
    return seq;
}

// Scalar model of pseudo-minimum: y if y < x, otherwise x.  Because only a
// single `<` comparison is used, x is returned whenever the comparison is
// false (including when either operand is NaN).
function pmin(x, y) {
    if (y < x) {
        return y;
    }
    return x;
}
// Scalar model of pseudo-maximum: y if x < y, otherwise x.  Because only a
// single `<` comparison is used, x is returned whenever the comparison is
// false (including when either operand is NaN).
function pmax(x, y) {
    if (x < y) {
        return y;
    }
    return x;
}

// Scalar model for one f32 lane: floor of x, rounded to float32 precision.
function ffloor(x) {
    const floored = Math.floor(x);
    return Math.fround(floored);
}
// Scalar model for one f32 lane: ceiling of x, rounded to float32 precision.
function fceil(x) {
    const raised = Math.ceil(x);
    return Math.fround(raised);
}
// Scalar model for one f32 lane: x with its fractional part dropped (rounded
// toward zero), then rounded to float32 precision.
function ftrunc(x) {
    const whole = Math.trunc(x);
    return Math.fround(whole);
}
// Scalar model for one f32 lane of the `nearest` operation: round x to the
// nearest integer, breaking exact .5 ties toward the even integer, then round
// the result to float32 precision.
//
// Math.round() is not used because it breaks ties toward +Infinity
// (Math.round(2.5) is 3 and Math.round(0.5) is 1), which disagrees with
// round-half-to-even for those inputs.
function fnearest(x) {
    // NaN, +/-Infinity and +/-0 round to themselves.
    if (!Number.isFinite(x) || x === 0)
        return Math.fround(x);

    const lower = Math.floor(x);
    const frac = x - lower;
    let rounded;
    if (frac < 0.5)
        rounded = lower;
    else if (frac > 0.5)
        rounded = lower + 1;
    else
        rounded = (lower % 2 === 0) ? lower : lower + 1;

    // Negative inputs that round to zero yield -0, not +0.
    if (rounded === 0 && x < 0)
        rounded = -0;
    return Math.fround(rounded);
}

// Scalar model for one f64 lane: floor of x.
function dfloor(x) {
    const floored = Math.floor(x);
    return floored;
}
// Scalar model for one f64 lane: ceiling of x.
function dceil(x) {
    const raised = Math.ceil(x);
    return raised;
}
// Scalar model for one f64 lane: x with its fractional part dropped (rounded
// toward zero).
function dtrunc(x) {
    const whole = Math.trunc(x);
    return whole;
}
// Scalar model for one f64 lane of the `nearest` operation: round x to the
// nearest integer, breaking exact .5 ties toward the even integer.
//
// Math.round() is not used because it breaks ties toward +Infinity
// (Math.round(2.5) is 3 and Math.round(0.5) is 1), which disagrees with
// round-half-to-even for those inputs.
function dnearest(x) {
    // NaN, +/-Infinity and +/-0 round to themselves.
    if (!Number.isFinite(x) || x === 0)
        return x;

    const lower = Math.floor(x);
    const frac = x - lower;
    let rounded;
    if (frac < 0.5)
        rounded = lower;
    else if (frac > 0.5)
        rounded = lower + 1;
    else
        rounded = (lower % 2 === 0) ? lower : lower + 1;

    // Negative inputs that round to zero yield -0, not +0.
    if (rounded === 0 && x < 0)
        return -0;
    return rounded;
}

// Function signature with no arguments and VoidCode as its result; used for
// the single exported "run" function of every module built in this file.
const v2vSig = {args:[], ret:VoidCode};

// Build the instruction sequence that pushes the constant address `addr` and
// then performs a v128 load with alignment immediate 4 and offset 0.  The
// results of varS32()/varU32() are embedded as nested elements, not spread.
function V128Load(addr) {
    const pushAddress = [I32ConstCode, varS32(addr)];
    const loadVector = [SimdPrefix, V128LoadCode, 4, varU32(0)];
    return [...pushAddress, ...loadVector];
}

// Build the instruction sequence that pushes the constant address `addr`,
// evaluates the instruction sequence `v` (which should leave a v128 on the
// stack), and then performs a v128 store with alignment immediate 4 and
// offset 0.
function V128StoreExpr(addr, v) {
    const code = [I32ConstCode, varS32(addr)];
    for ( const item of v )
        code.push(item);
    code.push(SimdPrefix, V128StoreCode, 4, varU32(0));
    return code;
}

// Pseudo-min/max, https://github.com/WebAssembly/simd/pull/122
//
// For each opcode, build a module whose "run" function loads the vectors at
// byte offsets 16 and 32, applies the opcode, and stores the result at byte
// offset 0.  The result is checked lane-by-lane against the scalar model
// (pmin or pmax).
var fxs = [5, 1, -4, 2];
var fys = [6, 0, -7, 3];
var dxs = [5, 1];
var dys = [6, 0];

for ( let [opcode, xs, ys, operator] of [[F32x4PMinCode, fxs, fys, pmin],
[F32x4PMaxCode, fxs, fys, pmax],
[F64x2PMinCode, dxs, dys, pmin],
[F64x2PMaxCode, dxs, dys, pmax]] ) {
// Lane count: 4 for the f32x4 inputs above, 2 for the f64x2 inputs.
var k = xs.length;
var ans = iota(k).map((i) => operator(xs[i], ys[i]))

var ins = wasmEval(moduleWithSections([
sigSection([v2vSig]),
declSection([0]),
memorySection(1),
exportSection([{funcIndex: 0, name: "run"},
{memIndex: 0, name: "mem"}]),
bodySection([
funcBody({locals:[],
body: [...V128StoreExpr(0, [...V128Load(16),
...V128Load(32),
SimdPrefix, varU32(opcode)])]})])]));

// View memory with the lane type, so that k elements span one 16-byte vector.
var mem = new (k == 4 ? Float32Array : Float64Array)(ins.exports.mem.buffer);
// Element index k is byte offset 16 and 2*k is byte offset 32 in either view.
set(mem, k, xs);
set(mem, 2*k, ys);
ins.exports.run();
var result = get(mem, 0, k);
assertSame(result, ans);
}

// Widening integer dot product, https://github.com/WebAssembly/simd/pull/127
//
// The module's "run" function loads the vectors at byte offsets 16 and 32,
// applies I32x4DotSI16x8Code, and stores the result at byte offset 0.

var ins = wasmEval(moduleWithSections([
sigSection([v2vSig]),
declSection([0]),
memorySection(1),
exportSection([{funcIndex: 0, name: "run"},
{memIndex: 0, name: "mem"}]),
bodySection([
funcBody({locals:[],
body: [...V128StoreExpr(0, [...V128Load(16),
...V128Load(32),
SimdPrefix, varU32(I32x4DotSI16x8Code)])]})])]));

var xs = [5, 1, -4, 2, 20, -15, 12, 3];
var ys = [6, 0, -7, 3, 8, -1, -3, 7];
// Expected result: output lane i is the sum of the products of input lanes
// 2i and 2i+1.
var ans = [xs[0]*ys[0] + xs[1]*ys[1],
xs[2]*ys[2] + xs[3]*ys[3],
xs[4]*ys[4] + xs[5]*ys[5],
xs[6]*ys[6] + xs[7]*ys[7]];

// Inputs are written as 16-bit lanes and the result is read as 32-bit lanes.
var mem16 = new Int16Array(ins.exports.mem.buffer);
var mem32 = new Int32Array(ins.exports.mem.buffer);
// Int16 element indices 8 and 16 are byte offsets 16 and 32.
set(mem16, 8, xs);
set(mem16, 16, ys);
ins.exports.run();
var result = get(mem32, 0, 4);
assertSame(result, ans);

// Rounding, https://github.com/WebAssembly/simd/pull/232
//
// For each opcode, build a module whose "run" function loads the vector at
// byte offset 16, applies the unary opcode, and stores the result at byte
// offset 0.  The result is checked lane-by-lane against the scalar model.
// None of the inputs below is an exact .5 tie, so tie-breaking behaviour is
// not exercised by this test.

var fxs = [5.1, -1.1, -4.3, 0];
var dxs = [5.1, -1.1];

for ( let [opcode, xs, operator] of [[F32x4CeilCode, fxs, fceil],
[F32x4FloorCode, fxs, ffloor],
[F32x4TruncCode, fxs, ftrunc],
[F32x4NearestCode, fxs, fnearest],
[F64x2CeilCode, dxs, dceil],
[F64x2FloorCode, dxs, dfloor],
[F64x2TruncCode, dxs, dtrunc],
[F64x2NearestCode, dxs, dnearest]] ) {
// Lane count: 4 for the f32x4 inputs above, 2 for the f64x2 inputs.
var k = xs.length;
var ans = xs.map(operator);

var ins = wasmEval(moduleWithSections([
sigSection([v2vSig]),
declSection([0]),
memorySection(1),
exportSection([{funcIndex: 0, name: "run"},
{memIndex: 0, name: "mem"}]),
bodySection([
funcBody({locals:[],
body: [...V128StoreExpr(0, [...V128Load(16),
SimdPrefix, varU32(opcode)])]})])]));

// View memory with the lane type, so that k elements span one 16-byte vector.
var mem = new (k == 4 ? Float32Array : Float64Array)(ins.exports.mem.buffer);
// Element index k is byte offset 16 in either view.
set(mem, k, xs);
ins.exports.run();
var result = get(mem, 0, k);
assertSame(result, ans);
}

// Zero-extending SIMD load, https://github.com/WebAssembly/simd/pull/237
//
// For each opcode, build a module whose "run" function performs the
// zero-extending load from byte offset 16 and stores the resulting vector at
// byte offset 0.  `k` is the number of lanes of the loaded width per vector,
// `cons` the typed-array view with that lane width, and `cast` converts a
// number to that view's element type.
//
// The source lanes following the loaded one, and the whole destination
// vector, are filled with a non-zero poison value before running.  Without
// that, a load that (incorrectly) read all 128 bits, or a store that wrote
// only the low lane, would still produce the expected [37, 0, ...] because
// fresh memory is all zeroes.

for ( let [opcode, k, log2align, cons, cast] of [[V128Load32ZeroCode, 4, 2, Int32Array, Number],
                                                 [V128Load64ZeroCode, 2, 3, BigInt64Array, BigInt]] ) {
    // The i32.const immediate is a signed LEB128 value, so encode it with
    // varS32 (as V128Load and V128StoreExpr do), not varU32.
    var ins = wasmEval(moduleWithSections([
        sigSection([v2vSig]),
        declSection([0]),
        memorySection(1),
        exportSection([{funcIndex: 0, name: "run"},
                       {memIndex: 0, name: "mem"}]),
        bodySection([
            funcBody({locals:[],
                      body: [...V128StoreExpr(0, [I32ConstCode, varS32(16),
                                                  SimdPrefix, varU32(opcode), log2align, varU32(0)])]})])]));

    var mem = new cons(ins.exports.mem.buffer);
    // Poison the destination vector (elements 0..k-1) and the source lanes
    // that must NOT be loaded (elements k+1..2k-1).
    for ( let i = 0; i < k; i++ )
        mem[i] = cast(-1);
    for ( let i = 1; i < k; i++ )
        mem[k + i] = cast(-1);
    // Element index k is byte offset 16 in either view: the lane to load.
    mem[k] = cast(37);
    ins.exports.run();
    var result = get(mem, 0, k);
    assertSame(result, iota(k).map((v) => v == 0 ? 37 : 0));
}

48 changes: 48 additions & 0 deletions js/src/jit/MacroAssembler.h
Original file line number Diff line number Diff line change
Expand Up @@ -2524,6 +2524,54 @@ class MacroAssembler : public MacroAssemblerSpecific {
inline void unsignedWidenLowInt32x4(FloatRegister src, FloatRegister dest)
DEFINED_ON(x86_shared);

// Compare-based minimum/maximum (experimental as of August, 2020)
// https://github.com/WebAssembly/simd/pull/122
//
// The accompanying jit-test (tests/wasm/simd/experimental.js) models these
// per lane as pmin(x, y) = y < x ? y : x and pmax(x, y) = x < y ? y : x.
// NOTE(review): going by the parameter names, `lhsDest` is both the left-hand
// operand and the destination register - confirm against the x86_shared
// implementation.

inline void pseudoMinFloat32x4(FloatRegister rhs, FloatRegister lhsDest)
DEFINED_ON(x86_shared);

inline void pseudoMinFloat64x2(FloatRegister rhs, FloatRegister lhsDest)
DEFINED_ON(x86_shared);

inline void pseudoMaxFloat32x4(FloatRegister rhs, FloatRegister lhsDest)
DEFINED_ON(x86_shared);

inline void pseudoMaxFloat64x2(FloatRegister rhs, FloatRegister lhsDest)
DEFINED_ON(x86_shared);

// Widening/pairwise integer dot product (experimental as of August, 2020)
// https://github.com/WebAssembly/simd/pull/127
//
// The accompanying jit-test (tests/wasm/simd/experimental.js) expects 32-bit
// output lane i to be x[2i]*y[2i] + x[2i+1]*y[2i+1] over 16-bit input lanes.
// NOTE(review): going by the parameter names, `lhsDest` is both the left-hand
// operand and the destination register - confirm against the x86_shared
// implementation.

inline void widenDotInt16x8(FloatRegister rhs, FloatRegister lhsDest)
DEFINED_ON(x86_shared);

// Floating point rounding (experimental as of August, 2020)
// https://github.com/WebAssembly/simd/pull/232
//
// These are unary operations taking separate `src` and `dest` registers.
// NOTE(review): whether `src` and `dest` may be the same register is not
// visible from these declarations - confirm against the x86_shared
// implementation.
// NOTE(review): `nearest` is presumably round-to-nearest with ties to even,
// per the linked proposal - confirm.

inline void ceilFloat32x4(FloatRegister src, FloatRegister dest)
DEFINED_ON(x86_shared);

inline void ceilFloat64x2(FloatRegister src, FloatRegister dest)
DEFINED_ON(x86_shared);

inline void floorFloat32x4(FloatRegister src, FloatRegister dest)
DEFINED_ON(x86_shared);

inline void floorFloat64x2(FloatRegister src, FloatRegister dest)
DEFINED_ON(x86_shared);

inline void truncFloat32x4(FloatRegister src, FloatRegister dest)
DEFINED_ON(x86_shared);

inline void truncFloat64x2(FloatRegister src, FloatRegister dest)
DEFINED_ON(x86_shared);

inline void nearestFloat32x4(FloatRegister src, FloatRegister dest)
DEFINED_ON(x86_shared);

inline void nearestFloat64x2(FloatRegister src, FloatRegister dest)
DEFINED_ON(x86_shared);

public:
// ========================================================================
// Truncate floating point.
Expand Down
1 change: 1 addition & 0 deletions js/src/jit/arm/MacroAssembler-arm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6028,6 +6028,7 @@ void MacroAssemblerARM::wasmLoadImpl(const wasm::MemoryAccessDesc& access,
}
}
} else {
MOZ_ASSERT(!access.isZeroExtendSimd128Load());
bool isFloat = output.isFloat();
if (isFloat) {
MOZ_ASSERT((byteSize == 4) == output.fpu().isSingle());
Expand Down
2 changes: 2 additions & 0 deletions js/src/jit/arm64/MacroAssembler-arm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -360,9 +360,11 @@ void MacroAssemblerCompat::wasmLoadImpl(const wasm::MemoryAccessDesc& access,
Ldr(SelectGPReg(outany, out64), srcAddr);
break;
case Scalar::Float32:
MOZ_ASSERT(!access.isZeroExtendSimd128Load());
Ldr(SelectFPReg(outany, out64, 32), srcAddr);
break;
case Scalar::Float64:
MOZ_ASSERT(!access.isZeroExtendSimd128Load());
Ldr(SelectFPReg(outany, out64, 64), srcAddr);
break;
case Scalar::Uint8Clamped:
Expand Down
2 changes: 2 additions & 0 deletions js/src/jit/mips-shared/MacroAssembler-mips-shared.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2126,9 +2126,11 @@ void MacroAssemblerMIPSShared::wasmLoadImpl(
isSigned = false;
break;
case Scalar::Float64:
MOZ_ASSERT(!access.isZeroExtendSimd128Load());
isFloat = true;
break;
case Scalar::Float32:
MOZ_ASSERT(!access.isZeroExtendSimd128Load());
isFloat = true;
break;
default:
Expand Down
11 changes: 10 additions & 1 deletion js/src/jit/shared/Assembler-shared.h
Original file line number Diff line number Diff line change
Expand Up @@ -492,6 +492,7 @@ class MemoryAccessDesc {
Scalar::Type type_;
jit::Synchronization sync_;
wasm::BytecodeOffset trapOffset_;
bool zeroExtendSimd128Load_;

public:
explicit MemoryAccessDesc(
Expand All @@ -502,7 +503,8 @@ class MemoryAccessDesc {
align_(align),
type_(type),
sync_(sync),
trapOffset_(trapOffset) {
trapOffset_(trapOffset),
zeroExtendSimd128Load_(false) {
MOZ_ASSERT(mozilla::IsPowerOfTwo(align));
}

Expand All @@ -513,6 +515,13 @@ class MemoryAccessDesc {
const jit::Synchronization& sync() const { return sync_; }
BytecodeOffset trapOffset() const { return trapOffset_; }
bool isAtomic() const { return !sync_.isNone(); }
// Returns the zero-extending-SIMD-load flag.  The constructor initializes it
// to false; setZeroExtendSimd128Load() sets it to true.
bool isZeroExtendSimd128Load() const { return zeroExtendSimd128Load_; }

// Marks this access as a zero-extending SIMD load.  Asserts that the access
// type is Scalar::Float32 or Scalar::Float64 and that the access is not
// atomic.
void setZeroExtendSimd128Load() {
MOZ_ASSERT(type() == Scalar::Float32 || type() == Scalar::Float64);
MOZ_ASSERT(!isAtomic());
zeroExtendSimd128Load_ = true;
}

void clearOffset() { offset_ = 0; }
void setOffset(uint32_t offset) { offset_ = offset; }
Expand Down
6 changes: 4 additions & 2 deletions js/src/jit/x64/MacroAssembler-x64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -596,10 +596,12 @@ void MacroAssembler::wasmLoad(const wasm::MemoryAccessDesc& access,
movl(srcAddr, out.gpr());
break;
case Scalar::Float32:
loadFloat32(srcAddr, out.fpu());
// vmovss does the right thing also for access.isZeroExtendSimd128Load()
vmovss(srcAddr, out.fpu());
break;
case Scalar::Float64:
loadDouble(srcAddr, out.fpu());
// vmovsd does the right thing also for access.isZeroExtendSimd128Load()
vmovsd(srcAddr, out.fpu());
break;
case Scalar::Simd128:
MacroAssemblerX64::loadUnalignedSimd128(srcAddr, out.fpu());
Expand Down
Loading

0 comments on commit 9c46265

Please sign in to comment.