Skip to content

Commit

Permalink
Porting more of the SIMD intrinsics to be implemented as HWIntrinsics (#36579)
Browse files Browse the repository at this point in the history

* Porting Ceiling and Floor to use SimdAsHWIntrinsic

* Porting SquareRoot to use SimdAsHWIntrinsic

* Porting ConditionalSelect to use SimdAsHWIntrinsic

* Porting get_AllBitsSet, get_Count, and get_Zero to use SimdAsHWIntrinsic

* Porting op_Explicit to use SimdAsHWIntrinsic

* Changing Vector2/3/4 and Vector<T>.Equals to forward to operator ==

* Removing SIMDIntrinsicAbs

* Removing SIMDIntrinsicMax and SIMDIntrinsicMin

* Removing SIMDIntrinsicCeil and SIMDIntrinsicFloor

* Porting op_Equality and op_Inequality to use SimdAsHWIntrinsic

* Removing SIMDIntrinsicSqrt

* Removing SIMDIntrinsicSelect

* Removing SIMDIntrinsicBitwiseAndNot and SIMDIntrinsicBitwiseXor

* Removing SIMDIntrinsicGreaterThanOrEqual and SIMDIntrinsicLessThanOrEqual

* Removing SIMDIntrinsicGreaterThan and SIMDIntrinsicLessThan

* Removing SIMDIntrinsicInstEquals

* Removing SIMDIntrinsicOpEquality and SIMDIntrinsicOpInEquality

* Porting this.Equals to use SimdAsHWIntrinsic

* Don't handle IEquatable`1.Equals via SimdAsHWIntrinsic

* Applying formatting patch

* Account for op2 being able to precede op1 in the LIR order

* Ensure SimdAsHWIntrinsic with 0 args are properly handled

* Fixup the arm64 LowerHWIntrinsicCmpOp implementation to use LowerNodeCC

* Fixing an assert in the NotSupported HWIntrinsic tests
  • Loading branch information
tannergooding authored May 22, 2020
1 parent 46b430a commit 1c66ad1
Show file tree
Hide file tree
Showing 31 changed files with 1,114 additions and 1,976 deletions.
1 change: 0 additions & 1 deletion src/coreclr/src/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -978,7 +978,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
void genSIMDIntrinsicInit(GenTreeSIMD* simdNode);
void genSIMDIntrinsicInitN(GenTreeSIMD* simdNode);
void genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode);
void genSIMDIntrinsicUnOpWithImm(GenTreeSIMD* simdNode);
void genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode);
void genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode);
void genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode);
Expand Down
167 changes: 3 additions & 164 deletions src/coreclr/src/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3821,15 +3821,11 @@ void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode)
genSIMDIntrinsicInitN(simdNode);
break;

case SIMDIntrinsicSqrt:
case SIMDIntrinsicAbs:
case SIMDIntrinsicCast:
case SIMDIntrinsicConvertToSingle:
case SIMDIntrinsicConvertToInt32:
case SIMDIntrinsicConvertToDouble:
case SIMDIntrinsicConvertToInt64:
case SIMDIntrinsicCeil:
case SIMDIntrinsicFloor:
genSIMDIntrinsicUnOp(simdNode);
break;

Expand All @@ -3847,24 +3843,11 @@ void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode)
case SIMDIntrinsicMul:
case SIMDIntrinsicDiv:
case SIMDIntrinsicBitwiseAnd:
case SIMDIntrinsicBitwiseAndNot:
case SIMDIntrinsicBitwiseOr:
case SIMDIntrinsicBitwiseXor:
case SIMDIntrinsicMin:
case SIMDIntrinsicMax:
case SIMDIntrinsicEqual:
case SIMDIntrinsicLessThan:
case SIMDIntrinsicGreaterThan:
case SIMDIntrinsicLessThanOrEqual:
case SIMDIntrinsicGreaterThanOrEqual:
genSIMDIntrinsicBinOp(simdNode);
break;

case SIMDIntrinsicOpEquality:
case SIMDIntrinsicOpInEquality:
genSIMDIntrinsicRelOp(simdNode);
break;

case SIMDIntrinsicDotProduct:
genSIMDIntrinsicDotProduct(simdNode);
break;
Expand All @@ -3888,10 +3871,6 @@ void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode)
genSIMDIntrinsicUpperRestore(simdNode);
break;

case SIMDIntrinsicSelect:
NYI("SIMDIntrinsicSelect lowered during import to (a & sel) | (b & ~sel)");
break;

default:
noway_assert(!"Unimplemented SIMD intrinsic.");
unreached();
Expand Down Expand Up @@ -3949,24 +3928,15 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type
{
switch (intrinsicId)
{
case SIMDIntrinsicAbs:
result = INS_fabs;
break;
case SIMDIntrinsicAdd:
result = INS_fadd;
break;
case SIMDIntrinsicBitwiseAnd:
result = INS_and;
break;
case SIMDIntrinsicBitwiseAndNot:
result = INS_bic;
break;
case SIMDIntrinsicBitwiseOr:
result = INS_orr;
break;
case SIMDIntrinsicBitwiseXor:
result = INS_eor;
break;
case SIMDIntrinsicCast:
result = INS_mov;
break;
Expand All @@ -3980,24 +3950,6 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type
case SIMDIntrinsicEqual:
result = INS_fcmeq;
break;
case SIMDIntrinsicGreaterThan:
result = INS_fcmgt;
break;
case SIMDIntrinsicGreaterThanOrEqual:
result = INS_fcmge;
break;
case SIMDIntrinsicLessThan:
result = INS_fcmlt;
break;
case SIMDIntrinsicLessThanOrEqual:
result = INS_fcmle;
break;
case SIMDIntrinsicMax:
result = INS_fmax;
break;
case SIMDIntrinsicMin:
result = INS_fmin;
break;
case SIMDIntrinsicMul:
result = INS_fmul;
break;
Expand All @@ -4006,12 +3958,6 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type
// Return lower bytes instruction here
result = INS_fcvtn;
break;
case SIMDIntrinsicSelect:
result = INS_bsl;
break;
case SIMDIntrinsicSqrt:
result = INS_fsqrt;
break;
case SIMDIntrinsicSub:
result = INS_fsub;
break;
Expand All @@ -4021,12 +3967,6 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type
case SIMDIntrinsicWidenHi:
result = INS_fcvtl2;
break;
case SIMDIntrinsicCeil:
result = INS_frintp;
break;
case SIMDIntrinsicFloor:
result = INS_frintm;
break;
default:
assert(!"Unsupported SIMD intrinsic");
unreached();
Expand All @@ -4038,25 +3978,15 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type

switch (intrinsicId)
{
case SIMDIntrinsicAbs:
assert(!isUnsigned);
result = INS_abs;
break;
case SIMDIntrinsicAdd:
result = INS_add;
break;
case SIMDIntrinsicBitwiseAnd:
result = INS_and;
break;
case SIMDIntrinsicBitwiseAndNot:
result = INS_bic;
break;
case SIMDIntrinsicBitwiseOr:
result = INS_orr;
break;
case SIMDIntrinsicBitwiseXor:
result = INS_eor;
break;
case SIMDIntrinsicCast:
result = INS_mov;
break;
Expand All @@ -4067,26 +3997,6 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type
case SIMDIntrinsicEqual:
result = INS_cmeq;
break;
case SIMDIntrinsicGreaterThan:
result = isUnsigned ? INS_cmhi : INS_cmgt;
break;
case SIMDIntrinsicGreaterThanOrEqual:
result = isUnsigned ? INS_cmhs : INS_cmge;
break;
case SIMDIntrinsicLessThan:
assert(!isUnsigned);
result = INS_cmlt;
break;
case SIMDIntrinsicLessThanOrEqual:
assert(!isUnsigned);
result = INS_cmle;
break;
case SIMDIntrinsicMax:
result = isUnsigned ? INS_umax : INS_smax;
break;
case SIMDIntrinsicMin:
result = isUnsigned ? INS_umin : INS_smin;
break;
case SIMDIntrinsicMul:
result = INS_mul;
break;
Expand All @@ -4095,9 +4005,6 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type
// Return lower bytes instruction here
result = INS_xtn;
break;
case SIMDIntrinsicSelect:
result = INS_bsl;
break;
case SIMDIntrinsicSub:
result = INS_sub;
break;
Expand Down Expand Up @@ -4256,13 +4163,11 @@ void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode)
//
void CodeGen::genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode)
{
assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicSqrt || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicCast ||
simdNode->gtSIMDIntrinsicID == SIMDIntrinsicAbs ||
assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicCast ||
simdNode->gtSIMDIntrinsicID == SIMDIntrinsicConvertToSingle ||
simdNode->gtSIMDIntrinsicID == SIMDIntrinsicConvertToInt32 ||
simdNode->gtSIMDIntrinsicID == SIMDIntrinsicConvertToDouble ||
simdNode->gtSIMDIntrinsicID == SIMDIntrinsicConvertToInt64 ||
simdNode->gtSIMDIntrinsicID == SIMDIntrinsicCeil || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicFloor);
simdNode->gtSIMDIntrinsicID == SIMDIntrinsicConvertToInt64);

GenTree* op1 = simdNode->gtGetOp1();
var_types baseType = simdNode->gtSIMDBaseType;
Expand Down Expand Up @@ -4407,14 +4312,7 @@ void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode)
assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicAdd || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicSub ||
simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMul || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicDiv ||
simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseAnd ||
simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseAndNot ||
simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseOr ||
simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseXor || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMin ||
simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMax || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicEqual ||
simdNode->gtSIMDIntrinsicID == SIMDIntrinsicLessThan ||
simdNode->gtSIMDIntrinsicID == SIMDIntrinsicGreaterThan ||
simdNode->gtSIMDIntrinsicID == SIMDIntrinsicLessThanOrEqual ||
simdNode->gtSIMDIntrinsicID == SIMDIntrinsicGreaterThanOrEqual);
simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseOr || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicEqual);

GenTree* op1 = simdNode->gtGetOp1();
GenTree* op2 = simdNode->gtGetOp2();
Expand Down Expand Up @@ -4442,65 +4340,6 @@ void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode)
genProduceReg(simdNode);
}

//--------------------------------------------------------------------------------
// genSIMDIntrinsicRelOp: Generate code for a SIMD Intrinsic relational operator
// == and !=
//
// Arguments:
//    simdNode - The GT_SIMD node
//
// Return Value:
//    None.
//
// Notes:
//    The two operands are compared into a temporary float register using the
//    instruction selected for SIMDIntrinsicEqual. That register is then reduced
//    with uminv and byte 0 of the reduction is moved into the target register.
//    For SIMDIntrinsicOpInEquality the low bit is flipped, and in both cases the
//    result is finally masked down to that single low bit.
//
void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode)
{
    assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpEquality ||
           simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpInEquality);

    GenTree*  op1       = simdNode->gtGetOp1();
    GenTree*  op2       = simdNode->gtGetOp2();
    var_types baseType  = simdNode->gtSIMDBaseType;
    regNumber targetReg = simdNode->GetRegNum();

    // Operands must be consumed before their registers are queried.
    genConsumeOperands(simdNode);
    regNumber op1Reg = op1->GetRegNum();
    regNumber op2Reg = op2->GetRegNum();

    instruction ins  = getOpForSIMDIntrinsic(SIMDIntrinsicEqual, baseType);
    emitAttr    attr = (simdNode->gtSIMDSize > 8) ? EA_16BYTE : EA_8BYTE;
    insOpts     opt  = genGetSimdInsOpt(attr, baseType);

    // TODO-ARM64-CQ Contain integer constants where possible

    regNumber tmpFloatReg = simdNode->GetSingleTempReg(RBM_ALLFLOAT);

    GetEmitter()->emitIns_R_R_R(ins, attr, tmpFloatReg, op1Reg, op2Reg, opt);

    if ((simdNode->gtFlags & GTF_SIMD12_OP) != 0)
    {
        // For 12Byte vectors we must set upper bits to get correct comparison
        // We do not assume upper bits are zero.
        // targetReg is used as a scratch register here; it is overwritten below.
        instGen_Set_Reg_To_Imm(EA_4BYTE, targetReg, -1);
        GetEmitter()->emitIns_R_R_I(INS_ins, EA_4BYTE, tmpFloatReg, targetReg, 3);
    }

    GetEmitter()->emitIns_R_R(INS_uminv, attr, tmpFloatReg, tmpFloatReg,
                              (simdNode->gtSIMDSize > 8) ? INS_OPTS_16B : INS_OPTS_8B);

    GetEmitter()->emitIns_R_R_I(INS_mov, EA_1BYTE, targetReg, tmpFloatReg, 0);

    if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpInEquality)
    {
        // Invert the low bit so that "all equal" becomes 0 for !=.
        GetEmitter()->emitIns_R_R_I(INS_eor, EA_4BYTE, targetReg, targetReg, 0x1);
    }

    GetEmitter()->emitIns_R_R_I(INS_and, EA_4BYTE, targetReg, targetReg, 0x1);

    genProduceReg(simdNode);
}

//--------------------------------------------------------------------------------
// genSIMDIntrinsicDotProduct: Generate code for SIMD Intrinsic Dot Product.
//
Expand Down
53 changes: 11 additions & 42 deletions src/coreclr/src/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -2621,6 +2621,16 @@ class Compiler
var_types baseType,
unsigned size);

// Creates a hardware intrinsic node with no operands via gtNewSimdHWIntrinsicNode and
// tags it with GTF_SIMDASHW_OP before handing it back to the caller.
GenTreeHWIntrinsic* gtNewSimdAsHWIntrinsicNode(var_types type, NamedIntrinsic hwIntrinsicID, var_types baseType, unsigned size)
{
    GenTreeHWIntrinsic* const simdAsHWNode = gtNewSimdHWIntrinsicNode(type, hwIntrinsicID, baseType, size);

    // Mark the freshly created node as a SimdAsHWIntrinsic operation.
    simdAsHWNode->gtFlags |= GTF_SIMDASHW_OP;

    return simdAsHWNode;
}

GenTreeHWIntrinsic* gtNewSimdAsHWIntrinsicNode(
var_types type, GenTree* op1, NamedIntrinsic hwIntrinsicID, var_types baseType, unsigned size)
{
Expand Down Expand Up @@ -7970,9 +7980,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

// Reports whether intrinsicId is one of the SIMD relational intrinsics compared against below.
// NOTE(review): two return statements appear back to back. As written only the first one can
// execute and the second is unreachable. This looks like the "before" and "after" lines of a
// diff hunk shown together -- confirm which single return is intended.
static bool isRelOpSIMDIntrinsic(SIMDIntrinsicID intrinsicId)
{
return (intrinsicId == SIMDIntrinsicEqual || intrinsicId == SIMDIntrinsicLessThan ||
intrinsicId == SIMDIntrinsicLessThanOrEqual || intrinsicId == SIMDIntrinsicGreaterThan ||
intrinsicId == SIMDIntrinsicGreaterThanOrEqual);
return (intrinsicId == SIMDIntrinsicEqual);
}

// Returns base type of a TYP_SIMD local.
Expand Down Expand Up @@ -8076,22 +8084,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
// Create a GT_SIMD tree for a Get property of SIMD vector with a fixed index.
GenTreeSIMD* impSIMDGetFixed(var_types simdType, var_types baseType, unsigned simdSize, int index);

// Creates a GT_SIMD tree for Select operation
GenTree* impSIMDSelect(CORINFO_CLASS_HANDLE typeHnd,
var_types baseType,
unsigned simdVectorSize,
GenTree* op1,
GenTree* op2,
GenTree* op3);

// Creates a GT_SIMD tree for Min/Max operation
GenTree* impSIMDMinMax(SIMDIntrinsicID intrinsicId,
CORINFO_CLASS_HANDLE typeHnd,
var_types baseType,
unsigned simdVectorSize,
GenTree* op1,
GenTree* op2);

// Transforms operands and returns the SIMD intrinsic to be applied on
// transformed operands to obtain given relop result.
SIMDIntrinsicID impSIMDRelOp(SIMDIntrinsicID relOpIntrinsicId,
Expand All @@ -8101,9 +8093,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
GenTree** op1,
GenTree** op2);

// Creates a GT_SIMD tree for Abs intrinsic.
GenTree* impSIMDAbs(CORINFO_CLASS_HANDLE typeHnd, var_types baseType, unsigned simdVectorSize, GenTree* op1);

#if defined(TARGET_XARCH)

// Transforms operands and returns the SIMD intrinsic to be applied on
Expand All @@ -8113,26 +8102,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
GenTree** op1,
GenTree** op2);

// Transforms operands and returns the SIMD intrinsic to be applied on
// transformed operands to obtain > comparison result.
SIMDIntrinsicID impSIMDLongRelOpGreaterThan(CORINFO_CLASS_HANDLE typeHnd,
unsigned simdVectorSize,
GenTree** op1,
GenTree** op2);

// Transforms operands and returns the SIMD intrinsic to be applied on
// transformed operands to obtain >= comparison result.
SIMDIntrinsicID impSIMDLongRelOpGreaterThanOrEqual(CORINFO_CLASS_HANDLE typeHnd,
unsigned simdVectorSize,
GenTree** op1,
GenTree** op2);

// Transforms operands and returns the SIMD intrinsic to be applied on
// transformed operands to obtain >= comparison result in case of int32
// and small int base type vectors.
SIMDIntrinsicID impSIMDIntegralRelOpGreaterThanOrEqual(
CORINFO_CLASS_HANDLE typeHnd, unsigned simdVectorSize, var_types baseType, GenTree** op1, GenTree** op2);

#endif // defined(TARGET_XARCH)

void setLclRelatedToSIMDIntrinsic(GenTree* tree);
Expand Down
Loading

0 comments on commit 1c66ad1

Please sign in to comment.