Skip to content

Commit

Permalink
Port SIMDIntrinsicGetItem and SIMDIntrinsicSetItem to be implemented …
Browse files Browse the repository at this point in the history
…via HWIntrinsics (#52288)

* Port SIMDIntrinsicGetItem and SIMDIntrinsicSetItem to be implemented using SimdAsHWIntrinsic

* Apply suggestions from code review

Co-authored-by: Egor Chesakov <Egor.Chesakov@microsoft.com>

* Resolving mismerge

* Added a comment explaining why we sometimes return and sometimes do containment checks

* Update src/coreclr/jit/lsraarm64.cpp

Co-authored-by: Egor Chesakov <Egor.Chesakov@microsoft.com>

Co-authored-by: Egor Chesakov <Egor.Chesakov@microsoft.com>
  • Loading branch information
tannergooding and echesakov authored May 18, 2021
1 parent 68ebecb commit a2b7648
Show file tree
Hide file tree
Showing 31 changed files with 1,663 additions and 1,544 deletions.
2 changes: 0 additions & 2 deletions src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -977,8 +977,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
void genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode);
void genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode);
void genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode);
void genSIMDIntrinsicSetItem(GenTreeSIMD* simdNode);
void genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode);
void genSIMDIntrinsicShuffleSSE2(GenTreeSIMD* simdNode);
void genSIMDIntrinsicUpperSave(GenTreeSIMD* simdNode);
void genSIMDIntrinsicUpperRestore(GenTreeSIMD* simdNode);
Expand Down
247 changes: 0 additions & 247 deletions src/coreclr/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3873,17 +3873,6 @@ void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode)
genSIMDIntrinsicBinOp(simdNode);
break;

case SIMDIntrinsicGetItem:
genSIMDIntrinsicGetItem(simdNode);
break;

case SIMDIntrinsicSetX:
case SIMDIntrinsicSetY:
case SIMDIntrinsicSetZ:
case SIMDIntrinsicSetW:
genSIMDIntrinsicSetItem(simdNode);
break;

case SIMDIntrinsicUpperSave:
genSIMDIntrinsicUpperSave(simdNode);
break;
Expand Down Expand Up @@ -4346,242 +4335,6 @@ void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode)
genProduceReg(simdNode);
}

//------------------------------------------------------------------------------------
// genSIMDIntrinsicGetItem: Generate code for SIMD Intrinsic get element at index i.
//
// Arguments:
// simdNode - The GT_SIMD node; must have intrinsic id SIMDIntrinsicGetItem.
// op1 is the SIMD vector, op2 is the element index.
//
// Return Value:
// None.
//
// Notes:
// Two main shapes are handled:
// - Constant index (op2 is a contained integer constant): the element is read either
// with a scalar load at a fixed byte offset (op1 contained, i.e. in memory) or
// with dup/umov/smov from the vector register. Nothing is emitted when the
// constant index is not a valid vector index.
// - Variable index (op2 in a register): a base address is obtained (address of the
// local, the GT_IND address register, or the address of the SIMD init temp the
// vector is stored to) and the element is loaded using the index register scaled
// by the element size.
//
void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode)
{
assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicGetItem);

GenTree* op1 = simdNode->gtGetOp1();
GenTree* op2 = simdNode->gtGetOp2();
var_types simdType = op1->TypeGet();
assert(varTypeIsSIMD(simdType));

// op1 of TYP_SIMD12 should be considered as TYP_SIMD16
if (simdType == TYP_SIMD12)
{
simdType = TYP_SIMD16;
}

var_types baseType = simdNode->GetSimdBaseType();
regNumber targetReg = simdNode->GetRegNum();
assert(targetReg != REG_NA);
var_types targetType = simdNode->TypeGet();
// The node's result type is expected to be the actual (genActualType) form of the element type.
assert(targetType == genActualType(baseType));

// GetItem has 2 operands:
// - the source of SIMD type (op1)
// - the index of the value to be returned.
genConsumeOperands(simdNode);

// Element size as an emitter attribute, and log2 of its byte size; the latter is the
// shift amount applied to the index register in the variable-index path below.
emitAttr baseTypeSize = emitTypeSize(baseType);
unsigned baseTypeScale = genLog2(EA_SIZE_IN_BYTES(baseTypeSize));

if (op2->IsCnsIntOrI())
{
// Constant index: the constant is expected to be contained in this node.
assert(op2->isContained());

ssize_t index = op2->AsIntCon()->gtIconVal;

// We only need to generate code for the get if the index is valid.
// If the index is invalid, the code previously generated for the range check will throw.
if (GetEmitter()->isValidVectorIndex(emitTypeSize(simdType), baseTypeSize, index))
{
if (op1->isContained())
{
// The vector is not in a register: load just the requested element
// from its location at byte offset index * sizeof(element).
int offset = (int)index * genTypeSize(baseType);
instruction ins = ins_Load(baseType);

assert(!op1->isUsedFromReg());

if (op1->OperIsLocal())
{
unsigned varNum = op1->AsLclVarCommon()->GetLclNum();

// Load the element from the local variable's slot at the computed offset.
GetEmitter()->emitIns_R_S(ins, emitActualTypeSize(baseType), targetReg, varNum, offset);
}
else
{
// The only other contained form expected here is an indirection
// whose address has been evaluated into a register.
assert(op1->OperGet() == GT_IND);

GenTree* addr = op1->AsIndir()->Addr();
assert(!addr->isContained());
regNumber baseReg = addr->GetRegNum();

// ldr targetReg, [baseReg, #offset]
GetEmitter()->emitIns_R_R_I(ins, emitActualTypeSize(baseType), targetReg, baseReg, offset);
}
}
else
{
// The vector is in a register: pick the move instruction by element type.
assert(op1->isUsedFromReg());
regNumber srcReg = op1->GetRegNum();

instruction ins;
if (varTypeIsFloating(baseType))
{
assert(genIsValidFloatReg(targetReg));
// dup targetReg, srcReg[#index]
ins = INS_dup;
}
else
{
assert(genIsValidIntReg(targetReg));
// NOTE(review): 8-byte elements take the umov path even when signed, presumably
// because a full-width element needs no sign extension - confirm.
if (varTypeIsUnsigned(baseType) || (baseTypeSize == EA_8BYTE))
{
// umov targetReg, srcReg[#index]
ins = INS_umov;
}
else
{
// smov targetReg, srcReg[#index]
ins = INS_smov;
}
}
GetEmitter()->emitIns_R_R_I(ins, baseTypeSize, targetReg, srcReg, index);
}
}
}
else
{
// Variable index: the index lives in a register and the element is read from memory
// using a base address plus the scaled index.
assert(!op2->isContained());

regNumber baseReg = REG_NA;
regNumber indexReg = op2->GetRegNum();

if (op1->isContained())
{
// Optimize the case of op1 is in memory and trying to access ith element.
assert(!op1->isUsedFromReg());
if (op1->OperIsLocal())
{
unsigned varNum = op1->AsLclVarCommon()->GetLclNum();

// A temp register reserved on this node holds the local's address.
baseReg = simdNode->ExtractTempReg();

// Load the address of varNum
GetEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, baseReg, varNum, 0);
}
else
{
// Require GT_IND addr to be not contained.
assert(op1->OperGet() == GT_IND);

GenTree* addr = op1->AsIndir()->Addr();
assert(!addr->isContained());

baseReg = addr->GetRegNum();
}
}
else
{
// The vector is in a register: store it to the SIMD init temp local so that the
// element can be read back from memory with the variable index.
assert(op1->isUsedFromReg());
regNumber srcReg = op1->GetRegNum();

unsigned simdInitTempVarNum = compiler->lvaSIMDInitTempVarNum;
noway_assert(compiler->lvaSIMDInitTempVarNum != BAD_VAR_NUM);

baseReg = simdNode->ExtractTempReg();

// Load the address of simdInitTempVarNum
GetEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, baseReg, simdInitTempVarNum, 0);

// Store the vector to simdInitTempVarNum
GetEmitter()->emitIns_R_R(INS_str, emitTypeSize(simdType), srcReg, baseReg);
}

assert(genIsValidIntReg(indexReg));
assert(genIsValidIntReg(baseReg));
assert(baseReg != indexReg);

// Load item at baseReg[index]; the index register is shifted left by baseTypeScale
// (log2 of the element size) to form the byte offset.
GetEmitter()->emitIns_R_R_R_Ext(ins_Load(baseType), baseTypeSize, targetReg, baseReg, indexReg, INS_OPTS_LSL,
baseTypeScale);
}

genProduceReg(simdNode);
}

//------------------------------------------------------------------------------------
// genSIMDIntrinsicSetItem: Emit code that overwrites one lane (X, Y, Z or W) of a SIMD vector.
//
// Arguments:
//    simdNode - The GT_SIMD node. Its intrinsic id must be one of SIMDIntrinsicSetX,
//               SIMDIntrinsicSetY, SIMDIntrinsicSetZ or SIMDIntrinsicSetW.
//               op1 is the SIMD vector, op2 is the value written into the lane.
//
// Return Value:
//    None.
//
// Notes:
//    The vector in op1's register is first moved to the target register; the lane chosen
//    by the intrinsic id (X=0, Y=1, Z=2, W=3) is then replaced with op2 using 'ins'.
//
void CodeGen::genSIMDIntrinsicSetItem(GenTreeSIMD* simdNode)
{
    // Translate the intrinsic id into the lane it updates.
    int lane = -1;
    if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicSetX)
    {
        lane = 0;
    }
    else if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicSetY)
    {
        lane = 1;
    }
    else if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicSetZ)
    {
        lane = 2;
    }
    else if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicSetW)
    {
        lane = 3;
    }
    else
    {
        unreached();
    }
    assert(lane != -1);

    // Operands: the vector being updated and the scalar going into the lane.
    GenTree* vectorOp = simdNode->gtGetOp1();
    GenTree* valueOp  = simdNode->gtGetOp2();

    var_types elemType = simdNode->GetSimdBaseType();
    regNumber dstReg   = simdNode->GetRegNum();
    assert(dstReg != REG_NA);
    assert(varTypeIsSIMD(simdNode->TypeGet()));

    // The scalar must have the element type, and the lane must lie inside the vector.
    assert(valueOp->TypeGet() == elemType);
    assert(simdNode->GetSimdSize() >= ((lane + 1) * genTypeSize(elemType)));

    genConsumeOperands(simdNode);
    regNumber vectorReg = vectorOp->GetRegNum();
    regNumber valueReg  = valueOp->GetRegNum();

    assert(genIsValidFloatReg(dstReg));
    assert(genIsValidFloatReg(vectorReg));
    assert(genIsValidIntReg(valueReg) || genIsValidFloatReg(valueReg));
    assert(dstReg != valueReg);

    emitAttr elemAttr = emitTypeSize(elemType);

    // Copy the full vector into the destination register (the move is never skipped).
    GetEmitter()->emitIns_Mov(INS_mov, EA_16BYTE, dstReg, vectorReg, /* canSkip */ false);

    if (!genIsValidIntReg(valueReg))
    {
        // Value is in a vector register: insert its element 0 into the chosen lane.
        GetEmitter()->emitIns_R_R_I_I(INS_ins, elemAttr, dstReg, valueReg, lane, 0);
    }
    else
    {
        // Value is in a general-purpose register: insert it into the chosen lane.
        GetEmitter()->emitIns_R_R_I(INS_ins, elemAttr, dstReg, valueReg, lane);
    }

    genProduceReg(simdNode);
}

//-----------------------------------------------------------------------------
// genSIMDIntrinsicUpperSave: save the upper half of a TYP_SIMD16 vector to
// the given register, if any, or to memory.
Expand Down
23 changes: 18 additions & 5 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -2906,6 +2906,21 @@ class Compiler
GenTreeHWIntrinsic* gtNewSimdCreateBroadcastNode(
var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize, bool isSimdAsHWIntrinsic);

GenTreeHWIntrinsic* gtNewSimdGetElementNode(var_types type,
GenTree* op1,
GenTree* op2,
CorInfoType simdBaseJitType,
unsigned simdSize,
bool isSimdAsHWIntrinsic);

GenTreeHWIntrinsic* gtNewSimdWithElementNode(var_types type,
GenTree* op1,
GenTree* op2,
GenTree* op3,
CorInfoType simdBaseJitType,
unsigned simdSize,
bool isSimdAsHWIntrinsic);

GenTreeHWIntrinsic* gtNewSimdAsHWIntrinsicNode(var_types type,
NamedIntrinsic hwIntrinsicID,
CorInfoType simdBaseJitType,
Expand Down Expand Up @@ -4116,6 +4131,7 @@ class Compiler
GenTree* impNonConstFallback(NamedIntrinsic intrinsic, var_types simdType, CorInfoType simdBaseJitType);
GenTree* addRangeCheckIfNeeded(
NamedIntrinsic intrinsic, GenTree* immOp, bool mustExpand, int immLowerBound, int immUpperBound);
GenTree* addRangeCheckForHWIntrinsic(GenTree* immOp, int immLowerBound, int immUpperBound);

#ifdef TARGET_XARCH
GenTree* impBaseIntrinsic(NamedIntrinsic intrinsic,
Expand Down Expand Up @@ -5899,8 +5915,8 @@ class Compiler
unsigned* indexOut,
unsigned* simdSizeOut,
bool ignoreUsedInSIMDIntrinsic = false);
GenTree* fgMorphFieldAssignToSIMDIntrinsicSet(GenTree* tree);
GenTree* fgMorphFieldToSIMDIntrinsicGet(GenTree* tree);
GenTree* fgMorphFieldAssignToSimdSetElement(GenTree* tree);
GenTree* fgMorphFieldToSimdGetElement(GenTree* tree);
bool fgMorphCombineSIMDFieldAssignments(BasicBlock* block, Statement* stmt);
void impMarkContiguousSIMDFieldAssignments(Statement* stmt);

Expand Down Expand Up @@ -8545,9 +8561,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
// Normalizes TYP_STRUCT value in case of GT_CALL, GT_RET_EXPR and arg nodes.
GenTree* impSIMDPopStack(var_types type, bool expectAddr = false, CORINFO_CLASS_HANDLE structType = nullptr);

// Create a GT_SIMD tree for a Get property of SIMD vector with a fixed index.
GenTreeSIMD* impSIMDGetFixed(var_types simdType, CorInfoType simdBaseJitType, unsigned simdSize, int index);

// Transforms operands and returns the SIMD intrinsic to be applied on
// transformed operands to obtain given relop result.
SIMDIntrinsicID impSIMDRelOp(SIMDIntrinsicID relOpIntrinsicId,
Expand Down
Loading

0 comments on commit a2b7648

Please sign in to comment.