Skip to content

Commit

Permalink
Add support for Sve.Store() (dotnet#102262)
Browse files Browse the repository at this point in the history
* Add support for Sve.Store()

* Fix formatting issues

* Remove incorrect instructions from comment

* Rename Sve.Store() -> Sve.StoreAndZip()

* Refactor test templates
  • Loading branch information
SwapnilGaikwad authored and steveharter committed May 28, 2024
1 parent d10682f commit 7beb4a4
Show file tree
Hide file tree
Showing 13 changed files with 2,221 additions and 2 deletions.
9 changes: 8 additions & 1 deletion src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26706,7 +26706,14 @@ bool GenTreeHWIntrinsic::OperIsMemoryStore(GenTree** pAddr) const
case NI_SSE2_MaskMove:
addr = Op(3);
break;
#endif // TARGET_XARCH
#elif defined(TARGET_ARM64)
case NI_Sve_StoreAndZip:
case NI_Sve_StoreAndZipx2:
case NI_Sve_StoreAndZipx3:
case NI_Sve_StoreAndZipx4:
addr = Op(2);
break;
#endif // TARGET_ARM64

default:
addr = Op(1);
Expand Down
53 changes: 53 additions & 0 deletions src/coreclr/jit/hwintrinsicarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2426,6 +2426,59 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
break;
}

case NI_Sve_StoreAndZip:
{
assert(sig->numArgs == 3);
assert(retType == TYP_VOID);

CORINFO_ARG_LIST_HANDLE arg1 = sig->args;
CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1);
CORINFO_ARG_LIST_HANDLE arg3 = info.compCompHnd->getArgNext(arg2);
var_types argType = TYP_UNKNOWN;
CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE;
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg3, &argClass)));
op3 = impPopStack().val;
unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(argClass);

if (op3->TypeGet() == TYP_STRUCT)
{
info.compNeedsConsecutiveRegisters = true;
switch (fieldCount)
{
case 2:
intrinsic = NI_Sve_StoreAndZipx2;
break;

case 3:
intrinsic = NI_Sve_StoreAndZipx3;
break;

case 4:
intrinsic = NI_Sve_StoreAndZipx4;
break;

default:
assert("unsupported");
}

if (!op3->OperIs(GT_LCL_VAR))
{
unsigned tmp = lvaGrabTemp(true DEBUGARG("SveStoreN"));

impStoreToTemp(tmp, op3, CHECK_SPILL_NONE);
op3 = gtNewLclvNode(tmp, argType);
}
op3 = gtConvertTableOpToFieldList(op3, fieldCount);
}

argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass)));
op2 = getArgForHWIntrinsic(argType, argClass);
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass)));
op1 = getArgForHWIntrinsic(argType, argClass);
retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize);
break;
}

default:
{
return nullptr;
Expand Down
61 changes: 61 additions & 0 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1348,6 +1348,67 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
}
break;

case NI_Sve_StoreAndZipx2:
case NI_Sve_StoreAndZipx3:
case NI_Sve_StoreAndZipx4:
{
assert(intrin.op3->OperIsFieldList());
GenTreeFieldList* fieldList = intrin.op3->AsFieldList();
GenTree* firstField = fieldList->Uses().GetHead()->GetNode();
op3Reg = firstField->GetRegNum();

#ifdef DEBUG
unsigned regCount = 0;
regNumber argReg = op3Reg;
for (GenTreeFieldList::Use& use : fieldList->Uses())
{
regCount++;

GenTree* argNode = use.GetNode();
assert(argReg == argNode->GetRegNum());
argReg = getNextSIMDRegWithWraparound(argReg);
}

switch (ins)
{
case INS_sve_st2b:
case INS_sve_st2d:
case INS_sve_st2h:
case INS_sve_st2w:
case INS_sve_st2q:
assert(regCount == 2);
break;

case INS_sve_st3b:
case INS_sve_st3d:
case INS_sve_st3h:
case INS_sve_st3w:
case INS_sve_st3q:
assert(regCount == 3);
break;

case INS_sve_st4b:
case INS_sve_st4d:
case INS_sve_st4h:
case INS_sve_st4w:
case INS_sve_st4q:
assert(regCount == 4);
break;

default:
unreached();
}
#endif
GetEmitter()->emitIns_R_R_R_I(ins, emitSize, op3Reg, op1Reg, op2Reg, 0, opt);
break;
}

case NI_Sve_StoreAndZip:
{
GetEmitter()->emitIns_R_R_R_I(ins, emitSize, op3Reg, op1Reg, op2Reg, 0, opt);
break;
}

case NI_Vector64_ToVector128:
GetEmitter()->emitIns_Mov(ins, emitSize, targetReg, op1Reg, /* canSkip */ false);
break;
Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/jit/hwintrinsiclistarm64sve.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ HARDWARE_INTRINSIC(Sve, SignExtend32,
HARDWARE_INTRINSIC(Sve, SignExtend8, -1, -1, false, {INS_invalid, INS_invalid, INS_sve_sxtb, INS_invalid, INS_sve_sxtb, INS_invalid, INS_sve_sxtb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, SignExtendWideningLower, -1, 1, true, {INS_sve_sunpklo, INS_invalid, INS_sve_sunpklo, INS_invalid, INS_sve_sunpklo, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Sve, SignExtendWideningUpper, -1, 1, true, {INS_sve_sunpkhi, INS_invalid, INS_sve_sunpkhi, INS_invalid, INS_sve_sunpkhi, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Sve, StoreAndZip, -1, 3, true, {INS_sve_st1b, INS_sve_st1b, INS_sve_st1h, INS_sve_st1h, INS_sve_st1w, INS_sve_st1w, INS_sve_st1d, INS_sve_st1d, INS_sve_st1w, INS_sve_st1d}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ExplicitMaskedOperation|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, Subtract, -1, 2, true, {INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_fsub, INS_sve_fsub}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, SubtractSaturate, -1, 2, true, {INS_sve_sqsub, INS_sve_uqsub, INS_sve_sqsub, INS_sve_uqsub, INS_sve_sqsub, INS_sve_uqsub, INS_sve_sqsub, INS_sve_uqsub, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, UnzipEven, -1, 2, true, {INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
Expand All @@ -138,6 +139,9 @@ HARDWARE_INTRINSIC(Sve, ZipLow,
HARDWARE_INTRINSIC(Sve, ConvertMaskToVector, -1, 1, true, {INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation)
HARDWARE_INTRINSIC(Sve, ConvertVectorToMask, -1, 2, true, {INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, CreateTrueMaskAll, -1, -1, false, {INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Sve, StoreAndZipx2, -1, 3, true, {INS_sve_st2b, INS_sve_st2b, INS_sve_st2h, INS_sve_st2h, INS_sve_st2w, INS_sve_st2w, INS_sve_st2d, INS_sve_st2d, INS_sve_st2w, INS_sve_st2d}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(Sve, StoreAndZipx3, -1, 3, true, {INS_sve_st3b, INS_sve_st3b, INS_sve_st3h, INS_sve_st3h, INS_sve_st3w, INS_sve_st3w, INS_sve_st3d, INS_sve_st3d, INS_sve_st3w, INS_sve_st3d}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(Sve, StoreAndZipx4, -1, 3, true, {INS_sve_st4b, INS_sve_st4b, INS_sve_st4h, INS_sve_st4h, INS_sve_st4w, INS_sve_st4w, INS_sve_st4d, INS_sve_st4d, INS_sve_st4w, INS_sve_st4d}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_NeedsConsecutiveRegisters)


#endif // FEATURE_HW_INTRINSIC
Expand Down
18 changes: 18 additions & 0 deletions src/coreclr/jit/lsraarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1758,6 +1758,20 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
break;
}

case NI_Sve_StoreAndZipx2:
case NI_Sve_StoreAndZipx3:
case NI_Sve_StoreAndZipx4:
{
assert(intrin.op2 != nullptr);
assert(intrin.op3 != nullptr);
srcCount += BuildAddrUses(intrin.op2);
srcCount += BuildConsecutiveRegistersForUse(intrin.op3);
assert(dstCount == 0);
buildInternalRegisterUses();
*pDstCount = 0;
break;
}

default:
noway_assert(!"Not a supported as multiple consecutive register intrinsic");
}
Expand Down Expand Up @@ -1894,6 +1908,10 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
(argNum == lowVectorOperandNum) ? lowVectorCandidates : RBM_NONE);
}
}
else if (intrin.id == NI_Sve_StoreAndZip)
{
srcCount += BuildAddrUses(intrin.op2);
}
else
{
regMaskTP candidates = lowVectorOperandNum == 2 ? lowVectorCandidates : RBM_NONE;
Expand Down
Loading

0 comments on commit 7beb4a4

Please sign in to comment.