Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Arm AdvSimd: Rename StoreVectorMxN to Store #103689

Merged
merged 4 commits into from
Jun 20, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 51 additions & 56 deletions src/coreclr/jit/hwintrinsicarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1909,22 +1909,63 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
}

case NI_AdvSimd_Store:
case NI_AdvSimd_Arm64_Store:
{
assert(retType == TYP_VOID);
assert(sig->numArgs == 2);
CORINFO_ARG_LIST_HANDLE arg1 = sig->args;
CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1);
var_types argType = TYP_UNKNOWN;
CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE;
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass)));
op2 = impPopStack().val;

var_types simdType = getSIMDTypeForSize(simdSize);
if (op2->TypeGet() == TYP_STRUCT)
{
info.compNeedsConsecutiveRegisters = true;
unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(argClass);

op2 = impSIMDPopStack();
op1 = impPopStack().val;
if (!op2->OperIs(GT_LCL_VAR))
{
unsigned tmp = lvaGrabTemp(true DEBUGARG("StoreVectorN"));

if (op1->OperIs(GT_CAST) && op1->gtGetOp1()->TypeIs(TYP_BYREF))
{
// If what we have is a BYREF, that's what we really want, so throw away the cast.
op1 = op1->gtGetOp1();
impStoreToTemp(tmp, op2, CHECK_SPILL_NONE);
op2 = gtNewLclvNode(tmp, argType);
}
op2 = gtConvertTableOpToFieldList(op2, fieldCount);
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass)));
op1 = getArgForHWIntrinsic(argType, argClass);

if (op1->OperIs(GT_CAST))
{
// Although the API specifies a pointer, if what we have is a BYREF, that's what
// we really want, so throw away the cast.
if (op1->gtGetOp1()->TypeGet() == TYP_BYREF)
{
op1 = op1->gtGetOp1();
}
}

retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize);
}
else
{
if (op2->TypeGet() == TYP_SIMD16)
{
// Update the simdSize explicitly as Vector128 variant of Store() is present in AdvSimd instead of
SwapnilGaikwad marked this conversation as resolved.
Show resolved Hide resolved
// AdvSimd.Arm64.
simdSize = 16;
}

var_types simdType = getSIMDTypeForSize(simdSize);
op1 = impPopStack().val;

if (op1->OperIs(GT_CAST) && op1->gtGetOp1()->TypeIs(TYP_BYREF))
{
// If what we have is a BYREF, that's what we really want, so throw away the cast.
op1 = op1->gtGetOp1();
}

retNode = gtNewSimdStoreNode(op1, op2, simdBaseJitType, simdSize);
retNode = gtNewSimdStoreNode(op1, op2, simdBaseJitType, simdSize);
}
break;
}

Expand Down Expand Up @@ -2080,52 +2121,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
break;
}

case NI_AdvSimd_StoreVector64x2:
case NI_AdvSimd_StoreVector64x3:
case NI_AdvSimd_StoreVector64x4:
case NI_AdvSimd_Arm64_StoreVector128x2:
case NI_AdvSimd_Arm64_StoreVector128x3:
case NI_AdvSimd_Arm64_StoreVector128x4:
{
assert(sig->numArgs == 2);
assert(retType == TYP_VOID);

CORINFO_ARG_LIST_HANDLE arg1 = sig->args;
CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1);
var_types argType = TYP_UNKNOWN;
CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE;

argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass)));
op2 = impPopStack().val;
unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(argClass);
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass)));
op1 = getArgForHWIntrinsic(argType, argClass);

assert(op2->TypeGet() == TYP_STRUCT);
if (op1->OperIs(GT_CAST))
{
// Although the API specifies a pointer, if what we have is a BYREF, that's what
// we really want, so throw away the cast.
if (op1->gtGetOp1()->TypeGet() == TYP_BYREF)
{
op1 = op1->gtGetOp1();
}
}

if (!op2->OperIs(GT_LCL_VAR))
{
unsigned tmp = lvaGrabTemp(true DEBUGARG("StoreVectorNx2 temp tree"));

impStoreToTemp(tmp, op2, CHECK_SPILL_NONE);
op2 = gtNewLclvNode(tmp, argType);
}
op2 = gtConvertTableOpToFieldList(op2, fieldCount);

info.compNeedsConsecutiveRegisters = true;
retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize);
break;
}

case NI_AdvSimd_StoreSelectedScalar:
case NI_AdvSimd_Arm64_StoreSelectedScalar:
{
Expand Down
44 changes: 6 additions & 38 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1280,40 +1280,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
break;
}

case NI_AdvSimd_StoreVector64x2:
case NI_AdvSimd_StoreVector64x3:
case NI_AdvSimd_StoreVector64x4:
case NI_AdvSimd_Arm64_StoreVector128x2:
case NI_AdvSimd_Arm64_StoreVector128x3:
case NI_AdvSimd_Arm64_StoreVector128x4:
{
unsigned regCount = 0;

assert(intrin.op2->OperIsFieldList());

GenTreeFieldList* fieldList = intrin.op2->AsFieldList();
GenTree* firstField = fieldList->Uses().GetHead()->GetNode();
op2Reg = firstField->GetRegNum();

#ifdef DEBUG
regNumber argReg = op2Reg;
for (GenTreeFieldList::Use& use : fieldList->Uses())
{
regCount++;

GenTree* argNode = use.GetNode();
assert(argReg == argNode->GetRegNum());
argReg = getNextSIMDRegWithWraparound(argReg);
}
assert((ins == INS_st1_2regs && regCount == 2) || (ins == INS_st2 && regCount == 2) ||
(ins == INS_st1_3regs && regCount == 3) || (ins == INS_st3 && regCount == 3) ||
(ins == INS_st1_4regs && regCount == 4) || (ins == INS_st4 && regCount == 4));
#endif

GetEmitter()->emitIns_R_R(ins, emitSize, op2Reg, op1Reg, opt);
break;
}

case NI_AdvSimd_Store:
case NI_AdvSimd_Arm64_Store:
case NI_AdvSimd_StoreVectorAndZip:
case NI_AdvSimd_Arm64_StoreVectorAndZip:
{
Expand All @@ -1336,24 +1304,24 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
#endif
}

bool isSequentialStore = (intrin.id == NI_AdvSimd_Arm64_Store || intrin.id == NI_AdvSimd_Store);
switch (regCount)
{
case 2:
ins = INS_st2;
ins = isSequentialStore ? INS_st1_2regs : INS_st2;
break;

case 3:
ins = INS_st3;
ins = isSequentialStore ? INS_st1_3regs : INS_st3;
break;

case 4:
ins = INS_st4;
ins = isSequentialStore ? INS_st1_4regs : INS_st4;
break;

default:
unreached();
}

GetEmitter()->emitIns_R_R(ins, emitSize, op2Reg, op1Reg, opt);
break;
}
Expand Down
9 changes: 2 additions & 7 deletions src/coreclr/jit/hwintrinsiclistarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -464,12 +464,9 @@ HARDWARE_INTRINSIC(AdvSimd, ShiftRightLogicalScalar,
HARDWARE_INTRINSIC(AdvSimd, SignExtendWideningLower, 8, 1, true, {INS_sxtl, INS_invalid, INS_sxtl, INS_invalid, INS_sxtl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AdvSimd, SignExtendWideningUpper, 16, 1, true, {INS_sxtl2, INS_invalid, INS_sxtl2, INS_invalid, INS_sxtl2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AdvSimd, SqrtScalar, 8, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fsqrt, INS_fsqrt}, HW_Category_SIMD, HW_Flag_SIMDScalar)
HARDWARE_INTRINSIC(AdvSimd, Store, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg)
HARDWARE_INTRINSIC(AdvSimd, Store, 8, 2, true, {INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_invalid, INS_invalid, INS_st1_2regs, INS_invalid}, HW_Category_MemoryStore, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd, StoreSelectedScalar, 8, 3, true, {INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd, StoreVectorAndZip, 8, 2, true, {INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd, StoreVector64x2, 8, 2, true, {INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_invalid, INS_invalid, INS_st1_2regs, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd, StoreVector64x3, 8, 2, true, {INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_invalid, INS_invalid, INS_st1_3regs, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd, StoreVector64x4, 8, 2, true, {INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_invalid, INS_invalid, INS_st1_4regs, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd, Subtract, -1, 2, true, {INS_sub, INS_sub, INS_sub, INS_sub, INS_sub, INS_sub, INS_sub, INS_sub, INS_fsub, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AdvSimd, SubtractHighNarrowingLower, 8, 2, true, {INS_subhn, INS_subhn, INS_subhn, INS_subhn, INS_subhn, INS_subhn, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AdvSimd, SubtractHighNarrowingUpper, 16, 3, true, {INS_subhn2, INS_subhn2, INS_subhn2, INS_subhn2, INS_subhn2, INS_subhn2, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics)
Expand Down Expand Up @@ -663,9 +660,7 @@ HARDWARE_INTRINSIC(AdvSimd_Arm64, StorePairScalarNonTemporal,
HARDWARE_INTRINSIC(AdvSimd_Arm64, StorePairNonTemporal, -1, 3, true, {INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stp}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreSelectedScalar, 16, 3, true, {INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreVectorAndZip, 16, 2, true, {INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreVector128x2, 16, 2, true, {INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreVector128x3, 16, 2, true, {INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreVector128x4, 16, 2, true, {INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd_Arm64, Store, 16, 2, true, {INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd_Arm64, Subtract, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fsub}, HW_Category_SIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AdvSimd_Arm64, SubtractSaturateScalar, 8, 2, true, {INS_sqsub, INS_uqsub, INS_sqsub, INS_uqsub, INS_sqsub, INS_uqsub, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SIMDScalar)
HARDWARE_INTRINSIC(AdvSimd_Arm64, TransposeEven, -1, 2, true, {INS_trn1, INS_trn1, INS_trn1, INS_trn1, INS_trn1, INS_trn1, INS_trn1, INS_trn1, INS_trn1, INS_trn1}, HW_Category_SIMD, HW_Flag_NoFlag)
Expand Down
8 changes: 2 additions & 6 deletions src/coreclr/jit/lsraarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1725,14 +1725,10 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
break;
}

case NI_AdvSimd_Store:
case NI_AdvSimd_Arm64_Store:
case NI_AdvSimd_StoreVectorAndZip:
case NI_AdvSimd_Arm64_StoreVectorAndZip:
case NI_AdvSimd_StoreVector64x2:
case NI_AdvSimd_StoreVector64x3:
case NI_AdvSimd_StoreVector64x4:
case NI_AdvSimd_Arm64_StoreVector128x2:
case NI_AdvSimd_Arm64_StoreVector128x3:
case NI_AdvSimd_Arm64_StoreVector128x4:
{
assert(intrin.op1 != nullptr);
srcCount += BuildConsecutiveRegistersForUse(intrin.op2);
Expand Down
Loading
Loading