Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SVE: Added Load2xVectorAndUnzip, Load3xVectorAndUnzip, Load4xVectorAndUnzip APIs #102180

Merged
merged 24 commits into from
May 29, 2024
Merged
Show file tree
Hide file tree
Changes from 23 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions src/coreclr/jit/emitarm64sve.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4383,6 +4383,18 @@ void emitter::emitInsSve_R_R_R(instruction ins,
case INS_sve_ld1w:
case INS_sve_ld1sw:
case INS_sve_ld1d:
case INS_sve_ld2b:
TIHan marked this conversation as resolved.
Show resolved Hide resolved
case INS_sve_ld2h:
case INS_sve_ld2w:
case INS_sve_ld2d:
case INS_sve_ld3b:
case INS_sve_ld3h:
case INS_sve_ld3w:
case INS_sve_ld3d:
case INS_sve_ld4b:
case INS_sve_ld4h:
case INS_sve_ld4w:
case INS_sve_ld4d:
TIHan marked this conversation as resolved.
Show resolved Hide resolved
return emitIns_R_R_R_I(ins, size, reg1, reg2, reg3, 0, opt);

default:
Expand Down
10 changes: 10 additions & 0 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26594,6 +26594,9 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad(GenTree** pAddr) const
case NI_Sve_LoadVectorUInt16ZeroExtendToUInt64:
case NI_Sve_LoadVectorUInt32ZeroExtendToInt64:
case NI_Sve_LoadVectorUInt32ZeroExtendToUInt64:
case NI_Sve_Load2xVectorAndUnzip:
case NI_Sve_Load3xVectorAndUnzip:
case NI_Sve_Load4xVectorAndUnzip:
addr = Op(2);
break;
#endif // TARGET_ARM64
Expand Down Expand Up @@ -27115,6 +27118,13 @@ ClassLayout* GenTreeHWIntrinsic::GetLayout(Compiler* compiler) const
case NI_AdvSimd_Arm64_LoadAndReplicateToVector128x4:
return compiler->typGetBlkLayout(64);

case NI_Sve_Load2xVectorAndUnzip:
return compiler->typGetBlkLayout(compiler->getVectorTByteLength() * 2);
TIHan marked this conversation as resolved.
Show resolved Hide resolved
case NI_Sve_Load3xVectorAndUnzip:
return compiler->typGetBlkLayout(compiler->getVectorTByteLength() * 3);
case NI_Sve_Load4xVectorAndUnzip:
return compiler->typGetBlkLayout(compiler->getVectorTByteLength() * 4);

#endif // TARGET_ARM64

default:
Expand Down
8 changes: 8 additions & 0 deletions src/coreclr/jit/hwintrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1603,6 +1603,14 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
// Op1 input is a vector. HWInstrinsic requires a mask.
retNode->AsHWIntrinsic()->Op(1) = gtNewSimdConvertVectorToMaskNode(retType, op1, simdBaseJitType, simdSize);
}

if (HWIntrinsicInfo::IsMultiReg(intrinsic))
{
assert(HWIntrinsicInfo::IsExplicitMaskedOperation(retNode->AsHWIntrinsic()->GetHWIntrinsicId()));
assert(HWIntrinsicInfo::IsMultiReg(retNode->AsHWIntrinsic()->GetHWIntrinsicId()));
retNode =
impStoreMultiRegValueToVar(retNode, sig->retTypeSigClass DEBUGARG(CorInfoCallConvExtension::Managed));
}
}

if (retType != nodeRetType)
Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/jit/hwintrinsic.h
Original file line number Diff line number Diff line change
Expand Up @@ -831,6 +831,7 @@ struct HWIntrinsicInfo
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x2:
case NI_AdvSimd_LoadAndReplicateToVector64x2:
case NI_AdvSimd_Arm64_LoadAndReplicateToVector128x2:
case NI_Sve_Load2xVectorAndUnzip:
return 2;

case NI_AdvSimd_LoadVector64x3AndUnzip:
Expand All @@ -841,6 +842,7 @@ struct HWIntrinsicInfo
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x3:
case NI_AdvSimd_LoadAndReplicateToVector64x3:
case NI_AdvSimd_Arm64_LoadAndReplicateToVector128x3:
case NI_Sve_Load3xVectorAndUnzip:
return 3;

case NI_AdvSimd_LoadVector64x4AndUnzip:
Expand All @@ -851,6 +853,7 @@ struct HWIntrinsicInfo
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x4:
case NI_AdvSimd_LoadAndReplicateToVector64x4:
case NI_AdvSimd_Arm64_LoadAndReplicateToVector128x4:
case NI_Sve_Load4xVectorAndUnzip:
return 4;
#endif

Expand Down
29 changes: 29 additions & 0 deletions src/coreclr/jit/hwintrinsicarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2296,6 +2296,35 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
retNode = impStoreMultiRegValueToVar(op1, sig->retTypeSigClass DEBUGARG(CorInfoCallConvExtension::Managed));
break;
}

case NI_Sve_Load2xVectorAndUnzip:
case NI_Sve_Load3xVectorAndUnzip:
case NI_Sve_Load4xVectorAndUnzip:
{
info.compNeedsConsecutiveRegisters = true;

assert(sig->numArgs == 2);

op2 = impPopStack().val;
op1 = impPopStack().val;

if (op2->OperIs(GT_CAST))
{
// Although the API specifies a pointer, if what we have is a BYREF, that's what
// we really want, so throw away the cast.
if (op2->gtGetOp1()->TypeGet() == TYP_BYREF)
{
op2 = op2->gtGetOp1();
}
}

assert(HWIntrinsicInfo::IsMultiReg(intrinsic));
assert(HWIntrinsicInfo::IsExplicitMaskedOperation(intrinsic));

retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize);
break;
}

case NI_AdvSimd_LoadAndInsertScalarVector64x2:
case NI_AdvSimd_LoadAndInsertScalarVector64x3:
case NI_AdvSimd_LoadAndInsertScalarVector64x4:
Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/jit/hwintrinsiclistarm64sve.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,9 @@ HARDWARE_INTRINSIC(Sve, LoadVectorUInt16ZeroExtendToUInt32,
HARDWARE_INTRINSIC(Sve, LoadVectorUInt16ZeroExtendToUInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1h, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorUInt32ZeroExtendToInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorUInt32ZeroExtendToUInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, Load2xVectorAndUnzip, -1, 2, true, {INS_sve_ld2b, INS_sve_ld2b, INS_sve_ld2h, INS_sve_ld2h, INS_sve_ld2w, INS_sve_ld2w, INS_sve_ld2d, INS_sve_ld2d, INS_sve_ld2w, INS_sve_ld2d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_NeedsConsecutiveRegisters|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Sve, Load3xVectorAndUnzip, -1, 2, true, {INS_sve_ld3b, INS_sve_ld3b, INS_sve_ld3h, INS_sve_ld3h, INS_sve_ld3w, INS_sve_ld3w, INS_sve_ld3d, INS_sve_ld3d, INS_sve_ld3w, INS_sve_ld3d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_NeedsConsecutiveRegisters|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Sve, Load4xVectorAndUnzip, -1, 2, true, {INS_sve_ld4b, INS_sve_ld4b, INS_sve_ld4h, INS_sve_ld4h, INS_sve_ld4w, INS_sve_ld4w, INS_sve_ld4d, INS_sve_ld4d, INS_sve_ld4w, INS_sve_ld4d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_NeedsConsecutiveRegisters|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Sve, Max, -1, -1, false, {INS_sve_smax, INS_sve_umax, INS_sve_smax, INS_sve_umax, INS_sve_smax, INS_sve_umax, INS_sve_smax, INS_sve_umax, INS_sve_fmax, INS_sve_fmax}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, MaxAcross, -1, -1, false, {INS_sve_smaxv, INS_sve_umaxv, INS_sve_smaxv, INS_sve_umaxv, INS_sve_smaxv, INS_sve_umaxv, INS_sve_smaxv, INS_sve_umaxv, INS_sve_fmaxv, INS_sve_fmaxv}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, MaxNumber, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmaxnm, INS_sve_fmaxnm}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
Expand Down
13 changes: 13 additions & 0 deletions src/coreclr/jit/lsraarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1780,6 +1780,19 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
break;
}

case NI_Sve_Load2xVectorAndUnzip:
case NI_Sve_Load3xVectorAndUnzip:
case NI_Sve_Load4xVectorAndUnzip:
{
assert(intrin.op1 != nullptr);
assert(intrin.op2 != nullptr);
assert(intrinsicTree->OperIsMemoryLoadOrStore());
srcCount += BuildAddrUses(intrin.op2);
BuildConsecutiveRegistersForDef(intrinsicTree, dstCount);
*pDstCount = dstCount;
break;
}

case NI_Sve_StoreAndZipx2:
case NI_Sve_StoreAndZipx3:
case NI_Sve_StoreAndZipx4:
Expand Down
Loading
Loading