SVE: Added Load2xVectorAndUnzip, Load3xVectorAndUnzip, Load4xVectorAndUnzip APIs #102180

Merged 24 commits on May 29, 2024. (Diff shown: changes from 15 commits.)
12 changes: 12 additions & 0 deletions src/coreclr/jit/emitarm64sve.cpp
@@ -4381,6 +4381,18 @@ void emitter::emitInsSve_R_R_R(instruction ins,
case INS_sve_ld1w:
case INS_sve_ld1sw:
case INS_sve_ld1d:
case INS_sve_ld2b:
case INS_sve_ld2h:
case INS_sve_ld2w:
case INS_sve_ld2d:
case INS_sve_ld3b:
case INS_sve_ld3h:
case INS_sve_ld3w:
case INS_sve_ld3d:
case INS_sve_ld4b:
case INS_sve_ld4h:
case INS_sve_ld4w:
case INS_sve_ld4d:
return emitIns_R_R_R_I(ins, size, reg1, reg2, reg3, 0, opt);

default:
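
For these structure loads, the register-only addressing form shares its encoding path with the register+immediate form at a zero offset, so the new cases simply forward. A rough sketch of the pattern (simplified signatures, not the real emitter API):

    // Illustrative sketch only; the real emitter forwards to emitIns_R_R_R_I
    // with the full instruction/size/option parameters.
    void emitLoadStructureWithOffset(int ins, int reg1, int reg2, int reg3, int imm)
    {
        // ... single place that encodes the [Xn{, #imm, MUL VL}] addressing form ...
    }

    void emitLoadStructure(int ins, int reg1, int reg2, int reg3)
    {
        emitLoadStructureWithOffset(ins, reg1, reg2, reg3, /* imm */ 0);
    }
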
10 changes: 10 additions & 0 deletions src/coreclr/jit/gentree.cpp
@@ -26540,6 +26540,9 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad(GenTree** pAddr) const
case NI_Sve_LoadVectorUInt16ZeroExtendToUInt64:
case NI_Sve_LoadVectorUInt32ZeroExtendToInt64:
case NI_Sve_LoadVectorUInt32ZeroExtendToUInt64:
case NI_Sve_LoadVectorx2:
case NI_Sve_LoadVectorx3:
case NI_Sve_LoadVectorx4:
addr = Op(2);
break;
#endif // TARGET_ARM64
@@ -27054,6 +27057,13 @@ ClassLayout* GenTreeHWIntrinsic::GetLayout(Compiler* compiler) const
case NI_AdvSimd_Arm64_LoadAndReplicateToVector128x4:
return compiler->typGetBlkLayout(64);

case NI_Sve_LoadVectorx2:
return compiler->typGetBlkLayout(compiler->getVectorTByteLength() * 2);
case NI_Sve_LoadVectorx3:
return compiler->typGetBlkLayout(compiler->getVectorTByteLength() * 3);
case NI_Sve_LoadVectorx4:
return compiler->typGetBlkLayout(compiler->getVectorTByteLength() * 4);

#endif // TARGET_ARM64

default:
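
For context on these sizes: SVE vectors are scalable, so the block a structure load reads is the vector count times whatever vector length the JIT is targeting. A minimal sketch of the arithmetic, with a hypothetical helper name:

    // Hypothetical helper, illustrative only: an LDn structure load reads n
    // consecutive scalable vectors' worth of memory.
    unsigned multiVectorBlockSizeBytes(unsigned vectorByteLength, unsigned n)
    {
        // e.g. on a 128-bit SVE implementation vectorByteLength is 16,
        // so LoadVectorx3 covers a 48-byte block.
        return vectorByteLength * n;
    }
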
8 changes: 8 additions & 0 deletions src/coreclr/jit/hwintrinsic.cpp
@@ -1637,6 +1637,14 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
// Op1 input is a vector. HWIntrinsic requires a mask.
retNode->AsHWIntrinsic()->Op(1) = gtNewSimdConvertVectorToMaskNode(retType, op1, simdBaseJitType, simdSize);
}

if (HWIntrinsicInfo::IsMultiReg(intrinsic))
{
assert(HWIntrinsicInfo::IsExplicitMaskedOperation(retNode->AsHWIntrinsic()->GetHWIntrinsicId()));
assert(HWIntrinsicInfo::IsMultiReg(retNode->AsHWIntrinsic()->GetHWIntrinsicId()));
retNode =
impStoreMultiRegValueToVar(retNode, sig->retTypeSigClass DEBUGARG(CorInfoCallConvExtension::Managed));
}
}

if (retType != nodeRetType)
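
A GenTree node ordinarily produces a single value, so a node that defines several registers is stored to a multi-reg local right at import time and its values are read back individually. A scalar analogue of that shape (illustrative only, not JIT code):

    // Scalar analogue: a producer of two values is spilled to a temporary so
    // each value can be consumed as an ordinary local afterwards.
    struct Pair
    {
        long lo;
        long hi;
    };

    Pair loadPair(const long* p)
    {
        return Pair{p[0], p[1]};
    }

    long sumFields(const long* p)
    {
        Pair t = loadPair(p); // analogue of impStoreMultiRegValueToVar: spill to a temp
        return t.lo + t.hi;   // each register's value accessed independently
    }
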
3 changes: 3 additions & 0 deletions src/coreclr/jit/hwintrinsic.h
@@ -826,6 +826,7 @@ struct HWIntrinsicInfo
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x2:
case NI_AdvSimd_LoadAndReplicateToVector64x2:
case NI_AdvSimd_Arm64_LoadAndReplicateToVector128x2:
case NI_Sve_LoadVectorx2:
return 2;

case NI_AdvSimd_LoadVector64x3AndUnzip:
@@ -836,6 +837,7 @@
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x3:
case NI_AdvSimd_LoadAndReplicateToVector64x3:
case NI_AdvSimd_Arm64_LoadAndReplicateToVector128x3:
case NI_Sve_LoadVectorx3:
return 3;

case NI_AdvSimd_LoadVector64x4AndUnzip:
@@ -846,6 +848,7 @@
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x4:
case NI_AdvSimd_LoadAndReplicateToVector64x4:
case NI_AdvSimd_Arm64_LoadAndReplicateToVector128x4:
case NI_Sve_LoadVectorx4:
return 4;
#endif

29 changes: 29 additions & 0 deletions src/coreclr/jit/hwintrinsicarm64.cpp
@@ -2070,6 +2070,35 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
retNode = impStoreMultiRegValueToVar(op1, sig->retTypeSigClass DEBUGARG(CorInfoCallConvExtension::Managed));
break;
}

case NI_Sve_LoadVectorx2:
case NI_Sve_LoadVectorx3:
case NI_Sve_LoadVectorx4:
{
info.compNeedsConsecutiveRegisters = true;

assert(sig->numArgs == 2);

op2 = impPopStack().val;
op1 = impPopStack().val;

if (op1->OperIs(GT_CAST))
{
// Although the API specifies a pointer, if what we have is a BYREF, that's what
// we really want, so throw away the cast.
if (op1->gtGetOp1()->TypeGet() == TYP_BYREF)
{
op1 = op1->gtGetOp1();
}
}

assert(HWIntrinsicInfo::IsMultiReg(intrinsic));
assert(HWIntrinsicInfo::IsExplicitMaskedOperation(intrinsic));

retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize);
break;
}

case NI_AdvSimd_LoadAndInsertScalarVector64x2:
case NI_AdvSimd_LoadAndInsertScalarVector64x3:
case NI_AdvSimd_LoadAndInsertScalarVector64x4:
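
Semantically, the LD2/LD3/LD4 instructions behind these intrinsics load interleaved structures and de-interleave ("unzip") the fields into consecutive vector registers. A scalar reference sketch of the x2 case, ignoring predication of inactive lanes:

    #include <cstddef>

    // Scalar reference (illustrative only): element i of each output vector
    // comes from the i-th two-element structure in memory.
    void load2xAndUnzipReference(const int* src, int* out0, int* out1, size_t elems)
    {
        for (size_t i = 0; i < elems; i++)
        {
            out0[i] = src[2 * i + 0]; // first field of each structure
            out1[i] = src[2 * i + 1]; // second field of each structure
        }
    }
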
3 changes: 3 additions & 0 deletions src/coreclr/jit/hwintrinsiclistarm64sve.h
@@ -89,6 +89,9 @@ HARDWARE_INTRINSIC(Sve, LoadVectorUInt16ZeroExtendToUInt32,
HARDWARE_INTRINSIC(Sve, LoadVectorUInt16ZeroExtendToUInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1h, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorUInt32ZeroExtendToInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorUInt32ZeroExtendToUInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorx2, -1, 2, true, {INS_sve_ld2b, INS_sve_ld2b, INS_sve_ld2h, INS_sve_ld2h, INS_sve_ld2w, INS_sve_ld2w, INS_sve_ld2d, INS_sve_ld2d, INS_sve_ld2w, INS_sve_ld2d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_MultiReg|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(Sve, LoadVectorx3, -1, 2, true, {INS_sve_ld3b, INS_sve_ld3b, INS_sve_ld3h, INS_sve_ld3h, INS_sve_ld3w, INS_sve_ld3w, INS_sve_ld3d, INS_sve_ld3d, INS_sve_ld3w, INS_sve_ld3d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_MultiReg|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(Sve, LoadVectorx4, -1, 2, true, {INS_sve_ld4b, INS_sve_ld4b, INS_sve_ld4h, INS_sve_ld4h, INS_sve_ld4w, INS_sve_ld4w, INS_sve_ld4d, INS_sve_ld4d, INS_sve_ld4w, INS_sve_ld4d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_MultiReg|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(Sve, Max, -1, -1, false, {INS_sve_smax, INS_sve_umax, INS_sve_smax, INS_sve_umax, INS_sve_smax, INS_sve_umax, INS_sve_smax, INS_sve_umax, INS_sve_fmax, INS_sve_fmax}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(Sve, MaxAcross, -1, -1, false, {INS_sve_smaxv, INS_sve_umaxv, INS_sve_smaxv, INS_sve_umaxv, INS_sve_smaxv, INS_sve_umaxv, INS_sve_smaxv, INS_sve_umaxv, INS_sve_fmaxv, INS_sve_fmaxv}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation)
HARDWARE_INTRINSIC(Sve, MaxNumber, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmaxnm, INS_sve_fmaxnm}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics)
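
The per-type instruction columns above select the structure-load opcode by element size: ld2b for bytes, ld2h for 16-bit elements, ld2w for 32-bit (including float), ld2d for 64-bit (including double). A condensed sketch of that mapping (hypothetical enum, not the JIT's representation):

    // Hypothetical enum, illustrative only; the table above encodes this
    // mapping per base type directly.
    enum class SveLd2Opcode { Ld2b, Ld2h, Ld2w, Ld2d };

    SveLd2Opcode pickLd2Opcode(unsigned elemSizeBytes)
    {
        switch (elemSizeBytes)
        {
            case 1:  return SveLd2Opcode::Ld2b; // byte, sbyte
            case 2:  return SveLd2Opcode::Ld2h; // short, ushort
            case 4:  return SveLd2Opcode::Ld2w; // int, uint, float
            default: return SveLd2Opcode::Ld2d; // long, ulong, double
        }
    }
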
12 changes: 12 additions & 0 deletions src/coreclr/jit/lsraarm64.cpp
@@ -1758,6 +1758,18 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCount
break;
}

case NI_Sve_LoadVectorx2:
Member: This can be combined with the above cases:

            case NI_Sve_LoadVectorx2:
            case NI_Sve_LoadVectorx3:
            case NI_Sve_LoadVectorx4:
                assert(intrin.op2 != nullptr);
                srcCount += BuildOperandUses(intrin.op2);
                FALLTHROUGH;
            case NI_AdvSimd_LoadVector64x2AndUnzip:
            case NI_AdvSimd_LoadVector64x3AndUnzip:
            case NI_AdvSimd_LoadVector64x4AndUnzip:
            case NI_AdvSimd_Arm64_LoadVector128x2AndUnzip:
            case NI_AdvSimd_Arm64_LoadVector128x3AndUnzip:
            case NI_AdvSimd_Arm64_LoadVector128x4AndUnzip:
            case NI_AdvSimd_LoadVector64x2:
            case NI_AdvSimd_LoadVector64x3:
            case NI_AdvSimd_LoadVector64x4:
            case NI_AdvSimd_Arm64_LoadVector128x2:
            case NI_AdvSimd_Arm64_LoadVector128x3:
            case NI_AdvSimd_Arm64_LoadVector128x4:
            case NI_AdvSimd_LoadAndReplicateToVector64x2:
            case NI_AdvSimd_LoadAndReplicateToVector64x3:
            case NI_AdvSimd_LoadAndReplicateToVector64x4:
            case NI_AdvSimd_Arm64_LoadAndReplicateToVector128x2:
            case NI_AdvSimd_Arm64_LoadAndReplicateToVector128x3:
            case NI_AdvSimd_Arm64_LoadAndReplicateToVector128x4:
            {
                assert(intrin.op1 != nullptr);
                BuildConsecutiveRegistersForDef(intrinsicTree, dstCount);
                *pDstCount = dstCount;
                break;
            }

Member: This is still not resolved.

Contributor (Author): I can't do this as a fallthrough because there is another fallthrough above it.

Member: Yes, never mind.

case NI_Sve_LoadVectorx3:
case NI_Sve_LoadVectorx4:
{
assert(intrin.op1 != nullptr);
assert(intrin.op2 != nullptr);
srcCount += BuildOperandUses(intrin.op2);
BuildConsecutiveRegistersForDef(intrinsicTree, dstCount);
*pDstCount = dstCount;
break;
}

default:
noway_assert(!"Not supported as a multiple consecutive register intrinsic");
}
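
These loads write their results to architecturally consecutive Z registers (e.g. {z4.s, z5.s} for an LD2W), which is why the table entries carry HW_Flag_NeedsConsecutiveRegisters and the LSRA case calls BuildConsecutiveRegistersForDef. A minimal sketch of the constraint being enforced (illustrative only):

    // Illustrative check: a multi-register structure-load destination is
    // valid only if its registers form a run of adjacent register numbers.
    bool areConsecutiveRegs(const int* regNums, int dstCount)
    {
        for (int i = 1; i < dstCount; i++)
        {
            if (regNums[i] != regNums[i - 1] + 1)
            {
                return false;
            }
        }
        return true;
    }
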