From e89c0ab397a757cfc279f678303b1b6b0bc47b3e Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Wed, 14 Dec 2022 20:48:34 -0800 Subject: [PATCH 01/20] Updating SimdAsHWIntrinsic to handle the InitN methods --- src/coreclr/jit/simdashwintrinsic.cpp | 243 ++++++++++++++++++- src/coreclr/jit/simdashwintrinsiclistarm64.h | 3 + src/coreclr/jit/simdashwintrinsiclistxarch.h | 3 + 3 files changed, 242 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/simdashwintrinsic.cpp b/src/coreclr/jit/simdashwintrinsic.cpp index b298eb89c2a78..18072ad2ca654 100644 --- a/src/coreclr/jit/simdashwintrinsic.cpp +++ b/src/coreclr/jit/simdashwintrinsic.cpp @@ -396,6 +396,8 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, GenTree* op1 = nullptr; GenTree* op2 = nullptr; GenTree* op3 = nullptr; + GenTree* op4 = nullptr; + GenTree* op5 = nullptr; unsigned numArgs = sig->numArgs; bool isInstanceMethod = false; @@ -1741,18 +1743,18 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, case 3: { - assert(newobjThis == nullptr); - if (SimdAsHWIntrinsicInfo::SpillSideEffectsOp1(intrinsic)) { impSpillSideEffect(true, verCurrentState.esStackDepth - - 3 DEBUGARG("Spilling op1 side effects for SimdAsHWIntrinsic")); + ((newobjThis == nullptr) ? 3 : 2) + DEBUGARG("Spilling op1 side effects for SimdAsHWIntrinsic")); } if (SimdAsHWIntrinsicInfo::SpillSideEffectsOp2(intrinsic)) { impSpillSideEffect(true, verCurrentState.esStackDepth - - 2 DEBUGARG("Spilling op2 side effects for SimdAsHWIntrinsic")); + ((newobjThis == nullptr) ? 2 : 1) + DEBUGARG("Spilling op2 side effects for SimdAsHWIntrinsic")); } CORINFO_ARG_LIST_HANDLE arg2 = isInstanceMethod ? 
argList : info.compCompHnd->getArgNext(argList); @@ -1764,9 +1766,21 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); op2 = getArgForHWIntrinsic(argType, argClass); - argType = isInstanceMethod ? simdType - : JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass))); - op1 = getArgForHWIntrinsic(argType, argClass, isInstanceMethod, newobjThis); + bool implicitConstructor = isInstanceMethod && (newobjThis == nullptr) && (retType == TYP_VOID); + + if (implicitConstructor) + { + op1 = getArgForHWIntrinsic(TYP_BYREF, argClass, isInstanceMethod, newobjThis); + } + else + { + argType = isInstanceMethod + ? simdType + : JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass))); + + op1 = getArgForHWIntrinsic(argType, (newobjThis != nullptr) ? clsHnd : argClass, isInstanceMethod, + newobjThis); + } assert(!SimdAsHWIntrinsicInfo::NeedsOperandsSwapped(intrinsic)); @@ -1842,6 +1856,47 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, #error Unsupported platform #endif // !TARGET_XARCH && !TARGET_ARM64 + case NI_Vector2_Create: + { + assert(retType == TYP_VOID); + assert(simdBaseType == TYP_FLOAT); + assert(simdSize == 8); + + if (op2->IsCnsFltOrDbl() && op3->IsCnsFltOrDbl()) + { + GenTreeVecCon* vecCon = gtNewVconNode(TYP_SIMD8); + + float cnsVal = 0; + + vecCon->gtSimd8Val.f32[0] = static_cast(op2->AsDblCon()->DconValue()); + vecCon->gtSimd8Val.f32[1] = static_cast(op3->AsDblCon()->DconValue()); + + copyBlkSrc = vecCon; + } + else + { +#if defined(TARGET_XARCH) + IntrinsicNodeBuilder nodeBuilder(getAllocator(CMK_ASTNode), 4); + + nodeBuilder.AddOperand(0, op2); + nodeBuilder.AddOperand(1, op3); + nodeBuilder.AddOperand(2, gtNewZeroConNode(TYP_FLOAT)); + nodeBuilder.AddOperand(3, gtNewZeroConNode(TYP_FLOAT)); + + copyBlkSrc = gtNewSimdHWIntrinsicNode(TYP_SIMD8, std::move(nodeBuilder), 
NI_Vector128_Create, + simdBaseJitType, 16, /* isSimdAsHWIntrinsic */ true); +#elif defined(TARGET_ARM64) + copyBlkSrc = gtNewSimdHWIntrinsicNode(TYP_SIMD8, op2, op3, NI_Vector64_Create, simdBaseJitType, + 8, /* isSimdAsHWIntrinsic */ true); +#else +#error Unsupported platform +#endif // !TARGET_XARCH && !TARGET_ARM64 + } + + copyBlkDst = op1; + break; + } + default: { // Some platforms warn about unhandled switch cases @@ -1849,6 +1904,180 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, break; } } + break; + } + + case 4: + { + assert(isInstanceMethod); + assert(SimdAsHWIntrinsicInfo::SpillSideEffectsOp1(intrinsic)); + { + impSpillSideEffect(true, verCurrentState.esStackDepth - + ((newobjThis == nullptr) ? 4 : 3) + DEBUGARG("Spilling op1 side effects for SimdAsHWIntrinsic")); + } + + assert(!SimdAsHWIntrinsicInfo::SpillSideEffectsOp2(intrinsic)); + + CORINFO_ARG_LIST_HANDLE arg2 = argList; + CORINFO_ARG_LIST_HANDLE arg3 = info.compCompHnd->getArgNext(arg2); + CORINFO_ARG_LIST_HANDLE arg4 = info.compCompHnd->getArgNext(arg3); + + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg4, &argClass))); + op4 = getArgForHWIntrinsic(argType, argClass); + + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg3, &argClass))); + op3 = getArgForHWIntrinsic(argType, argClass); + + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); + op2 = getArgForHWIntrinsic(argType, argClass); + + if ((newobjThis == nullptr) && (retType == TYP_VOID)) + { + op1 = getArgForHWIntrinsic(TYP_BYREF, argClass, isInstanceMethod, newobjThis); + } + else + { + op1 = getArgForHWIntrinsic(simdType, (newobjThis != nullptr) ? 
clsHnd : argClass, isInstanceMethod, + newobjThis); + } + + assert(!SimdAsHWIntrinsicInfo::NeedsOperandsSwapped(intrinsic)); + + switch (intrinsic) + { + case NI_Vector3_Create: + { + assert(retType == TYP_VOID); + assert(simdBaseType == TYP_FLOAT); + assert(simdSize == 12); + + if (op2->IsCnsFltOrDbl() && op3->IsCnsFltOrDbl() && op4->IsCnsFltOrDbl()) + { + GenTreeVecCon* vecCon = gtNewVconNode(TYP_SIMD12); + + float cnsVal = 0; + + vecCon->gtSimd12Val.f32[0] = static_cast(op2->AsDblCon()->DconValue()); + vecCon->gtSimd12Val.f32[1] = static_cast(op3->AsDblCon()->DconValue()); + vecCon->gtSimd12Val.f32[2] = static_cast(op4->AsDblCon()->DconValue()); + + copyBlkSrc = vecCon; + } + else + { + IntrinsicNodeBuilder nodeBuilder(getAllocator(CMK_ASTNode), 4); + + nodeBuilder.AddOperand(0, op2); + nodeBuilder.AddOperand(1, op3); + nodeBuilder.AddOperand(2, op4); + nodeBuilder.AddOperand(3, gtNewZeroConNode(TYP_FLOAT)); + + copyBlkSrc = gtNewSimdHWIntrinsicNode(TYP_SIMD12, std::move(nodeBuilder), NI_Vector128_Create, + simdBaseJitType, 16, /* isSimdAsHWIntrinsic */ true); + } + + copyBlkDst = op1; + break; + } + + default: + { + // Some platforms warn about unhandled switch cases + // We handle it more generally via the assert and nullptr return below. + break; + } + } + break; + } + + case 5: + { + assert(isInstanceMethod); + assert(SimdAsHWIntrinsicInfo::SpillSideEffectsOp1(intrinsic)); + { + impSpillSideEffect(true, verCurrentState.esStackDepth - + ((newobjThis == nullptr) ? 
5 : 4) + DEBUGARG("Spilling op1 side effects for SimdAsHWIntrinsic")); + } + + assert(!SimdAsHWIntrinsicInfo::SpillSideEffectsOp2(intrinsic)); + + CORINFO_ARG_LIST_HANDLE arg2 = argList; + CORINFO_ARG_LIST_HANDLE arg3 = info.compCompHnd->getArgNext(arg2); + CORINFO_ARG_LIST_HANDLE arg4 = info.compCompHnd->getArgNext(arg3); + CORINFO_ARG_LIST_HANDLE arg5 = info.compCompHnd->getArgNext(arg4); + + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg5, &argClass))); + op5 = getArgForHWIntrinsic(argType, argClass); + + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg4, &argClass))); + op4 = getArgForHWIntrinsic(argType, argClass); + + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg3, &argClass))); + op3 = getArgForHWIntrinsic(argType, argClass); + + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); + op2 = getArgForHWIntrinsic(argType, argClass); + + if ((newobjThis == nullptr) && (retType == TYP_VOID)) + { + op1 = getArgForHWIntrinsic(TYP_BYREF, argClass, isInstanceMethod, newobjThis); + } + else + { + op1 = getArgForHWIntrinsic(simdType, (newobjThis != nullptr) ? 
clsHnd : argClass, isInstanceMethod, + newobjThis); + } + + assert(!SimdAsHWIntrinsicInfo::NeedsOperandsSwapped(intrinsic)); + + switch (intrinsic) + { + case NI_Vector4_Create: + { + assert(retType == TYP_VOID); + assert(simdBaseType == TYP_FLOAT); + assert(simdSize == 16); + + if (op2->IsCnsFltOrDbl() && op3->IsCnsFltOrDbl() && op4->IsCnsFltOrDbl() && op5->IsCnsFltOrDbl()) + { + GenTreeVecCon* vecCon = gtNewVconNode(TYP_SIMD16); + + float cnsVal = 0; + + vecCon->gtSimd16Val.f32[0] = static_cast(op2->AsDblCon()->DconValue()); + vecCon->gtSimd16Val.f32[1] = static_cast(op3->AsDblCon()->DconValue()); + vecCon->gtSimd16Val.f32[2] = static_cast(op4->AsDblCon()->DconValue()); + vecCon->gtSimd16Val.f32[3] = static_cast(op5->AsDblCon()->DconValue()); + + copyBlkSrc = vecCon; + } + else + { + IntrinsicNodeBuilder nodeBuilder(getAllocator(CMK_ASTNode), 4); + + nodeBuilder.AddOperand(0, op2); + nodeBuilder.AddOperand(1, op3); + nodeBuilder.AddOperand(2, op4); + nodeBuilder.AddOperand(3, op5); + + copyBlkSrc = gtNewSimdHWIntrinsicNode(TYP_SIMD16, std::move(nodeBuilder), NI_Vector128_Create, + simdBaseJitType, 16, /* isSimdAsHWIntrinsic */ true); + } + + copyBlkDst = op1; + break; + } + + default: + { + // Some platforms warn about unhandled switch cases + // We handle it more generally via the assert and nullptr return below. 
+ break; + } + } + break; } } diff --git a/src/coreclr/jit/simdashwintrinsiclistarm64.h b/src/coreclr/jit/simdashwintrinsiclistarm64.h index 31f56a584a83a..a25e9e932d145 100644 --- a/src/coreclr/jit/simdashwintrinsiclistarm64.h +++ b/src/coreclr/jit/simdashwintrinsiclistarm64.hector2 Intrinsics SIMD_AS_HWINTRINSIC_ID(Vector2, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_NM(Vector2, Create, ".ctor", 3, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_Create, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) SIMD_AS_HWINTRINSIC_NM(Vector2, CreateBroadcast, ".ctor", 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_CreateBroadcast, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) SIMD_AS_HWINTRINSIC_ID(Vector2, Dot, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector64_Dot, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector2, get_One, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_get_One, NI_Illegal}, SimdAsHWIntrinsicFlag::None) @@ -58,6 +59,7 @@ SIMD_AS_HWINTRINSIC_ID(Vector2, SquareRootector3 Intrinsics SIMD_AS_HWINTRINSIC_ID(Vector3, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_NM(Vector3, Create, ".ctor", 4, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_Create, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) 
SIMD_AS_HWINTRINSIC_NM(Vector3, CreateBroadcast, ".ctor", 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_CreateBroadcast, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) SIMD_AS_HWINTRINSIC_ID(Vector3, Dot, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_Dot, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector3, get_One, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_get_One, NI_Illegal}, SimdAsHWIntrinsicFlag::None) @@ -78,6 +80,7 @@ SIMD_AS_HWINTRINSIC_ID(Vector3, SquareRootector4 Intrinsics SIMD_AS_HWINTRINSIC_ID(Vector4, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_NM(Vector4, Create, ".ctor", 5, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_Create, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) SIMD_AS_HWINTRINSIC_NM(Vector4, CreateBroadcast, ".ctor", 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_CreateBroadcast, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) SIMD_AS_HWINTRINSIC_ID(Vector4, Dot, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_Dot, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector4, get_One, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_get_One, NI_Illegal}, SimdAsHWIntrinsicFlag::None) diff --git a/src/coreclr/jit/simdashwintrinsiclistxarch.h 
b/src/coreclr/jit/simdashwintrinsiclistxarch.h index a8a09bc0cfa98..70400d8dfd42f 100644 --- a/src/coreclr/jit/simdashwintrinsiclistxarch.h +++ b/src/coreclr/jit/simdashwintrinsiclistxarch.hector2 Intrinsics SIMD_AS_HWINTRINSIC_ID(Vector2, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_NM(Vector2, Create, ".ctor", 3, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_Create, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) SIMD_AS_HWINTRINSIC_NM(Vector2, CreateBroadcast, ".ctor", 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_CreateBroadcast, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) SIMD_AS_HWINTRINSIC_ID(Vector2, Dot, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_Dot, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector2, get_One, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_get_One, NI_Illegal}, SimdAsHWIntrinsicFlag::None) @@ -58,6 +59,7 @@ SIMD_AS_HWINTRINSIC_ID(Vector2, SquareRootector3 Intrinsics SIMD_AS_HWINTRINSIC_ID(Vector3, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_NM(Vector3, Create, ".ctor", 4, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_Create, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) SIMD_AS_HWINTRINSIC_NM(Vector3, CreateBroadcast, ".ctor", 2, {NI_Illegal, NI_Illegal, NI_Illegal, 
NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_CreateBroadcast, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) SIMD_AS_HWINTRINSIC_ID(Vector3, Dot, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_Dot, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector3, get_One, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_get_One, NI_Illegal}, SimdAsHWIntrinsicFlag::None) @@ -78,6 +80,7 @@ SIMD_AS_HWINTRINSIC_ID(Vector3, SquareRoot, // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector4 Intrinsics SIMD_AS_HWINTRINSIC_ID(Vector4, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_NM(Vector4, Create, ".ctor", 5, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_Create, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) SIMD_AS_HWINTRINSIC_NM(Vector4, CreateBroadcast, ".ctor", 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_CreateBroadcast, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) 
SIMD_AS_HWINTRINSIC_ID(Vector4, Dot, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_Dot, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector4, get_One, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_get_One, NI_Illegal}, SimdAsHWIntrinsicFlag::None) From 3826f057df341a07c950077850c13042459f4e06 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Thu, 15 Dec 2022 06:45:51 -0800 Subject: [PATCH 02/20] Remove SIMDIntrinsicInitN as it is dead --- src/coreclr/jit/codegen.h | 10 -- src/coreclr/jit/codegenarm64.cpp | 74 ------------ src/coreclr/jit/codegenloongarch64.cpp | 15 --- src/coreclr/jit/gentree.h | 14 --- src/coreclr/jit/lower.cpp | 48 -------- src/coreclr/jit/lsraarm64.cpp | 23 ---- src/coreclr/jit/lsraxarch.cpp | 21 ---- src/coreclr/jit/simd.cpp | 101 +--------------- src/coreclr/jit/simdcodegenxarch.cpp | 157 ------------------------- src/coreclr/jit/simdintrinsiclist.h | 5 - 10 files changed, 1 insertion(+), 467 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 25774eea9468f..69f95ec94fa90 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -1075,22 +1075,12 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genCompareInt(GenTree* treeNode); #ifdef FEATURE_SIMD - enum SIMDScalarMoveType{ - SMT_ZeroInitUpper, // zero initlaize target upper bits - SMT_ZeroInitUpper_SrcHasUpperZeros, // zero initialize target upper bits; source upper bits are known to be zero - SMT_PreserveUpper // preserve target upper bits - }; - #ifdef TARGET_ARM64 insOpts genGetSimdInsOpt(emitAttr size, var_types elementType); #endif #ifdef TARGET_XARCH instruction getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_types baseType, unsigned* ival = nullptr); #endif - void genSIMDScalarMove( - var_types targetType, var_types type, regNumber target, 
regNumber src, SIMDScalarMoveType moveType); - void genSIMDZero(var_types targetType, var_types baseType, regNumber targetReg); - void genSIMDIntrinsicInitN(GenTreeSIMD* simdNode); void genSIMDIntrinsicUpperSave(GenTreeSIMD* simdNode); void genSIMDIntrinsicUpperRestore(GenTreeSIMD* simdNode); void genSIMDLo64BitConvert(SIMDIntrinsicID intrinsicID, diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 8cb9db68b9685..0adfcc5484641 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -5044,10 +5044,6 @@ void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode) switch (simdNode->GetSIMDIntrinsicId()) { - case SIMDIntrinsicInitN: - genSIMDIntrinsicInitN(simdNode); - break; - case SIMDIntrinsicUpperSave: genSIMDIntrinsicUpperSave(simdNode); break; @@ -5095,76 +5091,6 @@ insOpts CodeGen::genGetSimdInsOpt(emitAttr size, var_types elementType) return result; } -//------------------------------------------------------------------------------------------- -// genSIMDIntrinsicInitN: Generate code for SIMD Intrinsic Initialize for the form that takes -// a number of arguments equal to the length of the Vector. -// -// Arguments: -// simdNode - The GT_SIMD node -// -// Return Value: -// None. -// -void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode) -{ - assert(simdNode->GetSIMDIntrinsicId() == SIMDIntrinsicInitN); - - regNumber targetReg = simdNode->GetRegNum(); - assert(targetReg != REG_NA); - - var_types targetType = simdNode->TypeGet(); - var_types baseType = simdNode->GetSimdBaseType(); - emitAttr baseTypeSize = emitTypeSize(baseType); - regNumber vectorReg = targetReg; - size_t initCount = simdNode->GetOperandCount(); - - assert((initCount * baseTypeSize) <= simdNode->GetSimdSize()); - - if (varTypeIsFloating(baseType)) - { - // Note that we cannot use targetReg before consuming all float source operands. 
- // Therefore use an internal temp register - vectorReg = simdNode->GetSingleTempReg(RBM_ALLFLOAT); - } - - // We will first consume the list items in execution (left to right) order, - // and record the registers. - regNumber operandRegs[FP_REGSIZE_BYTES]; - for (size_t i = 1; i <= initCount; i++) - { - GenTree* operand = simdNode->Op(i); - assert(operand->TypeIs(baseType)); - assert(!operand->isContained()); - - operandRegs[i - 1] = genConsumeReg(operand); - } - - if (initCount * baseTypeSize < EA_16BYTE) - { - GetEmitter()->emitIns_R_I(INS_movi, EA_16BYTE, vectorReg, 0x00, INS_OPTS_16B); - } - - if (varTypeIsIntegral(baseType)) - { - for (unsigned i = 0; i < initCount; i++) - { - GetEmitter()->emitIns_R_R_I(INS_ins, baseTypeSize, vectorReg, operandRegs[i], i); - } - } - else - { - for (unsigned i = 0; i < initCount; i++) - { - GetEmitter()->emitIns_R_R_I_I(INS_ins, baseTypeSize, vectorReg, operandRegs[i], i, 0); - } - } - - // Load the initialized value. - GetEmitter()->emitIns_Mov(INS_mov, EA_16BYTE, targetReg, vectorReg, /* canSkip */ true); - - genProduceReg(simdNode); -} - //----------------------------------------------------------------------------- // genSIMDIntrinsicUpperSave: save the upper half of a TYP_SIMD16 vector to // the given register, if any, or to memory. diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 12ced2641d167..e8c9c2e79212f 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -4913,21 +4913,6 @@ void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode) NYI("unimplemented on LOONGARCH64 yet"); } -//------------------------------------------------------------------------------------------- -// genSIMDIntrinsicInitN: Generate code for SIMD Intrinsic Initialize for the form that takes -// a number of arguments equal to the length of the Vector. -// -// Arguments: -// simdNode - The GT_SIMD node -// -// Return Value: -// None. 
-// -void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode) -{ - NYI("unimplemented on LOONGARCH64 yet"); -} - //---------------------------------------------------------------------------------- // genSIMDIntrinsicUnOp: Generate code for SIMD Intrinsic unary operations like sqrt. // diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 71cb998eb5878..ac751d04d8e83 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -8705,20 +8705,6 @@ inline bool GenTree::IsVectorCreate() const } #endif // FEATURE_HW_INTRINSICS -#ifdef FEATURE_SIMD - if (OperIs(GT_SIMD)) - { - switch (AsSIMD()->GetSIMDIntrinsicId()) - { - case SIMDIntrinsicInitN: - return true; - - default: - return false; - } - } -#endif // FEATURE_SIMD - return false; } diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 572a46c8d3bc8..9cfd80c28ae3b 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -7588,54 +7588,6 @@ void Lowering::LowerSIMD(GenTreeSIMD* simdNode) simdNode->gtType = TYP_SIMD16; } - if (simdNode->GetSIMDIntrinsicId() == SIMDIntrinsicInitN) - { - assert(simdNode->GetSimdBaseType() == TYP_FLOAT); - - size_t argCount = simdNode->GetOperandCount(); - size_t constArgCount = 0; - float constArgValues[4]{0, 0, 0, 0}; - - for (GenTree* arg : simdNode->Operands()) - { - assert(arg->TypeIs(simdNode->GetSimdBaseType())); - - if (arg->IsCnsFltOrDbl()) - { - noway_assert(constArgCount < ArrLen(constArgValues)); - constArgValues[constArgCount] = static_cast(arg->AsDblCon()->DconValue()); - constArgCount++; - } - } - - if (constArgCount == argCount) - { - for (GenTree* arg : simdNode->Operands()) - { - BlockRange().Remove(arg); - } - - // For SIMD12, even though there might be 12 bytes of constants, we need to store 16 bytes of data - // since we've bashed the node the TYP_SIMD16 and do a 16-byte indirection. 
- assert(varTypeIsSIMD(simdNode)); - const unsigned cnsSize = genTypeSize(simdNode); - assert(cnsSize <= sizeof(constArgValues)); - - const unsigned cnsAlign = - (comp->compCodeOpt() != Compiler::SMALL_CODE) ? cnsSize : emitter::dataSection::MIN_DATA_ALIGN; - - CORINFO_FIELD_HANDLE hnd = - comp->GetEmitter()->emitBlkConst(constArgValues, cnsSize, cnsAlign, simdNode->GetSimdBaseType()); - GenTree* clsVarAddr = new (comp, GT_CLS_VAR_ADDR) GenTreeClsVar(TYP_I_IMPL, hnd); - BlockRange().InsertBefore(simdNode, clsVarAddr); - simdNode->ChangeOper(GT_IND); - simdNode->AsOp()->gtOp1 = clsVarAddr; - ContainCheckIndir(simdNode->AsIndir()); - - return; - } - } - ContainCheckSIMD(simdNode); } #endif // FEATURE_SIMD diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index c816d33834032..18852899492ba 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -818,29 +818,6 @@ int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) switch (simdTree->GetSIMDIntrinsicId()) { - case SIMDIntrinsicInitN: - { - var_types baseType = simdTree->GetSimdBaseType(); - srcCount = (short)(simdTree->GetSimdSize() / genTypeSize(baseType)); - assert(simdTree->GetOperandCount() == static_cast(srcCount)); - if (varTypeIsFloating(simdTree->GetSimdBaseType())) - { - // Need an internal register to stitch together all the values into a single vector in a SIMD reg. - buildInternalFloatRegisterDefForNode(simdTree); - } - - for (GenTree* operand : simdTree->Operands()) - { - assert(operand->TypeIs(baseType)); - assert(!operand->isContained()); - - BuildUse(operand); - } - - buildUses = false; - break; - } - case SIMDIntrinsicInitArray: // We have an array and an index, which may be contained. 
break; diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 682fcec159ff6..00177092c3942 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -1929,27 +1929,6 @@ int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) switch (simdTree->GetSIMDIntrinsicId()) { - case SIMDIntrinsicInitN: - { - var_types baseType = simdTree->GetSimdBaseType(); - srcCount = (short)(simdTree->GetSimdSize() / genTypeSize(baseType)); - assert(simdTree->GetOperandCount() == static_cast(srcCount)); - - // Need an internal register to stitch together all the values into a single vector in a SIMD reg. - buildInternalFloatRegisterDefForNode(simdTree); - - for (GenTree* operand : simdTree->Operands()) - { - assert(operand->TypeIs(baseType)); - assert(!operand->isContained()); - - BuildUse(operand); - } - - buildUses = false; - } - break; - case SIMDIntrinsicInitArray: // We have an array and an index, which may be contained. break; diff --git a/src/coreclr/jit/simd.cpp b/src/coreclr/jit/simd.cpp index b66db85f71ba9..3f529e588dfcf 100644 --- a/src/coreclr/jit/simd.cpp +++ b/src/coreclr/jit/simd.cpp @@ -1014,27 +1014,11 @@ const SIMDIntrinsicInfo* Compiler::getSIMDIntrinsicInfo(CORINFO_CLASS_HANDLE* in unsigned int fixedArgCnt = simdIntrinsicInfoArray[i].argCount; unsigned int expectedArgCnt = fixedArgCnt; - // First handle SIMDIntrinsicInitN, where the arg count depends on the type. // The listed arg types include the vector and the first two init values, which is the expected number // for Vector2. For other cases, we'll check their types here. 
if (*argCount > expectedArgCnt) { - if (i == SIMDIntrinsicInitN) - { - if (*argCount == 3 && typeHnd == m_simdHandleCache->SIMDVector2Handle) - { - expectedArgCnt = 3; - } - else if (*argCount == 4 && typeHnd == m_simdHandleCache->SIMDVector3Handle) - { - expectedArgCnt = 4; - } - else if (*argCount == 5 && typeHnd == m_simdHandleCache->SIMDVector4Handle) - { - expectedArgCnt = 5; - } - } - else if (i == SIMDIntrinsicInitFixed) + if (i == SIMDIntrinsicInitFixed) { if (*argCount == 4 && typeHnd == m_simdHandleCache->SIMDVector4Handle) { @@ -1700,89 +1684,6 @@ GenTree* Compiler::impSIMDIntrinsic(OPCODE opcode, switch (simdIntrinsicID) { - case SIMDIntrinsicInitN: - { - // SIMDIntrinsicInitN - // op2 - list of initializer values stitched into a list - // op1 - byref of vector - IntrinsicNodeBuilder nodeBuilder(getAllocator(CMK_ASTNode), argCount - 1); - bool initFromFirstArgIndir = false; - - { - assert(simdIntrinsicID == SIMDIntrinsicInitN); - assert(simdBaseType == TYP_FLOAT); - - unsigned initCount = argCount - 1; - unsigned elementCount = getSIMDVectorLength(size, simdBaseType); - noway_assert(initCount == elementCount); - - // Build an array with the N values. - // We must maintain left-to-right order of the args, but we will pop - // them off in reverse order (the Nth arg was pushed onto the stack last). - - GenTree* prevArg = nullptr; - bool areArgsContiguous = true; - for (unsigned i = 0; i < initCount; i++) - { - GenTree* arg = impSIMDPopStack(simdBaseType); - - if (areArgsContiguous) - { - GenTree* curArg = arg; - - if (prevArg != nullptr) - { - // Recall that we are popping the args off the stack in reverse order. 
- areArgsContiguous = areArgumentsContiguous(curArg, prevArg); - } - prevArg = curArg; - } - - assert(genActualType(arg) == genActualType(simdBaseType)); - nodeBuilder.AddOperand(initCount - i - 1, arg); - } - - if (areArgsContiguous && simdBaseType == TYP_FLOAT) - { - // Since Vector2, Vector3 and Vector4's arguments type are only float, - // we initialize the vector from first argument address, only when - // the simdBaseType is TYP_FLOAT and the arguments are located contiguously in memory - initFromFirstArgIndir = true; - GenTree* op2Address = createAddressNodeForSIMDInit(nodeBuilder.GetOperand(0), size); - var_types simdType = getSIMDTypeForSize(size); - op2 = gtNewOperNode(GT_IND, simdType, op2Address); - } - } - - op1 = getOp1ForConstructor(opcode, newobjThis, clsHnd); - - assert(op1->TypeGet() == TYP_BYREF); - - { - assert(!varTypeIsSmallInt(simdBaseType)); - - if (initFromFirstArgIndir) - { - simdTree = op2; - if (op1->OperIs(GT_LCL_VAR_ADDR)) - { - // label the dst struct's lclvar is used for SIMD intrinsic, - // so that this dst struct won't be promoted. - setLclRelatedToSIMDIntrinsic(op1); - } - } - else - { - simdTree = new (this, GT_SIMD) - GenTreeSIMD(simdType, std::move(nodeBuilder), simdIntrinsicID, simdBaseJitType, size); - } - } - - copyBlkDst = op1; - doCopyBlk = true; - } - break; - case SIMDIntrinsicInitArray: case SIMDIntrinsicInitArrayX: case SIMDIntrinsicCopyToArray: diff --git a/src/coreclr/jit/simdcodegenxarch.cpp b/src/coreclr/jit/simdcodegenxarch.cpp index ced687fcff017..d399be5a7904e 100644 --- a/src/coreclr/jit/simdcodegenxarch.cpp +++ b/src/coreclr/jit/simdcodegenxarch.cpp @@ -128,159 +128,6 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type return result; } -// genSIMDScalarMove: Generate code to move a value of type "type" from src mm reg -// to target mm reg, zeroing out the upper bits if and only if specified. 
-// -// Arguments: -// targetType the target type -// baseType the base type of value to be moved -// targetReg the target reg -// srcReg the src reg -// moveType action to be performed on target upper bits -// -// Return Value: -// None -// -// Notes: -// This is currently only supported for floating point types. -// -void CodeGen::genSIMDScalarMove( - var_types targetType, var_types baseType, regNumber targetReg, regNumber srcReg, SIMDScalarMoveType moveType) -{ - assert(varTypeIsFloating(baseType)); - switch (moveType) - { - case SMT_PreserveUpper: - GetEmitter()->emitIns_SIMD_R_R_R(ins_Store(baseType), emitTypeSize(baseType), targetReg, targetReg, srcReg); - break; - - case SMT_ZeroInitUpper: - if (compiler->canUseVexEncoding()) - { - // insertps is a 128-bit only instruction, and clears the upper 128 bits, which is what we want. - // The insertpsImm selects which fields are copied and zero'd of the lower 128 bits, so we choose - // to zero all but the lower bits. - unsigned int insertpsImm = - (INSERTPS_TARGET_SELECT(0) | INSERTPS_ZERO(1) | INSERTPS_ZERO(2) | INSERTPS_ZERO(3)); - assert((insertpsImm >= 0) && (insertpsImm <= 255)); - inst_RV_RV_IV(INS_insertps, EA_16BYTE, targetReg, srcReg, (int8_t)insertpsImm); - } - else - { - if (srcReg == targetReg) - { - // There is no guarantee that upper bits of op1Reg are zero. - // We achieve this by using left logical shift 12-bytes and right logical shift 12 bytes. 
- instruction ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftLeftInternal, TYP_SIMD16); - GetEmitter()->emitIns_R_I(ins, EA_16BYTE, srcReg, 12); - ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftRightInternal, TYP_SIMD16); - GetEmitter()->emitIns_R_I(ins, EA_16BYTE, srcReg, 12); - } - else - { - genSIMDZero(targetType, TYP_FLOAT, targetReg); - inst_Mov(baseType, targetReg, srcReg, /* canSkip */ false); - } - } - break; - - case SMT_ZeroInitUpper_SrcHasUpperZeros: - inst_Mov(baseType, targetReg, srcReg, /* canSkip */ true); - break; - - default: - unreached(); - } -} - -void CodeGen::genSIMDZero(var_types targetType, var_types baseType, regNumber targetReg) -{ - // We just use `INS_xorps` since `genSIMDZero` is used for both `System.Numerics.Vectors` and - // HardwareIntrinsics. Modern CPUs handle this specially in the renamer and it never hits the - // execution pipeline, additionally `INS_xorps` is always available (when using either the - // legacy or VEX encoding). - inst_RV_RV(INS_xorps, targetReg, targetReg, targetType, emitActualTypeSize(targetType)); -} - -//------------------------------------------------------------------------------------------- -// genSIMDIntrinsicInitN: Generate code for SIMD Intrinsic Initialize for the form that takes -// a number of arguments equal to the length of the Vector. -// -// Arguments: -// simdNode - The GT_SIMD node -// -// Return Value: -// None. -// -void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode) -{ - assert(simdNode->GetSIMDIntrinsicId() == SIMDIntrinsicInitN); - - // Right now this intrinsic is supported only on TYP_FLOAT vectors - var_types baseType = simdNode->GetSimdBaseType(); - noway_assert(baseType == TYP_FLOAT); - - regNumber targetReg = simdNode->GetRegNum(); - assert(targetReg != REG_NA); - - var_types targetType = simdNode->TypeGet(); - - // Note that we cannot use targetReg before consumed all source operands. 
Therefore, - // Need an internal register to stitch together all the values into a single vector - // in an XMM reg. - regNumber vectorReg = simdNode->GetSingleTempReg(); - - // Zero out vectorReg if we are constructing a vector whose size is not equal to targetType vector size. - // For example in case of Vector4f we don't need to zero when using SSE2. - if (compiler->isSubRegisterSIMDType(simdNode)) - { - genSIMDZero(targetType, baseType, vectorReg); - } - - unsigned int baseTypeSize = genTypeSize(baseType); - instruction insLeftShift = getOpForSIMDIntrinsic(SIMDIntrinsicShiftLeftInternal, TYP_SIMD16); - - // We will first consume the list items in execution (left to right) order, - // and record the registers. - regNumber operandRegs[SIMD_INTRINSIC_MAX_PARAM_COUNT]; - size_t initCount = simdNode->GetOperandCount(); - for (size_t i = 1; i <= initCount; i++) - { - GenTree* operand = simdNode->Op(i); - assert(operand->TypeIs(baseType)); - assert(!operand->isContained()); - - operandRegs[i - 1] = genConsumeReg(operand); - } - - unsigned offset = 0; - for (unsigned i = 0; i < initCount; i++) - { - // We will now construct the vector from the list items in reverse order. - // This allows us to efficiently stitch together a vector as follows: - // vectorReg = (vectorReg << offset) - // VectorReg[0] = listItemReg - // Use genSIMDScalarMove with SMT_PreserveUpper in order to ensure that the upper - // bits of vectorReg are not modified. - - regNumber operandReg = operandRegs[initCount - i - 1]; - if (offset != 0) - { - assert((baseTypeSize >= 0) && (baseTypeSize <= 255)); - GetEmitter()->emitIns_R_I(insLeftShift, EA_16BYTE, vectorReg, (int8_t)baseTypeSize); - } - genSIMDScalarMove(targetType, baseType, vectorReg, operandReg, SMT_PreserveUpper); - - offset += baseTypeSize; - } - - noway_assert(offset == simdNode->GetSimdSize()); - - // Load the initialized value. 
- inst_Mov(targetType, targetReg, vectorReg, /* canSkip */ true); - genProduceReg(simdNode); -} - //-------------------------------------------------------------------------------- // genSIMDExtractUpperHalf: Generate code to extract the upper half of a SIMD register // @@ -654,10 +501,6 @@ void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode) switch (simdNode->GetSIMDIntrinsicId()) { - case SIMDIntrinsicInitN: - genSIMDIntrinsicInitN(simdNode); - break; - case SIMDIntrinsicUpperSave: genSIMDIntrinsicUpperSave(simdNode); break; diff --git a/src/coreclr/jit/simdintrinsiclist.h b/src/coreclr/jit/simdintrinsiclist.h index 54654645c1fa2..a0693a8a1aeb1 100644 --- a/src/coreclr/jit/simdintrinsiclist.h +++ b/src/coreclr/jit/simdintrinsiclist.h @@ -43,11 +43,6 @@ SIMD_INTRINSIC(nullptr, false, None, SIMD_INTRINSIC(".ctor", true, InitArray, "initArray", TYP_VOID, 2, {TYP_BYREF, TYP_REF, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) // This form takes the object, an array of the base (element) type, and an index into the array: SIMD_INTRINSIC(".ctor", true, InitArrayX, "initArray", TYP_VOID, 3, {TYP_BYREF, TYP_REF, TYP_INT }, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) -// This form takes the object, and N values of the base (element) type. The actual number of arguments depends upon the Vector size, which must be a fixed type such as Vector2f/3f/4f -// Right now this intrinsic is supported only on fixed float vectors and hence the supported base types lists only TYP_FLOAT. -// This is currently the intrinsic that has the largest maximum number of operands - if we add new fixed vector types -// with more than 4 elements, the above SIMD_INTRINSIC_MAX_PARAM_COUNT will have to change. 
-SIMD_INTRINSIC(".ctor", true, InitN, "initN", TYP_VOID, 2, {TYP_BYREF, TYP_UNKNOWN, TYP_UNKNOWN}, {TYP_FLOAT, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}) // This form takes the object, a smaller fixed vector, and one or two additional arguments of the base type, e.g. Vector3 V = new Vector3(V2, x); where V2 is a Vector2, and x is a float. SIMD_INTRINSIC(".ctor", true, InitFixed, "initFixed", TYP_VOID, 3, {TYP_BYREF, TYP_STRUCT, TYP_UNKNOWN}, {TYP_FLOAT, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}) From 664b347727696f98e456f3651cdf4179a683cbfb Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Thu, 15 Dec 2022 07:00:44 -0800 Subject: [PATCH 03/20] Remove SIMDIntrinsicShiftLeftInternal and SIMDIntrinsicShiftRightInternal as they are dead --- src/coreclr/jit/codegen.h | 3 - src/coreclr/jit/codegenloongarch64.cpp | 17 ------ src/coreclr/jit/simdcodegenxarch.cpp | 83 +------------------------- src/coreclr/jit/simdintrinsiclist.h | 6 -- 4 files changed, 1 insertion(+), 108 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 69f95ec94fa90..659457c0cbb6a 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -1077,9 +1077,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #ifdef FEATURE_SIMD #ifdef TARGET_ARM64 insOpts genGetSimdInsOpt(emitAttr size, var_types elementType); -#endif -#ifdef TARGET_XARCH - instruction getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_types baseType, unsigned* ival = nullptr); #endif void genSIMDIntrinsicUpperSave(GenTreeSIMD* simdNode); void genSIMDIntrinsicUpperRestore(GenTreeSIMD* simdNode); diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index e8c9c2e79212f..dddc5baa5d9de 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -4882,23 +4882,6 @@ 
insOpts CodeGen::genGetSimdInsOpt(emitAttr size, var_types elementType) return INS_OPTS_NONE; } -// getOpForSIMDIntrinsic: return the opcode for the given SIMD Intrinsic -// -// Arguments: -// intrinsicId - SIMD intrinsic Id -// baseType - Base type of the SIMD vector -// immed - Out param. Any immediate byte operand that needs to be passed to SSE2 opcode -// -// -// Return Value: -// Instruction (op) to be used, and immed is set if instruction requires an immediate operand. -// -instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_types baseType, unsigned* ival /*=nullptr*/) -{ - NYI("unimplemented on LOONGARCH64 yet"); - return INS_invalid; -} - //------------------------------------------------------------------------ // genSIMDIntrinsicInit: Generate code for SIMD Intrinsic Initialize. // diff --git a/src/coreclr/jit/simdcodegenxarch.cpp b/src/coreclr/jit/simdcodegenxarch.cpp index d399be5a7904e..bd1e379159d0c 100644 --- a/src/coreclr/jit/simdcodegenxarch.cpp +++ b/src/coreclr/jit/simdcodegenxarch.cpp @@ -48,86 +48,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #define ROUNDPS_TOWARD_POSITIVE_INFINITY_IMM 0b1010 #define ROUNDPS_TOWARD_ZERO_IMM 0b1011 -// getOpForSIMDIntrinsic: return the opcode for the given SIMD Intrinsic -// -// Arguments: -// intrinsicId - SIMD intrinsic Id -// baseType - Base type of the SIMD vector -// ival - Out param. Any immediate byte operand that needs to be passed to SSE2 opcode -// -// -// Return Value: -// Instruction (op) to be used, and ival is set if instruction requires an immediate operand. -// -instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_types baseType, unsigned* ival /*=nullptr*/) -{ - // Minimal required instruction set is SSE2. 
- assert(compiler->getSIMDSupportLevel() >= SIMD_SSE2_Supported); - - instruction result = INS_invalid; - switch (intrinsicId) - { - case SIMDIntrinsicShiftLeftInternal: - switch (baseType) - { - case TYP_SIMD16: - // For SSE2, entire vector is shifted, for AVX2, 16-byte chunks are shifted. - result = INS_pslldq; - break; - case TYP_UINT: - case TYP_INT: - result = INS_pslld; - break; - case TYP_SHORT: - case TYP_USHORT: - result = INS_psllw; - break; - default: - assert(!"Invalid baseType for SIMDIntrinsicShiftLeftInternal"); - result = INS_invalid; - break; - } - break; - - case SIMDIntrinsicShiftRightInternal: - switch (baseType) - { - case TYP_SIMD16: - // For SSE2, entire vector is shifted, for AVX2, 16-byte chunks are shifted. - result = INS_psrldq; - break; - case TYP_UINT: - case TYP_INT: - result = INS_psrld; - break; - case TYP_SHORT: - case TYP_USHORT: - result = INS_psrlw; - break; - default: - assert(!"Invalid baseType for SIMDIntrinsicShiftRightInternal"); - result = INS_invalid; - break; - } - break; - - case SIMDIntrinsicUpperSave: - result = INS_vextractf128; - break; - - case SIMDIntrinsicUpperRestore: - result = INS_insertps; - break; - - default: - assert(!"Unsupported SIMD intrinsic"); - unreached(); - } - - noway_assert(result != INS_invalid); - return result; -} - //-------------------------------------------------------------------------------- // genSIMDExtractUpperHalf: Generate code to extract the upper half of a SIMD register // @@ -149,9 +69,8 @@ void CodeGen::genSIMDExtractUpperHalf(GenTreeSIMD* simdNode, regNumber srcReg, r } else { - instruction shiftIns = getOpForSIMDIntrinsic(SIMDIntrinsicShiftRightInternal, TYP_SIMD16); inst_Mov(simdType, tgtReg, srcReg, /* canSkip */ true); - GetEmitter()->emitIns_R_I(shiftIns, emitSize, tgtReg, 8); + GetEmitter()->emitIns_R_I(INS_psrldq, emitSize, tgtReg, 8); } } diff --git a/src/coreclr/jit/simdintrinsiclist.h b/src/coreclr/jit/simdintrinsiclist.h index a0693a8a1aeb1..1f6d18873baa4 100644 --- 
a/src/coreclr/jit/simdintrinsiclist.h +++ b/src/coreclr/jit/simdintrinsiclist.h @@ -50,12 +50,6 @@ SIMD_INTRINSIC(".ctor", true, InitFixed, SIMD_INTRINSIC("CopyTo", true, CopyToArray, "CopyToArray", TYP_VOID, 2, {TYP_BYREF, TYP_REF, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) SIMD_INTRINSIC("CopyTo", true, CopyToArrayX, "CopyToArray", TYP_VOID, 3, {TYP_BYREF, TYP_REF, TYP_INT }, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) -#ifdef TARGET_XARCH -// Internal, logical shift operations that shift the entire vector register instead of individual elements of the vector. -SIMD_INTRINSIC("ShiftLeftInternal", false, ShiftLeftInternal, "<< Internal", TYP_STRUCT, 2, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}) -SIMD_INTRINSIC("ShiftRightInternal", false, ShiftRightInternal, ">> Internal", TYP_STRUCT, 2, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}) -#endif // TARGET_XARCH - // Internal intrinsics for saving & restoring the upper half of a vector register SIMD_INTRINSIC("UpperSave", false, UpperSave, "UpperSave Internal", TYP_STRUCT, 2, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}) SIMD_INTRINSIC("UpperRestore", false, UpperRestore, "UpperRestore Internal", TYP_STRUCT, 2, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}) From 330245260f8e711df183347f05c526d85084df69 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Thu, 15 Dec 2022 07:03:44 -0800 Subject: [PATCH 04/20] Remove some other dead functions from the legacy SIMD support --- 
src/coreclr/jit/codegen.h | 9 --------- src/coreclr/jit/simdcodegenxarch.cpp | 26 -------------------------- 2 files changed, 35 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 659457c0cbb6a..ef14cf55bbd4c 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -1080,15 +1080,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #endif void genSIMDIntrinsicUpperSave(GenTreeSIMD* simdNode); void genSIMDIntrinsicUpperRestore(GenTreeSIMD* simdNode); - void genSIMDLo64BitConvert(SIMDIntrinsicID intrinsicID, - var_types simdType, - var_types baseType, - regNumber tmpReg, - regNumber tmpIntReg, - regNumber targetReg); - void genSIMDIntrinsic32BitConvert(GenTreeSIMD* simdNode); - void genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode); - void genSIMDExtractUpperHalf(GenTreeSIMD* simdNode, regNumber srcReg, regNumber tgtReg); void genSIMDIntrinsic(GenTreeSIMD* simdNode); // TYP_SIMD12 (i.e Vector3 of size 12 bytes) is not a hardware supported size and requires diff --git a/src/coreclr/jit/simdcodegenxarch.cpp b/src/coreclr/jit/simdcodegenxarch.cpp index bd1e379159d0c..a05e281fa54ef 100644 --- a/src/coreclr/jit/simdcodegenxarch.cpp +++ b/src/coreclr/jit/simdcodegenxarch.cpp @@ -48,32 +48,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #define ROUNDPS_TOWARD_POSITIVE_INFINITY_IMM 0b1010 #define ROUNDPS_TOWARD_ZERO_IMM 0b1011 -//-------------------------------------------------------------------------------- -// genSIMDExtractUpperHalf: Generate code to extract the upper half of a SIMD register -// -// Arguments: -// simdNode - The GT_SIMD node -// -// Notes: -// This is used for the WidenHi intrinsic to extract the upper half. -// On SSE*, this is 8 bytes, and on AVX2 it is 16 bytes. 
-// -void CodeGen::genSIMDExtractUpperHalf(GenTreeSIMD* simdNode, regNumber srcReg, regNumber tgtReg) -{ - var_types simdType = simdNode->TypeGet(); - emitAttr emitSize = emitActualTypeSize(simdType); - if (compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported) - { - instruction extractIns = varTypeIsFloating(simdNode->GetSimdBaseType()) ? INS_vextractf128 : INS_vextracti128; - GetEmitter()->emitIns_R_R_I(extractIns, EA_32BYTE, tgtReg, srcReg, 0x01); - } - else - { - inst_Mov(simdType, tgtReg, srcReg, /* canSkip */ true); - GetEmitter()->emitIns_R_I(INS_psrldq, emitSize, tgtReg, 8); - } -} - //----------------------------------------------------------------------------- // genStoreIndTypeSIMD12: store indirect a TYP_SIMD12 (i.e. Vector3) to memory. // Since Vector3 is not a hardware supported write size, it is performed From 09b4b3cabb6d53b92335d896d0979e7e9788a2e8 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Thu, 15 Dec 2022 07:50:46 -0800 Subject: [PATCH 05/20] Improve the codegen for float Sse41.Insert when zero is involved --- src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 20 +- src/coreclr/jit/lowerxarch.cpp | 247 +++++++++++++++++++- 2 files changed, 251 insertions(+), 16 deletions(-) diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index b2b08c8d828c8..b44ed4c7d5787 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -596,11 +596,23 @@ void CodeGen::genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins, regNumber op1Reg = op1->GetRegNum(); - if ((ins == INS_insertps) && (op1Reg == REG_NA)) + if (ins == INS_insertps) { - // insertps is special and can contain op1 when it is zero - assert(op1->isContained() && op1->IsVectorZero()); - op1Reg = targetReg; + if (op1Reg == REG_NA) + { + // insertps is special and can contain op1 when it is zero + assert(op1->isContained() && op1->IsVectorZero()); + op1Reg = targetReg; + } + + if 
(op2->isContained() && op2->IsVectorZero()) + { + // insertps can also contain op2 when it is zero in which case + // we just reuse op1Reg since ival specifies the entry to zero + + emit->emitIns_SIMD_R_R_R_I(ins, simdSize, targetReg, op1Reg, op1Reg, ival); + return; + } } assert(targetReg != REG_NA); diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 255f9b968caf6..5e40ea3408a73 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -1126,11 +1126,211 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) { assert(node->GetOperandCount() == 3); + var_types simdBaseType = node->GetSimdBaseType(); + // Insert takes either a 32-bit register or a memory operand. // In either case, only SimdBaseType bits are read and so // widening or narrowing the operand may be unnecessary and it // can just be used directly. - node->Op(2) = TryRemoveCastIfPresent(node->GetSimdBaseType(), node->Op(2)); + + node->Op(2) = TryRemoveCastIfPresent(simdBaseType, node->Op(2)); + + if (simdBaseType != TYP_FLOAT) + { + break; + } + assert(intrinsicId == NI_SSE41_Insert); + + // We have Sse41.Insert in which case we can specially handle + // a couple of interesting scenarios involving chains of Inserts + // where one of them involves inserting zero + // + // Given Sse41.Insert has an index: + // * Bits 0-3: zmask + // * Bits 4-5: count_d + // * Bits 6-7: count_s (register form only) + // + // Where zmask specifies which elements to zero + // Where count_d specifies the destination index the value is being inserted to + // Where count_s specifies the source index of the value being inserted + // + // We can recognize `Insert(Insert(vector, zero, index1), value, index2)` and + // transform it into just `Insert(vector, value, index)`. This is because we + // can remove the inner insert and update the relevant index fields. 
+ // + // We can likewise recognize `Insert(Insert(vector, value, index1), zero, index2)` + // and do a similar transformation. + + GenTree* op1 = node->Op(1); + GenTree* op2 = node->Op(2); + GenTree* op3 = node->Op(3); + + bool op1IsVectorZero = op1->IsVectorZero(); + bool op2IsVectorZero = op2->IsVectorZero(); + + if (op1IsVectorZero && op2IsVectorZero) + { + // While this case is unlikely, we'll handle it here to simplify some + // of the logic that exists below. Effectively `Insert(zero, zero, idx)` + // is always going to produce zero, so we'll just replace ourselves with + // zero. This ensures we don't need to handle a case where op2 is zero + // but not contained. + + LIR::Use use; + + if (BlockRange().TryGetUse(node, &use)) + { + use.ReplaceWith(op1); + } + else + { + op1->SetUnusedValue(); + } + + BlockRange().Remove(op2); + op3->SetUnusedValue(); + BlockRange().Remove(node); + + return op1->gtNext; + } + + if (!op3->IsCnsIntOrI()) + { + // Nothing to do if op3 isn't a constant + break; + } + + ssize_t ival = op3->AsIntConCommon()->IconValue(); + + ssize_t zmask = (ival & 0x0F); + ssize_t count_d = (ival & 0x30) >> 4; + ssize_t count_s = (ival & 0xC0) >> 6; + + if (op1IsVectorZero) + { + // When op1 is zero, we can modify the mask to zero + // everything except for the element we're inserting + + zmask |= ~(ssize_t(1) << count_d); + zmask &= 0x0F; + + ival = (count_s << 6) | (count_d << 4) | (zmask); + op3->AsIntConCommon()->SetIconValue(ival); + } + else if(op2IsVectorZero) + { + // When op2 is zero, we can modify the mask to + // directly zero the element we're inserting + + zmask |= (ssize_t(1) << count_d); + zmask &= 0x0F; + + ival = (count_s << 6) | (count_d << 4) | (zmask); + op3->AsIntConCommon()->SetIconValue(ival); + } + + if (zmask == 0x0F) + { + // This is another unlikely case, we'll handle it here to simplify some + // of the logic that exists below. 
In this case, the zmask says all entries + // should be zeroed out, so we'll just replace ourselves with zero. + + GenTree* nextNode = node->gtNext; + + LIR::Use use; + + if (BlockRange().TryGetUse(node, &use)) + { + GenTree* zeroNode = comp->gtNewZeroConNode(TYP_SIMD16); + BlockRange().InsertBefore(node, zeroNode); + use.ReplaceWith(zeroNode); + } + else + { + // We're an unused zero constant node, so don't bother creating + // a new node for something that will never be consumed + } + + op1->SetUnusedValue(); + op2->SetUnusedValue(); + op3->SetUnusedValue(); + BlockRange().Remove(node); + + return nextNode; + } + + if (!op1->OperIsHWIntrinsic()) + { + // Nothing to do if op1 isn't an intrinsic + break; + } + + GenTreeHWIntrinsic* op1Intrinsic = op1->AsHWIntrinsic(); + + if ((op1Intrinsic->GetHWIntrinsicId() != NI_SSE41_Insert) || + (op1Intrinsic->GetSimdBaseType() != TYP_FLOAT)) + { + // Nothing to do if op1 isn't a float32 Sse41.Insert + break; + } + + GenTree* op1Idx = op1Intrinsic->Op(3); + + if (!op1Idx->IsCnsIntOrI()) + { + // Nothing to do if op1's index isn't a constant + break; + } + + if (!IsSafeToContainMem(node, op1)) + { + // What we're doing here is effectively similar to containment, + // except we're deleting the node entirely, so we have + // nothing to do if there are side effects between node and op1 + break; + } + + if (op1Intrinsic->Op(2)->IsVectorZero()) + { + // First build up the new index by updating zmask to include + // the zmask from op1.
We expect that op2 has already been + // lowered and therefore the containment checks have happened + + assert(op1Intrinsic->Op(2)->isContained()); + + ssize_t op1Ival = op1Idx->AsIntConCommon()->IconValue(); + ival |= (op1Ival & 0x0F); + op3->AsIntConCommon()->SetIconValue(ival); + + // Then we'll just carry the original non-zero input and + // remove the now unused constant nodes + + node->Op(1) = op1Intrinsic->Op(1); + + BlockRange().Remove(op1Intrinsic->Op(2)); + BlockRange().Remove(op1Intrinsic->Op(3)); + BlockRange().Remove(op1Intrinsic); + } + else if (op2IsVectorZero) + { + // Since we've already updated zmask to take op2 being zero into + // account, we can basically do the same thing here by merging this + // zmask into the ival from op1. + + ssize_t op1Ival = op1Idx->AsIntConCommon()->IconValue(); + ival = op1Ival | zmask; + op3->AsIntConCommon()->SetIconValue(ival); + + // Then we'll just carry the inputs from op1 and remove the now + // unused constant nodes + + node->Op(1) = op1Intrinsic->Op(1); + node->Op(2) = op1Intrinsic->Op(2); + + BlockRange().Remove(op2); + BlockRange().Remove(op1Intrinsic->Op(3)); + BlockRange().Remove(op1Intrinsic); + } break; } @@ -7132,24 +7332,47 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) // Where count_d specifies the destination index the value is being inserted to // Where count_s specifies the source index of the value being inserted - ssize_t ival = lastOp->AsIntConCommon()->IconValue(); - - ssize_t zmask = (ival & 0x0F); - ssize_t count_d = (ival & 0x30) >> 4; - ssize_t count_s = (ival & 0xC0) >> 6; - if (op1->IsVectorZero()) { - // When op1 is zero, we can contain op1 and modify the mask - // to zero everything except for the element we're inserting to + // When op1 is zero, we can contain it and we expect that + // ival is already in the correct state to account for it + +#if DEBUG + ssize_t ival = lastOp->AsIntConCommon()->IconValue(); + + ssize_t zmask = (ival & 0x0F); + ssize_t count_d = 
(ival & 0x30) >> 4; + ssize_t count_s = (ival & 0xC0) >> 6; + + zmask |= ~(ssize_t(1) << count_d); + zmask &= 0x0F; + + ssize_t expected = (count_s << 6) | (count_d << 4) | (zmask); + assert(ival == expected); +#endif MakeSrcContained(node, op1); + } + else if (op2->IsVectorZero()) + { + // When op2 is zero, we can contain it and we expect that + // zmask is already in the correct state to account for it - zmask |= ~(1 << count_d); +#if DEBUG + ssize_t ival = lastOp->AsIntConCommon()->IconValue(); + + ssize_t zmask = (ival & 0x0F); + ssize_t count_d = (ival & 0x30) >> 4; + ssize_t count_s = (ival & 0xC0) >> 6; + + zmask |= (ssize_t(1) << count_d); zmask &= 0x0F; - ival = (count_s << 6) | (count_d << 4) | (zmask); - lastOp->AsIntConCommon()->SetIconValue(ival); + ssize_t expected = (count_s << 6) | (count_d << 4) | (zmask); + assert(ival == expected); +#endif + + MakeSrcContained(node, op2); } } From 471d67eaad00fe8e500ff598f18f2061380d13a1 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Thu, 15 Dec 2022 10:11:24 -0800 Subject: [PATCH 06/20] Preserve the handling around InitN for Vector2/3/4 and operands that are contiguous in memory --- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/lclmorph.cpp | 2 +- src/coreclr/jit/simd.cpp | 12 ++++++------ src/coreclr/jit/simdashwintrinsic.cpp | 16 ++++++++++++++++ 4 files changed, 24 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 70432157420ff..8cce22548c6b8 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -8606,7 +8606,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX bool areLocalFieldsContiguous(GenTreeLclFld* first, GenTreeLclFld* second); bool areArrayElementsContiguous(GenTree* op1, GenTree* op2); bool areArgumentsContiguous(GenTree* op1, GenTree* op2); - GenTree* createAddressNodeForSIMDInit(GenTree* tree, unsigned simdSize); + GenTree* CreateAddressNodeForSimdHWIntrinsicCreate(GenTree* tree, 
var_types simdBaseType, unsigned simdSize); // check methodHnd to see if it is a SIMD method that is expanded as an intrinsic in the JIT. GenTree* impSIMDIntrinsic(OPCODE opcode, diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index 27d0f5ba41613..22b1cb41d6f14 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -1487,7 +1487,7 @@ bool Compiler::fgMorphCombineSIMDFieldAssignments(BasicBlock* block, Statement* } else { - GenTree* copyBlkDst = createAddressNodeForSIMDInit(originalLHS, simdSize); + GenTree* copyBlkDst = CreateAddressNodeForSimdHWIntrinsicCreate(originalLHS, TYP_FLOAT, simdSize); dstNode = gtNewOperNode(GT_IND, simdType, copyBlkDst); } diff --git a/src/coreclr/jit/simd.cpp b/src/coreclr/jit/simd.cpp index 3f529e588dfcf..0d265d7cb6a52 100644 --- a/src/coreclr/jit/simd.cpp +++ b/src/coreclr/jit/simd.cpp @@ -1444,13 +1444,13 @@ bool Compiler::areArgumentsContiguous(GenTree* op1, GenTree* op2) } //-------------------------------------------------------------------------------------------------------- -// createAddressNodeForSIMDInit: Generate the address node if we want to initialize vector2, vector3 or vector4 +// CreateAddressNodeForSimdHWIntrinsicCreate: Generate the address node if we want to initialize a simd type // from first argument's address. // // Arguments: -// tree - GenTree*. This the tree node which is used to get the address for indir. -// simdsize - unsigned. This the simd vector size. -// arrayElementsCount - unsigned. This is used for generating the boundary check for array. +// tree - The tree node which is used to get the address for indir. +// simdBaseType - The type of the elements in the SIMD node +// simdsize - The simd vector size. // // Return value: // return the address node. 
@@ -1459,7 +1459,7 @@ bool Compiler::areArgumentsContiguous(GenTree* op1, GenTree* op2) // Currently just supports GT_FIELD and GT_IND(GT_INDEX_ADDR), because we can only verify those nodes // are located contiguously or not. In future we should support more cases. // -GenTree* Compiler::createAddressNodeForSIMDInit(GenTree* tree, unsigned simdSize) +GenTree* Compiler::CreateAddressNodeForSimdHWIntrinsicCreate(GenTree* tree, var_types simdBaseType, unsigned simdSize) { GenTree* byrefNode = nullptr; unsigned offset = 0; @@ -1508,7 +1508,7 @@ GenTree* Compiler::createAddressNodeForSIMDInit(GenTree* tree, unsigned simdSize // The length for boundary check should be the maximum index number which should be // (first argument's index number) + (how many array arguments we have) - 1 // = indexVal + arrayElementsCount - 1 - unsigned arrayElementsCount = simdSize / genTypeSize(baseType); + unsigned arrayElementsCount = simdSize / genTypeSize(simdBaseType); checkIndexExpr = gtNewIconNode(indexVal + arrayElementsCount - 1); GenTreeArrLen* arrLen = gtNewArrLen(TYP_INT, arrayRef, (int)OFFSETOF__CORINFO_Array__length, compCurBB); GenTreeBoundsChk* arrBndsChk = diff --git a/src/coreclr/jit/simdashwintrinsic.cpp b/src/coreclr/jit/simdashwintrinsic.cpp index 18072ad2ca654..cdfb88d7f46a4 100644 --- a/src/coreclr/jit/simdashwintrinsic.cpp +++ b/src/coreclr/jit/simdashwintrinsic.cpp @@ -1873,6 +1873,11 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, copyBlkSrc = vecCon; } + else if (areArgumentsContiguous(op2, op3)) + { + GenTree* op2Address = CreateAddressNodeForSimdHWIntrinsicCreate(op2, simdBaseType, 8); + op2 = gtNewOperNode(GT_IND, TYP_SIMD8, op2Address); + } else { #if defined(TARGET_XARCH) @@ -1964,6 +1969,11 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, copyBlkSrc = vecCon; } + else if (areArgumentsContiguous(op2, op3) && areArgumentsContiguous(op3, op4)) + { + GenTree* op2Address = 
CreateAddressNodeForSimdHWIntrinsicCreate(op2, simdBaseType, 12); + op2 = gtNewOperNode(GT_IND, TYP_SIMD12, op2Address); + } else { IntrinsicNodeBuilder nodeBuilder(getAllocator(CMK_ASTNode), 4); @@ -2053,6 +2063,12 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, copyBlkSrc = vecCon; } + else if (areArgumentsContiguous(op2, op3) && areArgumentsContiguous(op3, op4) && + areArgumentsContiguous(op4, op5)) + { + GenTree* op2Address = CreateAddressNodeForSimdHWIntrinsicCreate(op2, simdBaseType, 16); + op2 = gtNewOperNode(GT_IND, TYP_SIMD16, op2Address); + } else { IntrinsicNodeBuilder nodeBuilder(getAllocator(CMK_ASTNode), 4); From 40e24991d38bc4e59dd5819dd82f3057198b26c9 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Thu, 15 Dec 2022 10:26:32 -0800 Subject: [PATCH 07/20] Extend the contiguous argument handling to Vector64/128/256 --- src/coreclr/jit/hwintrinsicarm64.cpp | 32 ++++++++++++++++++++++++++-- src/coreclr/jit/hwintrinsicxarch.cpp | 32 ++++++++++++++++++++++++++-- 2 files changed, 60 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index fca2ff56aae8a..40421f1170d55 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -686,12 +686,40 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, IntrinsicNodeBuilder nodeBuilder(getAllocator(CMK_ASTNode), sig->numArgs); + // TODO-CQ: We don't handle contiguous args for anything except TYP_FLOAT today + + GenTree* prevArg = nullptr; + bool areArgsContiguous = (simdBaseType == TYP_FLOAT); + for (int i = sig->numArgs - 1; i >= 0; i--) { - nodeBuilder.AddOperand(i, impPopStack().val); + GenTree* arg = impPopStack().val; + + if (areArgsContiguous) + { + if (prevArg != nullptr) + { + // Recall that we are popping the args off the stack in reverse order. 
+ areArgsContiguous = areArgumentsContiguous(arg, prevArg); + } + + prevArg = arg; + } + + nodeBuilder.AddOperand(i, arg); } - retNode = gtNewSimdHWIntrinsicNode(retType, std::move(nodeBuilder), intrinsic, simdBaseJitType, simdSize); + if (areArgsContiguous) + { + op1 = nodeBuilder.GetOperand(0); + GenTree* op1Address = CreateAddressNodeForSimdHWIntrinsicCreate(op1, simdBaseType, 16); + retNode = gtNewOperNode(GT_IND, TYP_SIMD16, op1Address); + } + else + { + retNode = + gtNewSimdHWIntrinsicNode(retType, std::move(nodeBuilder), intrinsic, simdBaseJitType, simdSize); + } break; } diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index 40ef0bc8078ad..3b32895ef6508 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -1058,12 +1058,40 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic, IntrinsicNodeBuilder nodeBuilder(getAllocator(CMK_ASTNode), sig->numArgs); + // TODO-CQ: We don't handle contiguous args for anything except TYP_FLOAT today + + GenTree* prevArg = nullptr; + bool areArgsContiguous = (simdBaseType == TYP_FLOAT); + for (int i = sig->numArgs - 1; i >= 0; i--) { - nodeBuilder.AddOperand(i, impPopStack().val); + GenTree* arg = impPopStack().val; + + if (areArgsContiguous) + { + if (prevArg != nullptr) + { + // Recall that we are popping the args off the stack in reverse order. 
+ areArgsContiguous = areArgumentsContiguous(arg, prevArg); + } + + prevArg = arg; + } + + nodeBuilder.AddOperand(i, arg); } - retNode = gtNewSimdHWIntrinsicNode(retType, std::move(nodeBuilder), intrinsic, simdBaseJitType, simdSize); + if (areArgsContiguous) + { + op1 = nodeBuilder.GetOperand(0); + GenTree* op1Address = CreateAddressNodeForSimdHWIntrinsicCreate(op1, simdBaseType, 16); + retNode = gtNewOperNode(GT_IND, TYP_SIMD16, op1Address); + } + else + { + retNode = + gtNewSimdHWIntrinsicNode(retType, std::move(nodeBuilder), intrinsic, simdBaseJitType, simdSize); + } break; } From 257a7a743ef1fb9156f6eb4ce836ce480b577e4a Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Thu, 15 Dec 2022 11:15:55 -0800 Subject: [PATCH 08/20] Fixing how `this` is spilled for the SimdAsHWIntrinsic constructors --- src/coreclr/jit/simdashwintrinsic.cpp | 31 ++++++++++++++------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/src/coreclr/jit/simdashwintrinsic.cpp b/src/coreclr/jit/simdashwintrinsic.cpp index cdfb88d7f46a4..c5df27aad5bfc 100644 --- a/src/coreclr/jit/simdashwintrinsic.cpp +++ b/src/coreclr/jit/simdashwintrinsic.cpp @@ -331,6 +331,7 @@ GenTree* Compiler::impSimdAsHWIntrinsic(NamedIntrinsic intrinsic, { if (SimdAsHWIntrinsicInfo::SpillSideEffectsOp1(intrinsic)) { + assert(newobjThis == nullptr); impSpillSideEffect(true, verCurrentState.esStackDepth - 2 DEBUGARG("Spilling op1 side effects for SimdAsHWIntrinsic")); } @@ -1182,11 +1183,10 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, case 2: { - if (SimdAsHWIntrinsicInfo::SpillSideEffectsOp1(intrinsic)) + if (SimdAsHWIntrinsicInfo::SpillSideEffectsOp1(intrinsic) && (newobjThis == nullptr)) { impSpillSideEffect(true, verCurrentState.esStackDepth - - ((newobjThis == nullptr) ? 2 : 1)DEBUGARG( - "Spilling op1 side effects for SimdAsHWIntrinsic")); + 2 DEBUGARG("Spilling op1 side effects for SimdAsHWIntrinsic")); } CORINFO_ARG_LIST_HANDLE arg2 = isInstanceMethod ? 
argList : info.compCompHnd->getArgNext(argList); @@ -1743,18 +1743,17 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, case 3: { - if (SimdAsHWIntrinsicInfo::SpillSideEffectsOp1(intrinsic)) + if (SimdAsHWIntrinsicInfo::SpillSideEffectsOp1(intrinsic) && (newobjThis == nullptr)) { impSpillSideEffect(true, verCurrentState.esStackDepth - - ((newobjThis == nullptr) ? 3 : 2) - DEBUGARG("Spilling op1 side effects for SimdAsHWIntrinsic")); + 3 DEBUGARG("Spilling op1 side effects for SimdAsHWIntrinsic")); } if (SimdAsHWIntrinsicInfo::SpillSideEffectsOp2(intrinsic)) { + assert(newobjThis == nullptr); impSpillSideEffect(true, verCurrentState.esStackDepth - - ((newobjThis == nullptr) ? 2 : 1) - DEBUGARG("Spilling op2 side effects for SimdAsHWIntrinsic")); + 2 DEBUGARG("Spilling op2 side effects for SimdAsHWIntrinsic")); } CORINFO_ARG_LIST_HANDLE arg2 = isInstanceMethod ? argList : info.compCompHnd->getArgNext(argList); @@ -1876,7 +1875,7 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, else if (areArgumentsContiguous(op2, op3)) { GenTree* op2Address = CreateAddressNodeForSimdHWIntrinsicCreate(op2, simdBaseType, 8); - op2 = gtNewOperNode(GT_IND, TYP_SIMD8, op2Address); + copyBlkSrc = gtNewOperNode(GT_IND, TYP_SIMD8, op2Address); } else { @@ -1916,10 +1915,11 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, { assert(isInstanceMethod); assert(SimdAsHWIntrinsicInfo::SpillSideEffectsOp1(intrinsic)); + + if (newobjThis == nullptr) { impSpillSideEffect(true, verCurrentState.esStackDepth - - ((newobjThis == nullptr) ? 
4 : 3) - DEBUGARG("Spilling op1 side effects for SimdAsHWIntrinsic")); + 4 DEBUGARG("Spilling op1 side effects for SimdAsHWIntrinsic")); } assert(!SimdAsHWIntrinsicInfo::SpillSideEffectsOp2(intrinsic)); @@ -1972,7 +1972,7 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, else if (areArgumentsContiguous(op2, op3) && areArgumentsContiguous(op3, op4)) { GenTree* op2Address = CreateAddressNodeForSimdHWIntrinsicCreate(op2, simdBaseType, 12); - op2 = gtNewOperNode(GT_IND, TYP_SIMD12, op2Address); + copyBlkSrc = gtNewOperNode(GT_IND, TYP_SIMD12, op2Address); } else { @@ -2005,10 +2005,11 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, { assert(isInstanceMethod); assert(SimdAsHWIntrinsicInfo::SpillSideEffectsOp1(intrinsic)); + + if (newobjThis == nullptr) { impSpillSideEffect(true, verCurrentState.esStackDepth - - ((newobjThis == nullptr) ? 5 : 4) - DEBUGARG("Spilling op1 side effects for SimdAsHWIntrinsic")); + 5 DEBUGARG("Spilling op1 side effects for SimdAsHWIntrinsic")); } assert(!SimdAsHWIntrinsicInfo::SpillSideEffectsOp2(intrinsic)); @@ -2067,7 +2068,7 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, areArgumentsContiguous(op4, op5)) { GenTree* op2Address = CreateAddressNodeForSimdHWIntrinsicCreate(op2, simdBaseType, 16); - op2 = gtNewOperNode(GT_IND, TYP_SIMD16, op2Address); + copyBlkSrc = gtNewOperNode(GT_IND, TYP_SIMD16, op2Address); } else { From 064832c27ee4d472430597d10ea69b1119458864 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Thu, 15 Dec 2022 11:59:23 -0800 Subject: [PATCH 09/20] Remove SIMDIntrinsicInitArray* and SIMDIntrinsicCopyToArray*, they are handled in managed code exclusively --- src/coreclr/jit/codegenloongarch64.cpp | 129 ---------------- src/coreclr/jit/gentree.cpp | 18 --- src/coreclr/jit/gentree.h | 2 - src/coreclr/jit/liveness.cpp | 13 -- src/coreclr/jit/lower.cpp | 31 ---- src/coreclr/jit/lower.h | 6 - src/coreclr/jit/lowerarmarch.cpp | 22 --- 
src/coreclr/jit/lowerloongarch64.cpp | 26 ---- src/coreclr/jit/lowerxarch.cpp | 22 --- src/coreclr/jit/lsra.h | 4 - src/coreclr/jit/lsraarm64.cpp | 68 --------- src/coreclr/jit/lsraloongarch64.cpp | 23 --- src/coreclr/jit/lsraxarch.cpp | 52 ------- src/coreclr/jit/rationalize.cpp | 31 ---- src/coreclr/jit/simd.cpp | 141 ------------------ src/coreclr/jit/simdintrinsiclist.h | 8 - src/coreclr/jit/valuenum.cpp | 51 +------ .../src/System/Numerics/Vector2.cs | 39 +++-- .../src/System/Numerics/Vector3.cs | 39 ++--- .../src/System/Numerics/Vector4.cs | 40 ++--- .../src/System/Numerics/Vector_1.cs | 91 ++++++----- .../System/Runtime/Intrinsics/Vector128.cs | 6 + .../System/Runtime/Intrinsics/Vector256.cs | 6 + .../src/System/Runtime/Intrinsics/Vector64.cs | 6 + 24 files changed, 141 insertions(+), 733 deletions(-) diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index dddc5baa5d9de..298d17e9e4cbf 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -4882,135 +4882,6 @@ insOpts CodeGen::genGetSimdInsOpt(emitAttr size, var_types elementType) return INS_OPTS_NONE; } -//------------------------------------------------------------------------ -// genSIMDIntrinsicInit: Generate code for SIMD Intrinsic Initialize. -// -// Arguments: -// simdNode - The GT_SIMD node -// -// Return Value: -// None. -// -void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode) -{ - NYI("unimplemented on LOONGARCH64 yet"); -} - -//---------------------------------------------------------------------------------- -// genSIMDIntrinsicUnOp: Generate code for SIMD Intrinsic unary operations like sqrt. -// -// Arguments: -// simdNode - The GT_SIMD node -// -// Return Value: -// None. 
-// -void CodeGen::genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode) -{ - NYI("unimplemented on LOONGARCH64 yet"); -} - -//-------------------------------------------------------------------------------- -// genSIMDIntrinsicWiden: Generate code for SIMD Intrinsic Widen operations -// -// Arguments: -// simdNode - The GT_SIMD node -// -// Notes: -// The Widen intrinsics are broken into separate intrinsics for the two results. -// -void CodeGen::genSIMDIntrinsicWiden(GenTreeSIMD* simdNode) -{ - NYI("unimplemented on LOONGARCH64 yet"); -} - -//-------------------------------------------------------------------------------- -// genSIMDIntrinsicNarrow: Generate code for SIMD Intrinsic Narrow operations -// -// Arguments: -// simdNode - The GT_SIMD node -// -// Notes: -// This intrinsic takes two arguments. The first operand is narrowed to produce the -// lower elements of the results, and the second operand produces the high elements. -// -void CodeGen::genSIMDIntrinsicNarrow(GenTreeSIMD* simdNode) -{ - NYI("unimplemented on LOONGARCH64 yet"); -} - -//-------------------------------------------------------------------------------- -// genSIMDIntrinsicBinOp: Generate code for SIMD Intrinsic binary operations -// add, sub, mul, bit-wise And, AndNot and Or. -// -// Arguments: -// simdNode - The GT_SIMD node -// -// Return Value: -// None. -// -void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode) -{ - NYI("unimplemented on LOONGARCH64 yet"); -} - -//-------------------------------------------------------------------------------- -// genSIMDIntrinsicRelOp: Generate code for a SIMD Intrinsic relational operator -// == and != -// -// Arguments: -// simdNode - The GT_SIMD node -// -// Return Value: -// None. 
-// -void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) -{ - NYI("unimplemented on LOONGARCH64 yet"); -} - -//-------------------------------------------------------------------------------- -// genSIMDIntrinsicDotProduct: Generate code for SIMD Intrinsic Dot Product. -// -// Arguments: -// simdNode - The GT_SIMD node -// -// Return Value: -// None. -// -void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode) -{ - NYI("unimplemented on LOONGARCH64 yet"); -} - -//------------------------------------------------------------------------------------ -// genSIMDIntrinsicGetItem: Generate code for SIMD Intrinsic get element at index i. -// -// Arguments: -// simdNode - The GT_SIMD node -// -// Return Value: -// None. -// -void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode) -{ - NYI("unimplemented on LOONGARCH64 yet"); -} - -//------------------------------------------------------------------------------------ -// genSIMDIntrinsicSetItem: Generate code for SIMD Intrinsic set element at index i. -// -// Arguments: -// simdNode - The GT_SIMD node -// -// Return Value: -// None. -// -void CodeGen::genSIMDIntrinsicSetItem(GenTreeSIMD* simdNode) -{ - NYI("unimplemented on LOONGARCH64 yet"); -} - //----------------------------------------------------------------------------- // genSIMDIntrinsicUpperSave: save the upper half of a TYP_SIMD16 vector to // the given register, if any, or to memory. 
diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 41f60a4301bb8..60c5dd7ccef4f 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -6457,12 +6457,6 @@ bool GenTree::OperIsImplicitIndir() const return true; case GT_INTRINSIC: return AsIntrinsic()->gtIntrinsicName == NI_System_Object_GetType; -#ifdef FEATURE_SIMD - case GT_SIMD: - { - return AsSIMD()->OperIsMemoryLoad(); - } -#endif // FEATURE_SIMD #ifdef FEATURE_HW_INTRINSICS case GT_HWINTRINSIC: { @@ -18722,18 +18716,6 @@ var_types GenTreeJitIntrinsic::GetSimdBaseType() const return JitType2PreciseVarType(simdBaseJitType); } -//------------------------------------------------------------------------ -// OperIsMemoryLoad: Does this SIMD intrinsic have memory load semantics? -// -// Return Value: -// Whether this intrinsic may throw NullReferenceException if the -// address is "null". -// -bool GenTreeSIMD::OperIsMemoryLoad() const -{ - return GetSIMDIntrinsicId() == SIMDIntrinsicInitArray; -} - /* static */ bool GenTreeSIMD::Equals(GenTreeSIMD* op1, GenTreeSIMD* op2) { return (op1->TypeGet() == op2->TypeGet()) && (op1->GetSIMDIntrinsicId() == op2->GetSIMDIntrinsicId()) && diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index ac751d04d8e83..f355162a932a3 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -6330,8 +6330,6 @@ struct GenTreeSIMD : public GenTreeJitIntrinsic } #endif - bool OperIsMemoryLoad() const; - SIMDIntrinsicID GetSIMDIntrinsicId() const { return gtSIMDIntrinsicID; diff --git a/src/coreclr/jit/liveness.cpp b/src/coreclr/jit/liveness.cpp index 20fd0a6216574..e1f0a9d02658e 100644 --- a/src/coreclr/jit/liveness.cpp +++ b/src/coreclr/jit/liveness.cpp @@ -280,19 +280,6 @@ void Compiler::fgPerNodeLocalVarLiveness(GenTree* tree) fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed); break; -#ifdef FEATURE_SIMD - case GT_SIMD: - { - GenTreeSIMD* simdNode = tree->AsSIMD(); - if (simdNode->OperIsMemoryLoad()) - { - 
// This instruction loads from memory and we need to record this information - fgCurMemoryUse |= memoryKindSet(GcHeap, ByrefExposed); - } - break; - } -#endif // FEATURE_SIMD - #ifdef FEATURE_HW_INTRINSICS case GT_HWINTRINSIC: { diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 9cfd80c28ae3b..27cbfe94df314 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -384,12 +384,6 @@ GenTree* Lowering::LowerNode(GenTree* node) break; #endif // TARGET_XARCH -#ifdef FEATURE_SIMD - case GT_SIMD: - LowerSIMD(node->AsSIMD()); - break; -#endif // FEATURE_SIMD - #ifdef FEATURE_HW_INTRINSICS case GT_HWINTRINSIC: return LowerHWIntrinsic(node->AsHWIntrinsic()); @@ -7061,11 +7055,6 @@ void Lowering::ContainCheckNode(GenTree* node) ContainCheckIntrinsic(node->AsOp()); break; #endif // TARGET_XARCH -#ifdef FEATURE_SIMD - case GT_SIMD: - ContainCheckSIMD(node->AsSIMD()); - break; -#endif // FEATURE_SIMD #ifdef FEATURE_HW_INTRINSICS case GT_HWINTRINSIC: ContainCheckHWIntrinsic(node->AsHWIntrinsic()); @@ -7572,26 +7561,6 @@ void Lowering::TryRetypingFloatingPointStoreToIntegerStore(GenTree* store) } } -#ifdef FEATURE_SIMD -//---------------------------------------------------------------------------------------------- -// Lowering::LowerSIMD: Perform containment analysis for a SIMD intrinsic node. -// -// Arguments: -// simdNode - The SIMD intrinsic node. 
-// -void Lowering::LowerSIMD(GenTreeSIMD* simdNode) -{ - if (simdNode->TypeGet() == TYP_SIMD12) - { - // GT_SIMD node requiring to produce TYP_SIMD12 in fact - // produces a TYP_SIMD16 result - simdNode->gtType = TYP_SIMD16; - } - - ContainCheckSIMD(simdNode); -} -#endif // FEATURE_SIMD - #if defined(FEATURE_HW_INTRINSICS) //---------------------------------------------------------------------------------------------- // Lowering::InsertNewSimdCreateScalarUnsafeNode: Inserts a new simd CreateScalarUnsafe node diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index ace595ff7219f..7139018730060 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -107,9 +107,6 @@ class Lowering final : public Phase void ContainCheckFloatBinary(GenTreeOp* node); void ContainCheckIntrinsic(GenTreeOp* node); #endif // TARGET_XARCH -#ifdef FEATURE_SIMD - void ContainCheckSIMD(GenTreeSIMD* simdNode); -#endif // FEATURE_SIMD #ifdef FEATURE_HW_INTRINSICS void ContainCheckHWIntrinsicAddr(GenTreeHWIntrinsic* node, GenTree* addr); void ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node); @@ -346,9 +343,6 @@ class Lowering final : public Phase GenTree* LowerArrElem(GenTreeArrElem* arrElem); void LowerRotate(GenTree* tree); void LowerShift(GenTreeOp* shift); -#ifdef FEATURE_SIMD - void LowerSIMD(GenTreeSIMD* simdNode); -#endif // FEATURE_SIMD #ifdef FEATURE_HW_INTRINSICS GenTree* LowerHWIntrinsic(GenTreeHWIntrinsic* node); void LowerHWIntrinsicCC(GenTreeHWIntrinsic* node, NamedIntrinsic newIntrinsicId, GenCondition condition); diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 33702d4bb33f0..6fd900f8a1032 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -2496,28 +2496,6 @@ void Lowering::ContainCheckBoundsChk(GenTreeBoundsChk* node) } } -#ifdef FEATURE_SIMD -//---------------------------------------------------------------------------------------------- -// ContainCheckSIMD: Perform 
containment analysis for a SIMD intrinsic node. -// -// Arguments: -// simdNode - The SIMD intrinsic node. -// -void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode) -{ - switch (simdNode->GetSIMDIntrinsicId()) - { - case SIMDIntrinsicInitArray: - // We have an array and an index, which may be contained. - CheckImmedAndMakeContained(simdNode, simdNode->Op(2)); - break; - - default: - break; - } -} -#endif // FEATURE_SIMD - #ifdef FEATURE_HW_INTRINSICS //---------------------------------------------------------------------------------------------- diff --git a/src/coreclr/jit/lowerloongarch64.cpp b/src/coreclr/jit/lowerloongarch64.cpp index d892e72d24d8a..ee42e0c58b32f 100644 --- a/src/coreclr/jit/lowerloongarch64.cpp +++ b/src/coreclr/jit/lowerloongarch64.cpp @@ -505,19 +505,6 @@ void Lowering::LowerRotate(GenTree* tree) ContainCheckShiftRotate(tree->AsOp()); } -#ifdef FEATURE_SIMD -//---------------------------------------------------------------------------------------------- -// Lowering::LowerSIMD: Perform containment analysis for a SIMD intrinsic node. -// -// Arguments: -// simdNode - The SIMD intrinsic node. -// -void Lowering::LowerSIMD(GenTreeSIMD* simdNode) -{ - NYI_LOONGARCH64("LowerSIMD"); -} -#endif // FEATURE_SIMD - #ifdef FEATURE_HW_INTRINSICS //---------------------------------------------------------------------------------------------- // Lowering::LowerHWIntrinsic: Perform containment analysis for a hardware intrinsic node. @@ -815,19 +802,6 @@ void Lowering::ContainCheckBoundsChk(GenTreeBoundsChk* node) } } -#ifdef FEATURE_SIMD -//---------------------------------------------------------------------------------------------- -// ContainCheckSIMD: Perform containment analysis for a SIMD intrinsic node. -// -// Arguments: -// simdNode - The SIMD intrinsic node. 
-// -void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode) -{ - NYI_LOONGARCH64("ContainCheckSIMD"); -} -#endif // FEATURE_SIMD - #ifdef FEATURE_HW_INTRINSICS //---------------------------------------------------------------------------------------------- // ContainCheckHWIntrinsic: Perform containment analysis for a hardware intrinsic node. diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 5e40ea3408a73..0d88903db3877 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -6124,28 +6124,6 @@ void Lowering::ContainCheckIntrinsic(GenTreeOp* node) } } -#ifdef FEATURE_SIMD -//---------------------------------------------------------------------------------------------- -// ContainCheckSIMD: Perform containment analysis for a SIMD intrinsic node. -// -// Arguments: -// simdNode - The SIMD intrinsic node. -// -void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode) -{ - switch (simdNode->GetSIMDIntrinsicId()) - { - case SIMDIntrinsicInitArray: - // We have an array and an index, which may be contained. 
- CheckImmedAndMakeContained(simdNode, simdNode->Op(2)); - break; - - default: - break; - } -} -#endif // FEATURE_SIMD - #ifdef FEATURE_HW_INTRINSICS //---------------------------------------------------------------------------------------------- // TryGetContainableHWIntrinsicOp: Tries to get a containable node for a given HWIntrinsic diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index e30736a63fd5a..195d1e6523710 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1916,10 +1916,6 @@ class LinearScan : public LinearScanInterface } #endif // TARGET_X86 -#ifdef FEATURE_SIMD - int BuildSIMD(GenTreeSIMD* tree); -#endif // FEATURE_SIMD - #ifdef FEATURE_HW_INTRINSICS int BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCount); #endif // FEATURE_HW_INTRINSICS diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 18852899492ba..8f5d24ba31304 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -380,12 +380,6 @@ int LinearScan::BuildNode(GenTree* tree) } break; -#ifdef FEATURE_SIMD - case GT_SIMD: - srcCount = BuildSIMD(tree->AsSIMD()); - break; -#endif // FEATURE_SIMD - #ifdef FEATURE_HW_INTRINSICS case GT_HWINTRINSIC: srcCount = BuildHWIntrinsic(tree->AsHWIntrinsic(), &dstCount); @@ -797,68 +791,6 @@ int LinearScan::BuildNode(GenTree* tree) return srcCount; } -#ifdef FEATURE_SIMD -//------------------------------------------------------------------------ -// BuildSIMD: Set the NodeInfo for a GT_SIMD tree. -// -// Arguments: -// tree - The GT_SIMD node of interest -// -// Return Value: -// The number of sources consumed by this node. -// -int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) -{ - int srcCount = 0; - assert(!simdTree->isContained()); - int dstCount = simdTree->IsValue() ? 
1 : 0; - assert(dstCount == 1); - - bool buildUses = true; - - switch (simdTree->GetSIMDIntrinsicId()) - { - case SIMDIntrinsicInitArray: - // We have an array and an index, which may be contained. - break; - - case SIMDIntrinsicInitArrayX: - case SIMDIntrinsicInitFixed: - case SIMDIntrinsicCopyToArray: - case SIMDIntrinsicCopyToArrayX: - case SIMDIntrinsicNone: - case SIMDIntrinsicInvalid: - assert(!"These intrinsics should not be seen during register allocation"); - FALLTHROUGH; - - default: - noway_assert(!"Unimplemented SIMD node type."); - unreached(); - } - if (buildUses) - { - assert(srcCount == 0); - srcCount = BuildOperandUses(simdTree->Op(1)); - - if ((simdTree->GetOperandCount() == 2) && !simdTree->Op(2)->isContained()) - { - srcCount += BuildOperandUses(simdTree->Op(2)); - } - } - assert(internalCount <= MaxInternalCount); - buildInternalRegisterUses(); - if (dstCount == 1) - { - BuildDef(simdTree); - } - else - { - assert(dstCount == 0); - } - return srcCount; -} -#endif // FEATURE_SIMD - #ifdef FEATURE_HW_INTRINSICS #include "hwintrinsic.h" diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp index 0d5a20d6f3dfc..0611b1d5e162d 100644 --- a/src/coreclr/jit/lsraloongarch64.cpp +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -327,12 +327,6 @@ int LinearScan::BuildNode(GenTree* tree) } break; -#ifdef FEATURE_SIMD - case GT_SIMD: - srcCount = BuildSIMD(tree->AsSIMD()); - break; -#endif // FEATURE_SIMD - #ifdef FEATURE_HW_INTRINSICS case GT_HWINTRINSIC: srcCount = BuildHWIntrinsic(tree->AsHWIntrinsic(), &dstCount); @@ -642,23 +636,6 @@ int LinearScan::BuildNode(GenTree* tree) return srcCount; } -#ifdef FEATURE_SIMD -//------------------------------------------------------------------------ -// BuildSIMD: Set the NodeInfo for a GT_SIMD tree. -// -// Arguments: -// tree - The GT_SIMD node of interest -// -// Return Value: -// The number of sources consumed by this node. 
-// -int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) -{ - NYI_LOONGARCH64("-----unimplemented on LOONGARCH64 yet----"); - return 0; -} -#endif // FEATURE_SIMD - #ifdef FEATURE_HW_INTRINSICS #include "hwintrinsic.h" //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 00177092c3942..3d6633e8bf32c 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -343,12 +343,6 @@ int LinearScan::BuildNode(GenTree* tree) srcCount = BuildIntrinsic(tree->AsOp()); break; -#ifdef FEATURE_SIMD - case GT_SIMD: - srcCount = BuildSIMD(tree->AsSIMD()); - break; -#endif // FEATURE_SIMD - #ifdef FEATURE_HW_INTRINSICS case GT_HWINTRINSIC: srcCount = BuildHWIntrinsic(tree->AsHWIntrinsic(), &dstCount); @@ -1905,52 +1899,6 @@ int LinearScan::BuildIntrinsic(GenTree* tree) return srcCount; } -#ifdef FEATURE_SIMD -//------------------------------------------------------------------------ -// BuildSIMD: Set the NodeInfo for a GT_SIMD tree. -// -// Arguments: -// tree - The GT_SIMD node of interest -// -// Return Value: -// The number of sources consumed by this node. -// -int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) -{ - // All intrinsics have a dstCount of 1 - assert(simdTree->IsValue()); - - bool buildUses = true; - regMaskTP dstCandidates = RBM_NONE; - - assert(!simdTree->isContained()); - SetContainsAVXFlags(simdTree->GetSimdSize()); - int srcCount = 0; - - switch (simdTree->GetSIMDIntrinsicId()) - { - case SIMDIntrinsicInitArray: - // We have an array and an index, which may be contained. - break; - - default: - noway_assert(!"Unimplemented SIMD node type."); - unreached(); - } - if (buildUses) - { - assert(srcCount == 0); - // This is overly conservative, but is here for zero diffs. - GenTree* op1 = simdTree->Op(1); - GenTree* op2 = (simdTree->GetOperandCount() == 2) ? 
simdTree->Op(2) : nullptr; - srcCount = BuildRMWUses(simdTree, op1, op2); - } - buildInternalRegisterUses(); - BuildDef(simdTree, dstCandidates); - return srcCount; -} -#endif // FEATURE_SIMD - #ifdef FEATURE_HW_INTRINSICS //------------------------------------------------------------------------ // BuildHWIntrinsic: Set the NodeInfo for a GT_HWINTRINSIC tree. diff --git a/src/coreclr/jit/rationalize.cpp b/src/coreclr/jit/rationalize.cpp index e2fae6778b323..c0c90c4c19cb4 100644 --- a/src/coreclr/jit/rationalize.cpp +++ b/src/coreclr/jit/rationalize.cpp @@ -608,37 +608,6 @@ Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, Compiler::Ge assert(comp->IsTargetIntrinsic(node->AsIntrinsic()->gtIntrinsicName)); break; -#ifdef FEATURE_SIMD - case GT_SIMD: - { - GenTreeSIMD* simdNode = node->AsSIMD(); - unsigned simdSize = simdNode->GetSimdSize(); - var_types simdType = comp->getSIMDTypeForSize(simdSize); - - // Certain SIMD trees require rationalizing. - if (simdNode->AsSIMD()->GetSIMDIntrinsicId() == SIMDIntrinsicInitArray) - { - // Rewrite this as an explicit load. - JITDUMP("Rewriting GT_SIMD array init as an explicit load:\n"); - unsigned int baseTypeSize = genTypeSize(simdNode->GetSimdBaseType()); - - GenTree* base = simdNode->Op(1); - GenTree* index = (simdNode->GetOperandCount() == 2) ? simdNode->Op(2) : nullptr; - GenTree* address = new (comp, GT_LEA) - GenTreeAddrMode(TYP_BYREF, base, index, baseTypeSize, OFFSETOF__CORINFO_Array__data); - GenTree* ind = comp->gtNewOperNode(GT_IND, simdType, address); - - BlockRange().InsertBefore(simdNode, address, ind); - use.ReplaceWith(ind); - BlockRange().Remove(simdNode); - - DISPTREERANGE(BlockRange(), use.Def()); - JITDUMP("\n"); - } - } - break; -#endif // FEATURE_SIMD - default: // Check that we don't have nodes not allowed in HIR here. 
assert((node->DebugOperKind() & DBK_NOTHIR) == 0); diff --git a/src/coreclr/jit/simd.cpp b/src/coreclr/jit/simd.cpp index 0d265d7cb6a52..eca4cc86d4d87 100644 --- a/src/coreclr/jit/simd.cpp +++ b/src/coreclr/jit/simd.cpp @@ -1684,147 +1684,6 @@ GenTree* Compiler::impSIMDIntrinsic(OPCODE opcode, switch (simdIntrinsicID) { - case SIMDIntrinsicInitArray: - case SIMDIntrinsicInitArrayX: - case SIMDIntrinsicCopyToArray: - case SIMDIntrinsicCopyToArrayX: - { - // op3 - index into array in case of SIMDIntrinsicCopyToArrayX and SIMDIntrinsicInitArrayX - // op2 - array itself - // op1 - byref to vector struct - - unsigned int vectorLength = getSIMDVectorLength(size, simdBaseType); - // (This constructor takes only the zero-based arrays.) - // We will add one or two bounds checks: - // 1. If we have an index, we must do a check on that first. - // We can't combine it with the index + vectorLength check because - // a. It might be negative, and b. It may need to raise a different exception - // (captured as SCK_ARG_RNG_EXCPN for CopyTo and Init). - // 2. We need to generate a check (SCK_ARG_EXCPN for CopyTo and Init) - // for the last array element we will access. - // We'll either check against (vectorLength - 1) or (index + vectorLength - 1). - - GenTree* checkIndexExpr = new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, vectorLength - 1); - - // Get the index into the array. If it has been provided, it will be on the - // top of the stack. Otherwise, it is null. - if (argCount == 3) - { - op3 = impSIMDPopStack(TYP_INT); - if (op3->IsIntegralConst(0)) - { - op3 = nullptr; - } - } - else - { - // TODO-CQ: Here, or elsewhere, check for the pattern where op2 is a newly constructed array, and - // change this to the InitN form. - // op3 = new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, 0); - op3 = nullptr; - } - - // Clone the array for use in the bounds check. 
- op2 = impSIMDPopStack(TYP_REF); - assert(op2->TypeGet() == TYP_REF); - GenTree* arrayRefForArgChk = op2; - GenTree* argRngChk = nullptr; - if ((arrayRefForArgChk->gtFlags & GTF_SIDE_EFFECT) != 0) - { - op2 = fgInsertCommaFormTemp(&arrayRefForArgChk); - } - else - { - op2 = gtCloneExpr(arrayRefForArgChk); - } - assert(op2 != nullptr); - - if (op3 != nullptr) - { - // We need to use the original expression on this, which is the first check. - GenTree* arrayRefForArgRngChk = arrayRefForArgChk; - // Then we clone the clone we just made for the next check. - arrayRefForArgChk = gtCloneExpr(op2); - // We know we MUST have had a cloneable expression. - assert(arrayRefForArgChk != nullptr); - GenTree* index = op3; - if ((index->gtFlags & GTF_SIDE_EFFECT) != 0) - { - op3 = fgInsertCommaFormTemp(&index); - } - else - { - op3 = gtCloneExpr(index); - } - - GenTreeArrLen* arrLen = - gtNewArrLen(TYP_INT, arrayRefForArgRngChk, (int)OFFSETOF__CORINFO_Array__length, compCurBB); - argRngChk = new (this, GT_BOUNDS_CHECK) GenTreeBoundsChk(index, arrLen, SCK_ARG_RNG_EXCPN); - // Now, clone op3 to create another node for the argChk - GenTree* index2 = gtCloneExpr(op3); - assert(index != nullptr); - checkIndexExpr = gtNewOperNode(GT_ADD, TYP_INT, index2, checkIndexExpr); - } - - // Insert a bounds check for index + offset - 1. - // This must be a "normal" array. - SpecialCodeKind op2CheckKind; - if (simdIntrinsicID == SIMDIntrinsicInitArray || simdIntrinsicID == SIMDIntrinsicInitArrayX) - { - op2CheckKind = SCK_ARG_RNG_EXCPN; - } - else - { - op2CheckKind = SCK_ARG_EXCPN; - } - GenTreeArrLen* arrLen = - gtNewArrLen(TYP_INT, arrayRefForArgChk, (int)OFFSETOF__CORINFO_Array__length, compCurBB); - GenTreeBoundsChk* argChk = - new (this, GT_BOUNDS_CHECK) GenTreeBoundsChk(checkIndexExpr, arrLen, op2CheckKind); - - // Create a GT_COMMA tree for the bounds check(s). 
- op2 = gtNewOperNode(GT_COMMA, op2->TypeGet(), argChk, op2); - if (argRngChk != nullptr) - { - op2 = gtNewOperNode(GT_COMMA, op2->TypeGet(), argRngChk, op2); - } - - if (simdIntrinsicID == SIMDIntrinsicInitArray || simdIntrinsicID == SIMDIntrinsicInitArrayX) - { - op1 = getOp1ForConstructor(opcode, newobjThis, clsHnd); - simdTree = (op3 != nullptr) - ? gtNewSIMDNode(simdType, op2, op3, SIMDIntrinsicInitArray, simdBaseJitType, size) - : gtNewSIMDNode(simdType, op2, SIMDIntrinsicInitArray, simdBaseJitType, size); - copyBlkDst = op1; - doCopyBlk = true; - } - else - { - assert(simdIntrinsicID == SIMDIntrinsicCopyToArray || simdIntrinsicID == SIMDIntrinsicCopyToArrayX); - op1 = impSIMDPopStack(simdType, instMethod); - assert(op1->TypeGet() == simdType); - - // copy vector (op1) to array (op2) starting at index (op3) - simdTree = op1; - copyBlkDst = op2; - if (op3 != nullptr) - { -#ifdef TARGET_64BIT - // Upcast the index: it is safe to use a zero-extending cast since we've bounds checked it above. 
- op3 = gtNewCastNode(TYP_I_IMPL, op3, /* fromUnsigned */ true, TYP_I_IMPL); -#endif // !TARGET_64BIT - GenTree* elemSizeNode = gtNewIconNode(genTypeSize(simdBaseType), TYP_I_IMPL); - GenTree* indexOffs = gtNewOperNode(GT_MUL, TYP_I_IMPL, op3, elemSizeNode); - copyBlkDst = gtNewOperNode(GT_ADD, TYP_BYREF, copyBlkDst, indexOffs); - } - - copyBlkDst = gtNewOperNode(GT_ADD, TYP_BYREF, copyBlkDst, - gtNewIconNode(OFFSETOF__CORINFO_Array__data, TYP_I_IMPL)); - doCopyBlk = true; - } - } - break; - case SIMDIntrinsicInitFixed: { // We are initializing a fixed-length vector VLarge with a smaller fixed-length vector VSmall, plus 1 or 2 diff --git a/src/coreclr/jit/simdintrinsiclist.h b/src/coreclr/jit/simdintrinsiclist.h index 1f6d18873baa4..12b5bb2a3bbcc 100644 --- a/src/coreclr/jit/simdintrinsiclist.h +++ b/src/coreclr/jit/simdintrinsiclist.h @@ -39,17 +39,9 @@ SIMD_INTRINSIC(nullptr, false, None, "None", TYP_UNDEF, 0, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}) // .ctor call or newobj -// This form takes the object plus an array of the base (element) type: -SIMD_INTRINSIC(".ctor", true, InitArray, "initArray", TYP_VOID, 2, {TYP_BYREF, TYP_REF, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) -// This form takes the object, an array of the base (element) type, and an index into the array: -SIMD_INTRINSIC(".ctor", true, InitArrayX, "initArray", TYP_VOID, 3, {TYP_BYREF, TYP_REF, TYP_INT }, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) // This form takes the object, a smaller fixed vector, and one or two additional arguments of the base type, e.g. Vector3 V = new Vector3(V2, x); where V2 is a Vector2, and x is a float. 
SIMD_INTRINSIC(".ctor", true, InitFixed, "initFixed", TYP_VOID, 3, {TYP_BYREF, TYP_STRUCT, TYP_UNKNOWN}, {TYP_FLOAT, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}) -// Copy vector to an array -SIMD_INTRINSIC("CopyTo", true, CopyToArray, "CopyToArray", TYP_VOID, 2, {TYP_BYREF, TYP_REF, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) -SIMD_INTRINSIC("CopyTo", true, CopyToArrayX, "CopyToArray", TYP_VOID, 3, {TYP_BYREF, TYP_REF, TYP_INT }, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) - // Internal intrinsics for saving & restoring the upper half of a vector register SIMD_INTRINSIC("UpperSave", false, UpperSave, "UpperSave Internal", TYP_STRUCT, 2, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}) SIMD_INTRINSIC("UpperRestore", false, UpperRestore, "UpperRestore Internal", TYP_STRUCT, 2, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}) diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index e82d187d5e889..805ef29bcd867 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -9583,56 +9583,7 @@ void Compiler::fgValueNumberSimd(GenTreeSIMD* tree) ValueNumPair op1Xvnp; vnStore->VNPUnpackExc(tree->Op(1)->gtVNPair, &op1vnp, &op1Xvnp); - ValueNum addrVN = ValueNumStore::NoVN; - bool isMemoryLoad = tree->OperIsMemoryLoad(); - - if (isMemoryLoad) - { - // Currently the only SIMD operation with MemoryLoad semantics is SIMDIntrinsicInitArray - // and it has to be handled specially since it has an optional op2 - // - assert(tree->GetSIMDIntrinsicId() == SIMDIntrinsicInitArray); - - // rationalize rewrites this as an explicit load with op1 as the base 
address - assert(tree->OperIsImplicitIndir()); - - ValueNumPair op2vnp; - if (tree->GetOperandCount() != 2) - { - // No op2 means that we have an impicit index of zero - op2vnp = ValueNumPair(vnStore->VNZeroForType(TYP_INT), vnStore->VNZeroForType(TYP_INT)); - - excSetPair = op1Xvnp; - } - else // We have an explicit index in op2 - { - ValueNumPair op2Xvnp; - vnStore->VNPUnpackExc(tree->Op(2)->gtVNPair, &op2vnp, &op2Xvnp); - - excSetPair = vnStore->VNPExcSetUnion(op1Xvnp, op2Xvnp); - } - - assert(vnStore->VNFuncArity(simdFunc) == 2); - addrVN = vnStore->VNForFunc(TYP_BYREF, simdFunc, op1vnp.GetLiberal(), op2vnp.GetLiberal()); - -#ifdef DEBUG - if (verbose) - { - printf("Treating GT_SIMD %s as a ByrefExposed load , addrVN is ", - simdIntrinsicNames[tree->GetSIMDIntrinsicId()]); - vnPrint(addrVN, 0); - } -#endif // DEBUG - - // The address could point anywhere, so it is an ByrefExposed load. - // - ValueNum loadVN = fgValueNumberByrefExposedLoad(tree->TypeGet(), addrVN); - tree->gtVNPair.SetLiberal(loadVN); - tree->gtVNPair.SetConservative(vnStore->VNForExpr(compCurBB, tree->TypeGet())); - tree->gtVNPair = vnStore->VNPWithExc(tree->gtVNPair, excSetPair); - fgValueNumberAddExceptionSetForIndirection(tree, tree->Op(1)); - return; - } + ValueNum addrVN = ValueNumStore::NoVN; if (tree->GetOperandCount() == 1) { diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2.cs index 90e2aa683ca7b..bf2f215b19afb 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2.cs @@ -565,11 +565,18 @@ public static Vector2 TransformNormal(Vector2 normal, Matrix4x4 matrix) /// is . /// The number of elements in the current instance is greater than in the array. /// is multidimensional. 
- [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public readonly void CopyTo(float[] array) { - CopyTo(array, 0); + // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons + + if (array.Length < Count) + { + ThrowHelper.ThrowArgumentException_DestinationTooShort(); + } + + ref byte address = ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(array)); + Unsafe.WriteUnaligned(ref address, this); } /// Copies the elements of the vector to a specified array starting at a specified index position. @@ -582,53 +589,53 @@ public readonly void CopyTo(float[] array) /// -or- /// is greater than or equal to the array length. /// is multidimensional. - [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public readonly void CopyTo(float[] array, int index) { - if (array is null) - { - ThrowHelper.ThrowNullReferenceException(); - } + // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons - if ((index < 0) || (index >= array.Length)) + if ((uint)index >= (uint)array.Length) { ThrowHelper.ThrowStartIndexArgumentOutOfRange_ArgumentOutOfRange_IndexMustBeLess(); } - if ((array.Length - index) < 2) + if ((array.Length - index) < Count) { ThrowHelper.ThrowArgumentException_DestinationTooShort(); } - array[index] = X; - array[index + 1] = Y; + // `index` counts float elements, so offset the float reference before reinterpreting it as bytes + Unsafe.WriteUnaligned(ref Unsafe.As<float, byte>(ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(array), index)), this); } /// Copies the vector to the given .The length of the destination span must be at least 2. /// The destination span which the values are copied into. /// If number of elements in source vector is greater than those available in destination span.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] public readonly void CopyTo(Span destination) { - if (destination.Length < 2) + if ((uint)destination.Length < (uint)Count) { ThrowHelper.ThrowArgumentException_DestinationTooShort(); } - Unsafe.WriteUnaligned(ref Unsafe.As(ref MemoryMarshal.GetReference(destination)), this); + ref byte address = ref Unsafe.As(ref MemoryMarshal.GetReference(destination)); + Unsafe.WriteUnaligned(ref address, this); } /// Attempts to copy the vector to the given . The length of the destination span must be at least 2. /// The destination span which the values are copied into. /// if the source vector was successfully copied to . if is not large enough to hold the source vector. + [MethodImpl(MethodImplOptions.AggressiveInlining)] public readonly bool TryCopyTo(Span destination) { - if (destination.Length < 2) + if ((uint)destination.Length < (uint)Count) { return false; } - Unsafe.WriteUnaligned(ref Unsafe.As(ref MemoryMarshal.GetReference(destination)), this); - + ref byte address = ref Unsafe.As(ref MemoryMarshal.GetReference(destination)); + Unsafe.WriteUnaligned(ref address, this); return true; } diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3.cs index e800227711f59..97c84d0c3a0d7 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3.cs @@ -587,11 +587,18 @@ public static Vector3 TransformNormal(Vector3 normal, Matrix4x4 matrix) /// is . /// The number of elements in the current instance is greater than in the array. /// is multidimensional. 
- [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public readonly void CopyTo(float[] array) { - CopyTo(array, 0); + // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons + + if (array.Length < Count) + { + ThrowHelper.ThrowArgumentException_DestinationTooShort(); + } + + ref byte address = ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(array)); + Unsafe.WriteUnaligned(ref address, this); } /// Copies the elements of the vector to a specified array starting at a specified index position. @@ -604,55 +611,53 @@ public readonly void CopyTo(float[] array) /// -or- /// is greater than or equal to the array length. /// is multidimensional. - [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public readonly void CopyTo(float[] array, int index) { - if (array is null) - { - ThrowHelper.ThrowNullReferenceException(); - } + // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons - if ((index < 0) || (index >= array.Length)) + if ((uint)index >= (uint)array.Length) { ThrowHelper.ThrowStartIndexArgumentOutOfRange_ArgumentOutOfRange_IndexMustBeLess(); } - if ((array.Length - index) < 3) + if ((array.Length - index) < Count) { ThrowHelper.ThrowArgumentException_DestinationTooShort(); } - array[index] = X; - array[index + 1] = Y; - array[index + 2] = Z; + // `index` counts float elements, so offset the float reference before reinterpreting it as bytes + Unsafe.WriteUnaligned(ref Unsafe.As<float, byte>(ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(array), index)), this); } /// Copies the vector to the given . The length of the destination span must be at least 3. /// The destination span which the values are copied into. /// If number of elements in source vector is greater than those available in destination span.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] public readonly void CopyTo(Span destination) { - if (destination.Length < 3) + if ((uint)destination.Length < (uint)Count) { ThrowHelper.ThrowArgumentException_DestinationTooShort(); } - Unsafe.WriteUnaligned(ref Unsafe.As(ref MemoryMarshal.GetReference(destination)), this); + ref byte address = ref Unsafe.As(ref MemoryMarshal.GetReference(destination)); + Unsafe.WriteUnaligned(ref address, this); } /// Attempts to copy the vector to the given . The length of the destination span must be at least 3. /// The destination span which the values are copied into. /// if the source vector was successfully copied to . if is not large enough to hold the source vector. + [MethodImpl(MethodImplOptions.AggressiveInlining)] public readonly bool TryCopyTo(Span destination) { - if (destination.Length < 3) + if ((uint)destination.Length < (uint)Count) { return false; } - Unsafe.WriteUnaligned(ref Unsafe.As(ref MemoryMarshal.GetReference(destination)), this); - + ref byte address = ref Unsafe.As(ref MemoryMarshal.GetReference(destination)); + Unsafe.WriteUnaligned(ref address, this); return true; } diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4.cs index bb12c1d1cab52..bed8a44e70cad 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4.cs @@ -672,11 +672,18 @@ public static Vector4 Transform(Vector4 value, Quaternion rotation) /// is . /// The number of elements in the current instance is greater than in the array. /// is multidimensional. 
- [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public readonly void CopyTo(float[] array) { - CopyTo(array, 0); + // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons + + if (array.Length < Count) + { + ThrowHelper.ThrowArgumentException_DestinationTooShort(); + } + + ref byte address = ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(array)); + Unsafe.WriteUnaligned(ref address, this); } /// Copies the elements of the vector to a specified array starting at a specified index position. @@ -689,56 +696,53 @@ public readonly void CopyTo(float[] array) /// -or- /// is greater than or equal to the array length. /// is multidimensional. - [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public readonly void CopyTo(float[] array, int index) { - if (array is null) - { - ThrowHelper.ThrowNullReferenceException(); - } + // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons - if ((index < 0) || (index >= array.Length)) + if ((uint)index >= (uint)array.Length) { ThrowHelper.ThrowStartIndexArgumentOutOfRange_ArgumentOutOfRange_IndexMustBeLess(); } - if ((array.Length - index) < 4) + if ((array.Length - index) < Count) { ThrowHelper.ThrowArgumentException_DestinationTooShort(); } - array[index] = X; - array[index + 1] = Y; - array[index + 2] = Z; - array[index + 3] = W; + // `index` counts float elements, so offset the float reference before reinterpreting it as bytes + Unsafe.WriteUnaligned(ref Unsafe.As<float, byte>(ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(array), index)), this); } /// Copies the vector to the given . The length of the destination span must be at least 4. /// The destination span which the values are copied into. /// If number of elements in source vector is greater than those available in destination span.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] public readonly void CopyTo(Span destination) { - if (destination.Length < 4) + if ((uint)destination.Length < (uint)Count) { ThrowHelper.ThrowArgumentException_DestinationTooShort(); } - Unsafe.WriteUnaligned(ref Unsafe.As(ref MemoryMarshal.GetReference(destination)), this); + ref byte address = ref Unsafe.As(ref MemoryMarshal.GetReference(destination)); + Unsafe.WriteUnaligned(ref address, this); } /// Attempts to copy the vector to the given . The length of the destination span must be at least 4. /// The destination span which the values are copied into. /// if the source vector was successfully copied to . if is not large enough to hold the source vector. + [MethodImpl(MethodImplOptions.AggressiveInlining)] public readonly bool TryCopyTo(Span destination) { - if (destination.Length < 4) + if ((uint)destination.Length < (uint)Count) { return false; } - Unsafe.WriteUnaligned(ref Unsafe.As(ref MemoryMarshal.GetReference(destination)), this); - + ref byte address = ref Unsafe.As(ref MemoryMarshal.GetReference(destination)); + Unsafe.WriteUnaligned(ref address, this); return true; } diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_1.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_1.cs index 55cd24908a060..b3740f7c98222 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_1.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_1.cs @@ -55,9 +55,19 @@ public Vector(T value) /// A new with its elements set to the first elements from . /// is null. /// The length of is less than . 
- [Intrinsic] - public Vector(T[] values) : this(values, 0) + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Vector(T[] values) { + // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons + Unsafe.SkipInit(out this); + + if (values.Length < Count) + { + ThrowHelper.ThrowArgumentOutOfRange_IndexMustBeLessOrEqualException(); + } + + ref byte address = ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(values)); + this = Unsafe.ReadUnaligned>(ref address); } /// Creates a new from a given array. @@ -66,22 +76,19 @@ public Vector(T[] values) : this(values, 0) /// A new with its elements set to the first elements from . /// is null. /// The length of , starting from , is less than . - [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public Vector(T[] values, int index) { - ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); - - if (values is null) - { - ThrowHelper.ThrowNullReferenceException(); - } + // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons + Unsafe.SkipInit(out this); if ((index < 0) || ((values.Length - index) < Count)) { ThrowHelper.ThrowArgumentOutOfRange_IndexMustBeLessOrEqualException(); } - this = Unsafe.ReadUnaligned>(ref Unsafe.As(ref values[index])); + // `index` counts T elements, so offset the T reference before reinterpreting it as bytes + this = Unsafe.ReadUnaligned<Vector<T>>(ref Unsafe.As<T, byte>(ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(values), index))); } /// Creates a new from a given readonly span.
@@ -91,14 +98,16 @@ public Vector(T[] values, int index) [MethodImpl(MethodImplOptions.AggressiveInlining)] public Vector(ReadOnlySpan values) { - ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); + // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons + Unsafe.SkipInit(out this); if (values.Length < Count) { ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.values); } - this = Unsafe.ReadUnaligned>(ref Unsafe.As(ref MemoryMarshal.GetReference(values))); + ref byte address = ref Unsafe.As(ref MemoryMarshal.GetReference(values)); + this = Unsafe.ReadUnaligned>(ref address); } /// Creates a new from a given readonly span. @@ -108,14 +117,16 @@ public Vector(ReadOnlySpan values) [MethodImpl(MethodImplOptions.AggressiveInlining)] public Vector(ReadOnlySpan values) { - ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); + // `values` is measured in bytes, so it must hold Vector<byte>.Count bytes rather than Count elements of T + Unsafe.SkipInit(out this); - if (values.Length < Vector.Count) + if (values.Length < Vector<byte>.Count) { ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.values); } - this = Unsafe.ReadUnaligned>(ref MemoryMarshal.GetReference(values)); + ref byte address = ref MemoryMarshal.GetReference(values); + this = Unsafe.ReadUnaligned>(ref address); } /// Creates a new from a given span.
- [Intrinsic] - public void CopyTo(T[] destination) => CopyTo(destination, 0); + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void CopyTo(T[] destination) + { + // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons + + if (destination.Length < Count) + { + ThrowHelper.ThrowArgumentException_DestinationTooShort(); + } + + ref byte address = ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(destination)); + Unsafe.WriteUnaligned(ref address, this); + } /// Copies a to a given array starting at the specified index. /// The array to which the current instance is copied. @@ -661,15 +683,10 @@ public static Vector operator >>>(Vector value, int shiftCount) /// is null. /// The length of is less than . /// is negative or greater than the length of . - [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public void CopyTo(T[] destination, int startIndex) { - ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); - - if (destination is null) - { - ThrowHelper.ThrowNullReferenceException(); - } + // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons if ((uint)startIndex >= (uint)destination.Length) { @@ -681,37 +698,38 @@ public void CopyTo(T[] destination, int startIndex) ThrowHelper.ThrowArgumentException_DestinationTooShort(); } - Unsafe.WriteUnaligned>(ref Unsafe.As(ref destination[startIndex]), this); + // `startIndex` counts T elements, so offset the T reference before reinterpreting it as bytes + Unsafe.WriteUnaligned(ref Unsafe.As<T, byte>(ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(destination), startIndex)), this); } /// Copies a to a given span. /// The span to which the current instance is copied. /// The length of is less than sizeof().
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] public void CopyTo(Span destination) { - ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); - - if ((uint)destination.Length < (uint)Vector.Count) + if ((uint)destination.Length < (uint)Vector<byte>.Count) /* span length is in bytes, not elements of T */ { ThrowHelper.ThrowArgumentException_DestinationTooShort(); } - Unsafe.WriteUnaligned>(ref MemoryMarshal.GetReference(destination), this); + ref byte address = ref MemoryMarshal.GetReference(destination); + Unsafe.WriteUnaligned(ref address, this); } /// Copies a to a given span. /// The span to which the current instance is copied. /// The length of is less than . + [MethodImpl(MethodImplOptions.AggressiveInlining)] public void CopyTo(Span destination) { - ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); - if ((uint)destination.Length < (uint)Count) { ThrowHelper.ThrowArgumentException_DestinationTooShort(); } - Unsafe.WriteUnaligned>(ref Unsafe.As(ref MemoryMarshal.GetReference(destination)), this); + ref byte address = ref Unsafe.As(ref MemoryMarshal.GetReference(destination)); + Unsafe.WriteUnaligned(ref address, this); } /// Returns a boolean indicating whether the given Object is equal to this vector instance. @@ -809,22 +827,23 @@ public string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] strin /// Tries to copy a to a given span. /// The span to which the current instance is copied. /// true if the current instance was successfully copied to ; otherwise, false if the length of is less than sizeof().
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool TryCopyTo(Span destination) { - ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); - - if ((uint)destination.Length < (uint)Vector.Count) + if ((uint)destination.Length < (uint)Vector<byte>.Count) /* span length is in bytes, not elements of T */ { return false; } - Unsafe.WriteUnaligned>(ref MemoryMarshal.GetReference(destination), this); + ref byte address = ref MemoryMarshal.GetReference(destination); + Unsafe.WriteUnaligned(ref address, this); return true; } /// Tries to copy a to a given span. /// The span to which the current instance is copied. /// true if the current instance was successfully copied to ; otherwise, false if the length of is less than . + [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool TryCopyTo(Span destination) { ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs index a1d433731dffe..0079c0aa41318 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs @@ -599,6 +599,7 @@ public static unsafe Vector128 ConvertToUInt64(Vector128 vector) /// The length of is less than . /// The type of and () is not supported. /// is null.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] public static unsafe void CopyTo(this Vector128 vector, T[] destination, int startIndex) where T : struct { @@ -647,6 +649,7 @@ public static unsafe void CopyTo(this Vector128 vector, T[] destination, i /// The span to which the is copied. /// The length of is less than . /// The type of and () is not supported. + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void CopyTo(this Vector128 vector, Span destination) where T : struct { @@ -779,6 +782,7 @@ public static unsafe Vector128 Create(T value) /// The length of is less than . /// The type of () is not supported. /// is null. + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 Create(T[] values) where T : struct { @@ -801,6 +805,7 @@ public static Vector128 Create(T[] values) /// The length of , starting from , is less than . /// The type of () is not supported. /// is null. + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 Create(T[] values, int index) where T : struct { @@ -2774,6 +2779,7 @@ public static unsafe Vector256 ToVector256Unsafe(this Vector128 vector) /// The span to which is copied. /// true if was successfully copied to ; otherwise, false if the length of is less than . /// The type of and () is not supported. + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool TryCopyTo(this Vector128 vector, Span destination) where T : struct { diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs index 552236ddc4e35..fd998dcbf6b36 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs @@ -525,6 +525,7 @@ public static Vector256 ConvertToUInt64(Vector256 vector) /// The length of is less than . 
/// The type of and () is not supported. /// is null. + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void CopyTo(this Vector256 vector, T[] destination) where T : struct { @@ -548,6 +549,7 @@ public static void CopyTo(this Vector256 vector, T[] destination) /// is negative or greater than the length of . /// The type of and () is not supported. /// is null. + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void CopyTo(this Vector256 vector, T[] destination, int startIndex) where T : struct { @@ -573,6 +575,7 @@ public static void CopyTo(this Vector256 vector, T[] destination, int star /// The span to which the is copied. /// The length of is less than . /// The type of and () is not supported. + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void CopyTo(this Vector256 vector, Span destination) where T : struct { @@ -705,6 +708,7 @@ public static Vector256 Create(T value) /// The length of is less than . /// The type of () is not supported. /// is null. + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Create(T[] values) where T : struct { @@ -727,6 +731,7 @@ public static Vector256 Create(T[] values) /// The length of , starting from , is less than . /// The type of () is not supported. /// is null. + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Create(T[] values, int index) where T : struct { @@ -2750,6 +2755,7 @@ public static T ToScalar(this Vector256 vector) /// The span to which is copied. /// true if was successfully copied to ; otherwise, false if the length of is less than . /// The type of and () is not supported. 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool TryCopyTo(this Vector256 vector, Span destination) where T : struct { diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs index 6a897cd90364a..159b350836f49 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs @@ -465,6 +465,7 @@ public static unsafe Vector64 ConvertToUInt64(Vector64 vector) /// The length of is less than . /// The type of and () is not supported. /// is null. + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void CopyTo(this Vector64 vector, T[] destination) where T : struct { @@ -488,6 +489,7 @@ public static void CopyTo(this Vector64 vector, T[] destination) /// is negative or greater than the length of . /// The type of and () is not supported. /// is null. + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static unsafe void CopyTo(this Vector64 vector, T[] destination, int startIndex) where T : struct { @@ -513,6 +515,7 @@ public static unsafe void CopyTo(this Vector64 vector, T[] destination, in /// The span to which is copied. /// The length of is less than . /// The type of and () is not supported. + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void CopyTo(this Vector64 vector, Span destination) where T : struct { @@ -646,6 +649,7 @@ public static unsafe Vector64 Create(T value) /// The length of is less than . /// The type of () is not supported. /// is null. + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector64 Create(T[] values) where T : struct { @@ -668,6 +672,7 @@ public static Vector64 Create(T[] values) /// The length of , starting from , is less than . /// The type of () is not supported. /// is null. 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector64 Create(T[] values, int index) where T : struct { @@ -2428,6 +2433,7 @@ public static unsafe Vector128 ToVector128Unsafe(this Vector64 vector) /// The span to which is copied. /// true if was successfully copied to ; otherwise, false if the length of is less than . /// The type of and () is not supported. + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool TryCopyTo(this Vector64 vector, Span destination) where T : struct { From e689ad21f0a4f6cbc23a3367d775c1362ea5a5c7 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Thu, 15 Dec 2022 12:35:27 -0800 Subject: [PATCH 10/20] Move SIMDIntrinsicInitFixed to be implemented via SimdAsHWIntrinsic and remove impSIMDIntrinsic --- src/coreclr/jit/compiler.h | 21 - src/coreclr/jit/importercalls.cpp | 12 - src/coreclr/jit/simd.cpp | 449 ------------------- src/coreclr/jit/simdashwintrinsic.cpp | 71 +++ src/coreclr/jit/simdashwintrinsiclistarm64.h | 3 + src/coreclr/jit/simdashwintrinsiclistxarch.h | 3 + src/coreclr/jit/simdintrinsiclist.h | 4 - 7 files changed, 77 insertions(+), 486 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 8cce22548c6b8..ef03cf289179e 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -8587,16 +8587,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX return getBaseJitTypeAndSizeOfSIMDType(typeHnd, nullptr); } - // Get SIMD Intrinsic info given the method handle. - // Also sets typeHnd, argCount, baseType and sizeBytes out params. - const SIMDIntrinsicInfo* getSIMDIntrinsicInfo(CORINFO_CLASS_HANDLE* typeHnd, - CORINFO_METHOD_HANDLE methodHnd, - CORINFO_SIG_INFO* sig, - bool isNewObj, - unsigned* argCount, - CorInfoType* simdBaseJitType, - unsigned* sizeBytes); - // Pops and returns GenTree node from importers type stack. // Normalizes TYP_STRUCT value in case of GT_CALL, GT_RET_EXPR and arg nodes. 
GenTree* impSIMDPopStack(var_types type, bool expectAddr = false, CORINFO_CLASS_HANDLE structType = nullptr); @@ -8608,17 +8598,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX bool areArgumentsContiguous(GenTree* op1, GenTree* op2); GenTree* CreateAddressNodeForSimdHWIntrinsicCreate(GenTree* tree, var_types simdBaseType, unsigned simdSize); - // check methodHnd to see if it is a SIMD method that is expanded as an intrinsic in the JIT. - GenTree* impSIMDIntrinsic(OPCODE opcode, - GenTree* newobjThis, - CORINFO_CLASS_HANDLE clsHnd, - CORINFO_METHOD_HANDLE method, - CORINFO_SIG_INFO* sig, - unsigned methodFlags, - int memberRef); - - GenTree* getOp1ForConstructor(OPCODE opcode, GenTree* newobjThis, CORINFO_CLASS_HANDLE clsHnd); - // Whether SIMD vector occupies part of SIMD register. // SSE2: vector2f/3f are considered sub register SIMD types. // AVX: vector2f, 3f and 4f are all considered sub register SIMD types. diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index 3fddff3e16331..7398e983dd8ea 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -265,18 +265,6 @@ var_types Compiler::impImportCall(OPCODE opcode, } } -#ifdef FEATURE_SIMD - if (isIntrinsic) - { - call = impSIMDIntrinsic(opcode, newobjThis, clsHnd, methHnd, sig, mflags, pResolvedToken->token); - if (call != nullptr) - { - bIntrinsicImported = true; - goto DONE_CALL; - } - } -#endif // FEATURE_SIMD - if ((mflags & CORINFO_FLG_VIRTUAL) && (mflags & CORINFO_FLG_EnC) && (opcode == CEE_CALLVIRT)) { NO_WAY("Virtual call to a function added via EnC is not supported"); diff --git a/src/coreclr/jit/simd.cpp b/src/coreclr/jit/simd.cpp index eca4cc86d4d87..8f18908134f51 100644 --- a/src/coreclr/jit/simd.cpp +++ b/src/coreclr/jit/simd.cpp @@ -906,251 +906,6 @@ CorInfoType Compiler::getBaseJitTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeH return simdBaseJitType; } 
-//-------------------------------------------------------------------------------------- -// getSIMDIntrinsicInfo: get SIMD intrinsic info given the method handle. -// -// Arguments: -// inOutTypeHnd - The handle of the type on which the method is invoked. This is an in-out param. -// methodHnd - The handle of the method we're interested in. -// sig - method signature info -// isNewObj - whether this call represents a newboj constructor call -// argCount - argument count - out pram -// simdBaseJitType - base JIT type of the intrinsic - out param -// sizeBytes - size of SIMD vector type on which the method is invoked - out param -// -// Return Value: -// SIMDIntrinsicInfo struct initialized corresponding to methodHnd. -// Sets SIMDIntrinsicInfo.id to SIMDIntrinsicInvalid if methodHnd doesn't correspond -// to any SIMD intrinsic. Also, sets the out params inOutTypeHnd, argCount, baseType and -// sizeBytes. -// -// Note that VectorMath class doesn't have a base type and first argument of the method -// determines the SIMD vector type on which intrinsic is invoked. In such a case inOutTypeHnd -// is modified by this routine. -// -// TODO-Throughput: The current implementation is based on method name string parsing. -// Although we now have type identification from the VM, the parsing of intrinsic names -// could be made more efficient. 
-// -const SIMDIntrinsicInfo* Compiler::getSIMDIntrinsicInfo(CORINFO_CLASS_HANDLE* inOutTypeHnd, - CORINFO_METHOD_HANDLE methodHnd, - CORINFO_SIG_INFO* sig, - bool isNewObj, - unsigned* argCount, - CorInfoType* simdBaseJitType, - unsigned* sizeBytes) -{ - assert(simdBaseJitType != nullptr); - assert(sizeBytes != nullptr); - - // get simdBaseJitType and size of the type - CORINFO_CLASS_HANDLE typeHnd = *inOutTypeHnd; - *simdBaseJitType = getBaseJitTypeAndSizeOfSIMDType(typeHnd, sizeBytes); - - if (typeHnd == m_simdHandleCache->SIMDVectorHandle) - { - // All of the supported intrinsics on this static class take a first argument that's a vector, - // which determines the simdBaseJitType. - // The exception is the IsHardwareAccelerated property, which is handled as a special case. - assert(*simdBaseJitType == CORINFO_TYPE_UNDEF); - assert(sig->numArgs != 0); - { - typeHnd = info.compCompHnd->getArgClass(sig, sig->args); - *inOutTypeHnd = typeHnd; - *simdBaseJitType = getBaseJitTypeAndSizeOfSIMDType(typeHnd, sizeBytes); - } - } - - if (*simdBaseJitType == CORINFO_TYPE_UNDEF) - { - JITDUMP("NOT a SIMD Intrinsic: unsupported baseType\n"); - return nullptr; - } - - var_types simdBaseType = JitType2PreciseVarType(*simdBaseJitType); - - // account for implicit "this" arg - *argCount = sig->numArgs; - if (sig->hasThis()) - { - *argCount += 1; - } - - // Get the Intrinsic Id by parsing method name. - // - // TODO-Throughput: replace sequential search by binary search by arranging entries - // sorted by method name. - SIMDIntrinsicID intrinsicId = SIMDIntrinsicInvalid; - const char* methodName = info.compCompHnd->getMethodNameFromMetadata(methodHnd, nullptr, nullptr, nullptr); - for (int i = SIMDIntrinsicNone + 1; i < SIMDIntrinsicInvalid; ++i) - { - if (strcmp(methodName, simdIntrinsicInfoArray[i].methodName) == 0) - { - // Found an entry for the method; further check whether it is one of - // the supported base types. 
- bool found = false; - for (int j = 0; j < SIMD_INTRINSIC_MAX_BASETYPE_COUNT; ++j) - { - // Convention: if there are fewer base types supported than MAX_BASETYPE_COUNT, - // the end of the list is marked by TYP_UNDEF. - if (simdIntrinsicInfoArray[i].supportedBaseTypes[j] == TYP_UNDEF) - { - break; - } - - if (simdIntrinsicInfoArray[i].supportedBaseTypes[j] == simdBaseType) - { - found = true; - break; - } - } - - if (!found) - { - continue; - } - - // Now, check the arguments. - unsigned int fixedArgCnt = simdIntrinsicInfoArray[i].argCount; - unsigned int expectedArgCnt = fixedArgCnt; - - // The listed arg types include the vector and the first two init values, which is the expected number - // for Vector2. For other cases, we'll check their types here. - if (*argCount > expectedArgCnt) - { - if (i == SIMDIntrinsicInitFixed) - { - if (*argCount == 4 && typeHnd == m_simdHandleCache->SIMDVector4Handle) - { - expectedArgCnt = 4; - } - } - } - if (*argCount != expectedArgCnt) - { - continue; - } - - // Validate the types of individual args passed are what is expected of. - // If any of the types don't match with what is expected, don't consider - // as an intrinsic. This will make an older JIT with SIMD capabilities - // resilient to breaking changes to SIMD managed API. - // - // Note that from IL type stack, args get popped in right to left order - // whereas args get listed in method signatures in left to right order. - - int stackIndex = (expectedArgCnt - 1); - - // Track the arguments from the signature - we currently only use this to distinguish - // integral and pointer types, both of which will by TYP_I_IMPL on the importer stack. - CORINFO_ARG_LIST_HANDLE argLst = sig->args; - - CORINFO_CLASS_HANDLE argClass; - for (unsigned int argIndex = 0; found == true && argIndex < expectedArgCnt; argIndex++) - { - bool isThisPtr = ((argIndex == 0) && sig->hasThis()); - - // In case of "newobj SIMDVector(T val)", thisPtr won't be present on type stack. 
- // We don't check anything in that case. - if (!isThisPtr || !isNewObj) - { - GenTree* arg = impStackTop(stackIndex).val; - var_types argType = arg->TypeGet(); - - var_types expectedArgType; - if (argIndex < fixedArgCnt) - { - // Convention: - // - intrinsicInfo.argType[i] == TYP_UNDEF - intrinsic doesn't have a valid arg at position i - // - intrinsicInfo.argType[i] == TYP_UNKNOWN - arg type should be same as simdBaseType - // Note that we pop the args off in reverse order. - expectedArgType = simdIntrinsicInfoArray[i].argType[argIndex]; - assert(expectedArgType != TYP_UNDEF); - if (expectedArgType == TYP_UNKNOWN) - { - // The type of the argument will be genActualType(*simdBaseType). - expectedArgType = genActualType(simdBaseType); - argType = genActualType(argType); - } - } - else - { - expectedArgType = simdBaseType; - } - - if (!isThisPtr && argType == TYP_I_IMPL) - { - // The reference implementation has a constructor that takes a pointer. - // We don't want to recognize that one. This requires us to look at the CorInfoType - // in order to distinguish a signature with a pointer argument from one with an - // integer argument of pointer size, both of which will be TYP_I_IMPL on the stack. - // TODO-Review: This seems quite fragile. We should consider beefing up the checking - // here. - CorInfoType corType = strip(info.compCompHnd->getArgType(sig, argLst, &argClass)); - if (corType == CORINFO_TYPE_PTR) - { - found = false; - } - } - - if (varTypeIsSIMD(argType)) - { - argType = TYP_STRUCT; - } - if (argType != expectedArgType) - { - found = false; - } - } - if (argIndex != 0 || !sig->hasThis()) - { - argLst = info.compCompHnd->getArgNext(argLst); - } - stackIndex--; - } - - // Cross check return type and static vs. instance is what we are expecting. - // If not, don't consider it as an intrinsic. 
- // Note that ret type of TYP_UNKNOWN means that it is not known apriori and must be same as simdBaseType - if (found) - { - var_types expectedRetType = simdIntrinsicInfoArray[i].retType; - if (expectedRetType == TYP_UNKNOWN) - { - // JIT maps uint/ulong type vars to TYP_INT/TYP_LONG. - expectedRetType = (simdBaseType == TYP_UINT || simdBaseType == TYP_ULONG) - ? genActualType(simdBaseType) - : simdBaseType; - } - - if (JITtype2varType(sig->retType) != expectedRetType || - sig->hasThis() != simdIntrinsicInfoArray[i].isInstMethod) - { - found = false; - } - } - - if (found) - { - intrinsicId = (SIMDIntrinsicID)i; - break; - } - } - } - - if (intrinsicId != SIMDIntrinsicInvalid) - { - JITDUMP("Method %s maps to SIMD intrinsic %s\n", methodName, simdIntrinsicNames[intrinsicId]); - return &simdIntrinsicInfoArray[intrinsicId]; - } - else - { - JITDUMP("Method %s is NOT a SIMD intrinsic\n", methodName); - } - - return nullptr; -} - // Pops and returns GenTree node from importer's type stack. // Normalizes TYP_STRUCT value in case of GT_CALL and GT_RET_EXPR. // @@ -1222,40 +977,6 @@ GenTree* Compiler::impSIMDPopStack(var_types type, bool expectAddr, CORINFO_CLAS return tree; } -//------------------------------------------------------------------------ -// getOp1ForConstructor: Get the op1 for a constructor call. -// -// Arguments: -// opcode - the opcode being handled (needed to identify the CEE_NEWOBJ case) -// newobjThis - For CEE_NEWOBJ, this is the temp grabbed for the allocated uninitialized object. -// clsHnd - The handle of the class of the method. -// -// Return Value: -// The tree node representing the object to be initialized with the constructor. -// -// Notes: -// This method handles the differences between the CEE_NEWOBJ and constructor cases. 
-// -GenTree* Compiler::getOp1ForConstructor(OPCODE opcode, GenTree* newobjThis, CORINFO_CLASS_HANDLE clsHnd) -{ - GenTree* op1; - if (opcode == CEE_NEWOBJ) - { - op1 = newobjThis; - assert(newobjThis->OperIs(GT_LCL_VAR_ADDR)); - - // push newobj result on type stack - unsigned lclNum = op1->AsLclVarCommon()->GetLclNum(); - impPushOnStack(gtNewLclvNode(lclNum, lvaGetRealType(lclNum)), verMakeTypeInfo(clsHnd).NormaliseForStack()); - } - else - { - op1 = impSIMDPopStack(TYP_BYREF); - } - assert(op1->TypeGet() == TYP_BYREF); - return op1; -} - //------------------------------------------------------------------- // Set the flag that indicates that the lclVar referenced by this tree // is used in a SIMD intrinsic. @@ -1598,174 +1319,4 @@ void Compiler::impMarkContiguousSIMDFieldAssignments(Statement* stmt) fgPreviousCandidateSIMDFieldAsgStmt = nullptr; } } - -//------------------------------------------------------------------------ -// impSIMDIntrinsic: Check method to see if it is a SIMD method -// -// Arguments: -// opcode - the opcode being handled (needed to identify the CEE_NEWOBJ case) -// newobjThis - For CEE_NEWOBJ, this is the temp grabbed for the allocated uninitialized object. -// clsHnd - The handle of the class of the method. -// method - The handle of the method. -// sig - The call signature for the method. -// memberRef - The memberRef token for the method reference. -// -// Return Value: -// If clsHnd is a known SIMD type, and 'method' is one of the methods that are -// implemented as an intrinsic in the JIT, then return the tree that implements -// it. -// -GenTree* Compiler::impSIMDIntrinsic(OPCODE opcode, - GenTree* newobjThis, - CORINFO_CLASS_HANDLE clsHnd, - CORINFO_METHOD_HANDLE methodHnd, - CORINFO_SIG_INFO* sig, - unsigned methodFlags, - int memberRef) -{ - assert((methodFlags & CORINFO_FLG_INTRINSIC) != 0); - - // Exit early if we are not in one of the SIMD types. 
- if (!isSIMDClass(clsHnd)) - { - return nullptr; - } - - // Get base type and intrinsic Id - CorInfoType simdBaseJitType = CORINFO_TYPE_UNDEF; - unsigned size = 0; - unsigned argCount = 0; - const SIMDIntrinsicInfo* intrinsicInfo = - getSIMDIntrinsicInfo(&clsHnd, methodHnd, sig, (opcode == CEE_NEWOBJ), &argCount, &simdBaseJitType, &size); - - // Exit early if the intrinsic is invalid or unrecognized - if ((intrinsicInfo == nullptr) || (intrinsicInfo->id == SIMDIntrinsicInvalid)) - { - return nullptr; - } - - if (!IsBaselineSimdIsaSupported()) - { - // The user disabled support for the baseline ISA so - // don't emit any SIMD intrinsics as they all require - // this at a minimum. - - return nullptr; - } - - SIMDIntrinsicID simdIntrinsicID = intrinsicInfo->id; - var_types simdBaseType; - var_types simdType; - - assert(simdBaseJitType != CORINFO_TYPE_UNDEF); - { - simdBaseType = JitType2PreciseVarType(simdBaseJitType); - simdType = getSIMDTypeForSize(size); - } - - bool instMethod = intrinsicInfo->isInstMethod; - var_types callType = JITtype2varType(sig->retType); - if (callType == TYP_STRUCT) - { - // Note that here we are assuming that, if the call returns a struct, that it is the same size as the - // struct on which the method is declared. This is currently true for all methods on Vector types, - // but if this ever changes, we will need to determine the callType from the signature. - assert(info.compCompHnd->getClassSize(sig->retTypeClass) == genTypeSize(simdType)); - callType = simdType; - } - - GenTree* simdTree = nullptr; - GenTree* op1 = nullptr; - GenTree* op2 = nullptr; - GenTree* op3 = nullptr; - GenTree* retVal = nullptr; - GenTree* copyBlkDst = nullptr; - bool doCopyBlk = false; - - switch (simdIntrinsicID) - { - case SIMDIntrinsicInitFixed: - { - // We are initializing a fixed-length vector VLarge with a smaller fixed-length vector VSmall, plus 1 or 2 - // additional floats. 
- // op4 (optional) - float value for VLarge.W, if VLarge is Vector4, and VSmall is Vector2 - // op3 - float value for VLarge.Z or VLarge.W - // op2 - VSmall - // op1 - byref of VLarge - assert(simdBaseType == TYP_FLOAT); - - GenTree* op4 = nullptr; - if (argCount == 4) - { - op4 = impSIMDPopStack(TYP_FLOAT); - assert(op4->TypeGet() == TYP_FLOAT); - } - op3 = impSIMDPopStack(TYP_FLOAT); - assert(op3->TypeGet() == TYP_FLOAT); - // The input vector will either be TYP_SIMD8 or TYP_SIMD12. - var_types smallSIMDType = TYP_SIMD8; - if ((op4 == nullptr) && (simdType == TYP_SIMD16)) - { - smallSIMDType = TYP_SIMD12; - } - op2 = impSIMDPopStack(smallSIMDType); - op1 = getOp1ForConstructor(opcode, newobjThis, clsHnd); - - // We are going to redefine the operands so that: - // - op3 is the value that's going into the Z position, or null if it's a Vector4 constructor with a single - // operand, and - // - op4 is the W position value, or null if this is a Vector3 constructor. - if (size == 16 && argCount == 3) - { - op4 = op3; - op3 = nullptr; - } - - simdTree = op2; - if (op3 != nullptr) - { - simdTree = gtNewSimdWithElementNode(simdType, simdTree, gtNewIconNode(2, TYP_INT), op3, simdBaseJitType, - size, /* isSimdAsHWIntrinsic */ true); - } - if (op4 != nullptr) - { - simdTree = gtNewSimdWithElementNode(simdType, simdTree, gtNewIconNode(3, TYP_INT), op4, simdBaseJitType, - size, /* isSimdAsHWIntrinsic */ true); - } - - copyBlkDst = op1; - doCopyBlk = true; - } - break; - - default: - assert(!"Unimplemented SIMD Intrinsic"); - return nullptr; - } - -#if defined(TARGET_XARCH) || defined(TARGET_ARM64) - // XArch/Arm64: also indicate that we use floating point registers. - // The need for setting this here is that a method may not have SIMD - // type lclvars, but might be exercising SIMD intrinsics on fields of - // SIMD type. - // - // e.g. 
public Vector ComplexVecFloat::sqabs() { return this.r * this.r + this.i * this.i; } - compFloatingPointUsed = true; -#endif // defined(TARGET_XARCH) || defined(TARGET_ARM64) - - // At this point, we have a tree that we are going to store into a destination. - // TODO-1stClassStructs: This should be a simple store or assignment, and should not require - // GTF_ALL_EFFECT for the dest. This is currently emulating the previous behavior of - // block ops. - if (doCopyBlk) - { - GenTree* dest = new (this, GT_BLK) - GenTreeBlk(GT_BLK, simdType, copyBlkDst, typGetBlkLayout(getSIMDTypeSizeInBytes(clsHnd))); - dest->gtFlags |= GTF_GLOB_REF; - retVal = gtNewBlkOpNode(dest, simdTree); - } - - return retVal; -} - #endif // FEATURE_SIMD diff --git a/src/coreclr/jit/simdashwintrinsic.cpp b/src/coreclr/jit/simdashwintrinsic.cpp index c5df27aad5bfc..40161ea41caf9 100644 --- a/src/coreclr/jit/simdashwintrinsic.cpp +++ b/src/coreclr/jit/simdashwintrinsic.cpp @@ -1901,6 +1901,43 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, break; } + case NI_Vector3_CreateFromVector2: + case NI_Vector4_CreateFromVector3: + { + assert(retType == TYP_VOID); + assert(simdBaseType == TYP_FLOAT); + assert((simdSize == 12) || (simdSize == 16)); + + // TODO-CQ: We should be able to check for contiguous args here after + // the relevant methods are updated to support more than just float + + if (op2->IsCnsVec() && op3->IsCnsFltOrDbl()) + { + GenTreeVecCon* vecCon = op2->AsVecCon(); + vecCon->gtType = simdType; + + if (simdSize == 12) + { + vecCon->gtSimd12Val.f32[2] = static_cast(op3->AsDblCon()->DconValue()); + } + else + { + vecCon->gtSimd16Val.f32[3] = static_cast(op3->AsDblCon()->DconValue()); + } + + copyBlkSrc = vecCon; + } + else + { + GenTree* idx = gtNewIconNode((simdSize == 12) ? 
2 : 3, TYP_INT); + copyBlkSrc = gtNewSimdWithElementNode(simdType, op2, op3, idx, simdBaseJitType, simdSize, + /* isSimdAsHWIntrinsic */ true); + } + + copyBlkDst = op1; + break; + } + default: { // Some platforms warn about unhandled switch cases @@ -1991,6 +2028,40 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, break; } + case NI_Vector4_CreateFromVector2: + { + assert(retType == TYP_VOID); + assert(simdBaseType == TYP_FLOAT); + assert(simdSize == 16); + + // TODO-CQ: We should be able to check for contiguous args here after + // the relevant methods are updated to support more than just float + + if (op2->IsCnsVec() && op3->IsCnsFltOrDbl() && op4->IsCnsFltOrDbl()) + { + GenTreeVecCon* vecCon = op2->AsVecCon(); + vecCon->gtType = simdType; + + vecCon->gtSimd16Val.f32[2] = static_cast(op3->AsDblCon()->DconValue()); + vecCon->gtSimd16Val.f32[3] = static_cast(op4->AsDblCon()->DconValue()); + + copyBlkSrc = vecCon; + } + else + { + GenTree* idx = gtNewIconNode(2, TYP_INT); + op2 = gtNewSimdWithElementNode(simdType, op2, op3, idx, simdBaseJitType, simdSize, + /* isSimdAsHWIntrinsic */ true); + + idx = gtNewIconNode(3, TYP_INT); + copyBlkSrc = gtNewSimdWithElementNode(simdType, op2, op4, idx, simdBaseJitType, simdSize, + /* isSimdAsHWIntrinsic */ true); + } + + copyBlkDst = op1; + break; + } + default: { // Some platforms warn about unhandled switch cases diff --git a/src/coreclr/jit/simdashwintrinsiclistarm64.h b/src/coreclr/jit/simdashwintrinsiclistarm64.h index a25e9e932d145..405923bb2e3ed 100644 --- a/src/coreclr/jit/simdashwintrinsiclistarm64.h +++ b/src/coreclr/jit/simdashwintrinsiclistarm64.h @@ -61,6 +61,7 @@ SIMD_AS_HWINTRINSIC_ID(Vector2, SquareRoot, SIMD_AS_HWINTRINSIC_ID(Vector3, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_NM(Vector3, Create, ".ctor", 4, {NI_Illegal, NI_Illegal, NI_Illegal, 
NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_Create, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) SIMD_AS_HWINTRINSIC_NM(Vector3, CreateBroadcast, ".ctor", 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_CreateBroadcast, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) +SIMD_AS_HWINTRINSIC_NM(Vector3, CreateFromVector2, ".ctor", 3, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_CreateFromVector2, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) SIMD_AS_HWINTRINSIC_ID(Vector3, Dot, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_Dot, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector3, get_One, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_get_One, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector3, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_get_Zero , NI_Illegal}, SimdAsHWIntrinsicFlag::None) @@ -82,6 +83,8 @@ SIMD_AS_HWINTRINSIC_ID(Vector3, SquareRoot, SIMD_AS_HWINTRINSIC_ID(Vector4, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_NM(Vector4, Create, ".ctor", 5, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_Create, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) SIMD_AS_HWINTRINSIC_NM(Vector4, CreateBroadcast, ".ctor", 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, 
NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_CreateBroadcast, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) +SIMD_AS_HWINTRINSIC_NM(Vector4, CreateFromVector2, ".ctor", 4, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_CreateFromVector2, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) +SIMD_AS_HWINTRINSIC_NM(Vector4, CreateFromVector3, ".ctor", 3, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_CreateFromVector3, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) SIMD_AS_HWINTRINSIC_ID(Vector4, Dot, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_Dot, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector4, get_One, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_get_One, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector4, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) diff --git a/src/coreclr/jit/simdashwintrinsiclistxarch.h b/src/coreclr/jit/simdashwintrinsiclistxarch.h index 70400d8dfd42f..3fd2f87a1a539 100644 --- a/src/coreclr/jit/simdashwintrinsiclistxarch.h +++ b/src/coreclr/jit/simdashwintrinsiclistxarch.h @@ -61,6 +61,7 @@ SIMD_AS_HWINTRINSIC_ID(Vector2, SquareRoot, SIMD_AS_HWINTRINSIC_ID(Vector3, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_NM(Vector3, Create, ".ctor", 4, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, 
NI_Vector3_Create, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) SIMD_AS_HWINTRINSIC_NM(Vector3, CreateBroadcast, ".ctor", 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_CreateBroadcast, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) +SIMD_AS_HWINTRINSIC_NM(Vector3, CreateFromVector2, ".ctor", 3, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_CreateFromVector2, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) SIMD_AS_HWINTRINSIC_ID(Vector3, Dot, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_Dot, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector3, get_One, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_get_One, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector3, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) @@ -82,6 +83,8 @@ SIMD_AS_HWINTRINSIC_ID(Vector3, SquareRoot, SIMD_AS_HWINTRINSIC_ID(Vector4, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_NM(Vector4, Create, ".ctor", 5, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_Create, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) SIMD_AS_HWINTRINSIC_NM(Vector4, CreateBroadcast, ".ctor", 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_CreateBroadcast, 
NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) +SIMD_AS_HWINTRINSIC_NM(Vector4, CreateFromVector2, ".ctor", 4, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_CreateFromVector2, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) +SIMD_AS_HWINTRINSIC_NM(Vector4, CreateFromVector3, ".ctor", 3, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_CreateFromVector3, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) SIMD_AS_HWINTRINSIC_ID(Vector4, Dot, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_Dot, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector4, get_One, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_get_One, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector4, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) diff --git a/src/coreclr/jit/simdintrinsiclist.h b/src/coreclr/jit/simdintrinsiclist.h index 12b5bb2a3bbcc..74beb2db3d8ca 100644 --- a/src/coreclr/jit/simdintrinsiclist.h +++ b/src/coreclr/jit/simdintrinsiclist.h @@ -38,10 +38,6 @@ ***************************************************************************************************************************************************************************************************************************/ SIMD_INTRINSIC(nullptr, false, None, "None", TYP_UNDEF, 0, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}) -// .ctor call or newobj -// This form takes the object, a 
smaller fixed vector, and one or two additional arguments of the base type, e.g. Vector3 V = new Vector3(V2, x); where V2 is a Vector2, and x is a float. -SIMD_INTRINSIC(".ctor", true, InitFixed, "initFixed", TYP_VOID, 3, {TYP_BYREF, TYP_STRUCT, TYP_UNKNOWN}, {TYP_FLOAT, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}) - // Internal intrinsics for saving & restoring the upper half of a vector register SIMD_INTRINSIC("UpperSave", false, UpperSave, "UpperSave Internal", TYP_STRUCT, 2, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}) SIMD_INTRINSIC("UpperRestore", false, UpperRestore, "UpperRestore Internal", TYP_STRUCT, 2, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}) From 1c680110dfebfae6923052df4ed202a13e4d6c85 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Thu, 15 Dec 2022 13:10:42 -0800 Subject: [PATCH 11/20] Apply formatting patch --- src/coreclr/jit/lowerxarch.cpp | 15 +++++++-------- src/coreclr/jit/simdashwintrinsic.cpp | 10 +++++----- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 0d88903db3877..f868b16f7507d 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -1200,7 +1200,7 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) break; } - ssize_t ival = op3->AsIntConCommon()->IconValue(); + ssize_t ival = op3->AsIntConCommon()->IconValue(); ssize_t zmask = (ival & 0x0F); ssize_t count_d = (ival & 0x30) >> 4; @@ -1217,7 +1217,7 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) ival = (count_s << 6) | (count_d << 4) | (zmask); op3->AsIntConCommon()->SetIconValue(ival); } - else if(op2IsVectorZero) + else if (op2IsVectorZero) { // When op2 is zero, we can modify the 
mask to // directly zero the element we're inserting @@ -1267,8 +1267,7 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) GenTreeHWIntrinsic* op1Intrinsic = op1->AsHWIntrinsic(); - if ((op1Intrinsic->GetHWIntrinsicId() != NI_SSE41_Insert) || - (op1Intrinsic->GetSimdBaseType() != TYP_FLOAT)) + if ((op1Intrinsic->GetHWIntrinsicId() != NI_SSE41_Insert) || (op1Intrinsic->GetSimdBaseType() != TYP_FLOAT)) { // Nothing to do if op1 isn't a float32 Sse41.Insert break; @@ -7312,8 +7311,8 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) if (op1->IsVectorZero()) { - // When op1 is zero, we can contain it and we expect that - // ival is already in the correct state to account for it +// When op1 is zero, we can contain it and we expect that +// ival is already in the correct state to account for it #if DEBUG ssize_t ival = lastOp->AsIntConCommon()->IconValue(); @@ -7333,8 +7332,8 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) } else if (op2->IsVectorZero()) { - // When op2 is zero, we can contain it and we expect that - // zmask is already in the correct state to account for it +// When op2 is zero, we can contain it and we expect that +// zmask is already in the correct state to account for it #if DEBUG ssize_t ival = lastOp->AsIntConCommon()->IconValue(); diff --git a/src/coreclr/jit/simdashwintrinsic.cpp b/src/coreclr/jit/simdashwintrinsic.cpp index 40161ea41caf9..347ae5b6d083c 100644 --- a/src/coreclr/jit/simdashwintrinsic.cpp +++ b/src/coreclr/jit/simdashwintrinsic.cpp @@ -1931,7 +1931,7 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, { GenTree* idx = gtNewIconNode((simdSize == 12) ? 
2 : 3, TYP_INT); copyBlkSrc = gtNewSimdWithElementNode(simdType, op2, op3, idx, simdBaseJitType, simdSize, - /* isSimdAsHWIntrinsic */ true); + /* isSimdAsHWIntrinsic */ true); } copyBlkDst = op1; @@ -2051,11 +2051,11 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, { GenTree* idx = gtNewIconNode(2, TYP_INT); op2 = gtNewSimdWithElementNode(simdType, op2, op3, idx, simdBaseJitType, simdSize, - /* isSimdAsHWIntrinsic */ true); + /* isSimdAsHWIntrinsic */ true); - idx = gtNewIconNode(3, TYP_INT); - copyBlkSrc = gtNewSimdWithElementNode(simdType, op2, op4, idx, simdBaseJitType, simdSize, - /* isSimdAsHWIntrinsic */ true); + idx = gtNewIconNode(3, TYP_INT); + copyBlkSrc = gtNewSimdWithElementNode(simdType, op2, op4, idx, simdBaseJitType, simdSize, + /* isSimdAsHWIntrinsic */ true); } copyBlkDst = op1; From e27fb6d73b277c3b842c9fa8abbd8468e7325d31 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Thu, 15 Dec 2022 16:20:36 -0800 Subject: [PATCH 12/20] Ensure the Unsafe.Add occurs in the right position --- .../src/System/Numerics/Vector2.cs | 12 +++---- .../src/System/Numerics/Vector3.cs | 12 +++---- .../src/System/Numerics/Vector4.cs | 12 +++---- .../src/System/Numerics/Vector_1.cs | 36 ++++++------------- .../System/Runtime/Intrinsics/Vector128.cs | 21 ++++------- .../System/Runtime/Intrinsics/Vector256.cs | 21 ++++------- .../src/System/Runtime/Intrinsics/Vector64.cs | 21 ++++------- 7 files changed, 43 insertions(+), 92 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2.cs index bf2f215b19afb..ecdc9dee10bec 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2.cs @@ -575,8 +575,7 @@ public readonly void CopyTo(float[] array) ThrowHelper.ThrowArgumentException_DestinationTooShort(); } - ref byte address = ref Unsafe.As(ref 
MemoryMarshal.GetArrayDataReference(array)); - Unsafe.WriteUnaligned(ref address, this); + Unsafe.WriteUnaligned(ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(array)), this); } /// Copies the elements of the vector to a specified array starting at a specified index position. @@ -604,8 +603,7 @@ public readonly void CopyTo(float[] array, int index) ThrowHelper.ThrowArgumentException_DestinationTooShort(); } - ref byte address = ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(array)); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref address, index), this); + Unsafe.WriteUnaligned(ref Unsafe.As(ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(array), index)), this); } /// Copies the vector to the given .The length of the destination span must be at least 2. @@ -619,8 +617,7 @@ public readonly void CopyTo(Span destination) ThrowHelper.ThrowArgumentException_DestinationTooShort(); } - ref byte address = ref Unsafe.As(ref MemoryMarshal.GetReference(destination)); - Unsafe.WriteUnaligned(ref address, this); + Unsafe.WriteUnaligned(ref Unsafe.As(ref MemoryMarshal.GetReference(destination)), this); } /// Attempts to copy the vector to the given . The length of the destination span must be at least 2. 
@@ -634,8 +631,7 @@ public readonly bool TryCopyTo(Span destination) return false; } - ref byte address = ref Unsafe.As(ref MemoryMarshal.GetReference(destination)); - Unsafe.WriteUnaligned(ref address, this); + Unsafe.WriteUnaligned(ref Unsafe.As(ref MemoryMarshal.GetReference(destination)), this); return true; } diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3.cs index 97c84d0c3a0d7..f8326466664cd 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3.cs @@ -597,8 +597,7 @@ public readonly void CopyTo(float[] array) ThrowHelper.ThrowArgumentException_DestinationTooShort(); } - ref byte address = ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(array)); - Unsafe.WriteUnaligned(ref address, this); + Unsafe.WriteUnaligned(ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(array)), this); } /// Copies the elements of the vector to a specified array starting at a specified index position. @@ -626,8 +625,7 @@ public readonly void CopyTo(float[] array, int index) ThrowHelper.ThrowArgumentException_DestinationTooShort(); } - ref byte address = ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(array)); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref address, index), this); + Unsafe.WriteUnaligned(ref Unsafe.As(ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(array), index)), this); } /// Copies the vector to the given . The length of the destination span must be at least 3. @@ -641,8 +639,7 @@ public readonly void CopyTo(Span destination) ThrowHelper.ThrowArgumentException_DestinationTooShort(); } - ref byte address = ref Unsafe.As(ref MemoryMarshal.GetReference(destination)); - Unsafe.WriteUnaligned(ref address, this); + Unsafe.WriteUnaligned(ref Unsafe.As(ref MemoryMarshal.GetReference(destination)), this); } /// Attempts to copy the vector to the given . 
The length of the destination span must be at least 3. @@ -656,8 +653,7 @@ public readonly bool TryCopyTo(Span destination) return false; } - ref byte address = ref Unsafe.As(ref MemoryMarshal.GetReference(destination)); - Unsafe.WriteUnaligned(ref address, this); + Unsafe.WriteUnaligned(ref Unsafe.As(ref MemoryMarshal.GetReference(destination)), this); return true; } diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4.cs index bed8a44e70cad..5b2a3ec9e2103 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4.cs @@ -682,8 +682,7 @@ public readonly void CopyTo(float[] array) ThrowHelper.ThrowArgumentException_DestinationTooShort(); } - ref byte address = ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(array)); - Unsafe.WriteUnaligned(ref address, this); + Unsafe.WriteUnaligned(ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(array)), this); } /// Copies the elements of the vector to a specified array starting at a specified index position. @@ -711,8 +710,7 @@ public readonly void CopyTo(float[] array, int index) ThrowHelper.ThrowArgumentException_DestinationTooShort(); } - ref byte address = ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(array)); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref address, index), this); + Unsafe.WriteUnaligned(ref Unsafe.As(ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(array), index)), this); } /// Copies the vector to the given . The length of the destination span must be at least 4. 
@@ -726,8 +724,7 @@ public readonly void CopyTo(Span destination) ThrowHelper.ThrowArgumentException_DestinationTooShort(); } - ref byte address = ref Unsafe.As(ref MemoryMarshal.GetReference(destination)); - Unsafe.WriteUnaligned(ref address, this); + Unsafe.WriteUnaligned(ref Unsafe.As(ref MemoryMarshal.GetReference(destination)), this); } /// Attempts to copy the vector to the given . The length of the destination span must be at least 4. @@ -741,8 +738,7 @@ public readonly bool TryCopyTo(Span destination) return false; } - ref byte address = ref Unsafe.As(ref MemoryMarshal.GetReference(destination)); - Unsafe.WriteUnaligned(ref address, this); + Unsafe.WriteUnaligned(ref Unsafe.As(ref MemoryMarshal.GetReference(destination)), this); return true; } diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_1.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_1.cs index b3740f7c98222..874f63bc5b70c 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_1.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_1.cs @@ -41,7 +41,6 @@ namespace System.Numerics [Intrinsic] public Vector(T value) { - ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); Unsafe.SkipInit(out this); for (int index = 0; index < Count; index++) @@ -59,15 +58,13 @@ public Vector(T value) public Vector(T[] values) { // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons - Unsafe.SkipInit(out this); if (values.Length < Count) { ThrowHelper.ThrowArgumentOutOfRange_IndexMustBeLessOrEqualException(); } - ref byte address = ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(values)); - this = Unsafe.ReadUnaligned>(ref address); + this = Unsafe.ReadUnaligned>(ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(values))); } /// Creates a new from a given array. 
@@ -80,15 +77,13 @@ public Vector(T[] values) public Vector(T[] values, int index) { // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons - Unsafe.SkipInit(out this); if ((index < 0) || ((values.Length - index) < Count)) { ThrowHelper.ThrowArgumentOutOfRange_IndexMustBeLessOrEqualException(); } - ref byte address = ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(values)); - this = Unsafe.ReadUnaligned>(ref Unsafe.Add(ref address, index)); + this = Unsafe.ReadUnaligned>(ref Unsafe.As(ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(values), index))); } /// Creates a new from a given readonly span. @@ -99,15 +94,13 @@ public Vector(T[] values, int index) public Vector(ReadOnlySpan values) { // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons - Unsafe.SkipInit(out this); if (values.Length < Count) { ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.values); } - ref byte address = ref Unsafe.As(ref MemoryMarshal.GetReference(values)); - this = Unsafe.ReadUnaligned>(ref address); + this = Unsafe.ReadUnaligned>(ref Unsafe.As(ref MemoryMarshal.GetReference(values))); } /// Creates a new from a given readonly span. @@ -118,15 +111,13 @@ public Vector(ReadOnlySpan values) public Vector(ReadOnlySpan values) { // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons - Unsafe.SkipInit(out this); if (values.Length < Count) { ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.values); } - ref byte address = ref MemoryMarshal.GetReference(values); - this = Unsafe.ReadUnaligned>(ref address); + this = Unsafe.ReadUnaligned>(ref MemoryMarshal.GetReference(values)); } /// Creates a new from a given span. 
@@ -673,8 +664,7 @@ public void CopyTo(T[] destination) ThrowHelper.ThrowArgumentException_DestinationTooShort(); } - ref byte address = ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(destination)); - Unsafe.WriteUnaligned(ref address, this); + Unsafe.WriteUnaligned(ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(destination)), this); } /// Copies a to a given array starting at the specified index. @@ -698,8 +688,7 @@ public void CopyTo(T[] destination, int startIndex) ThrowHelper.ThrowArgumentException_DestinationTooShort(); } - ref byte address = ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(destination)); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref address, startIndex), this); + Unsafe.WriteUnaligned(ref Unsafe.As(ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(destination), startIndex)), this); } /// Copies a to a given span. @@ -713,8 +702,7 @@ public void CopyTo(Span destination) ThrowHelper.ThrowArgumentException_DestinationTooShort(); } - ref byte address = ref MemoryMarshal.GetReference(destination); - Unsafe.WriteUnaligned(ref address, this); + Unsafe.WriteUnaligned(ref MemoryMarshal.GetReference(destination), this); } /// Copies a to a given span. @@ -728,8 +716,7 @@ public void CopyTo(Span destination) ThrowHelper.ThrowArgumentException_DestinationTooShort(); } - ref byte address = ref Unsafe.As(ref MemoryMarshal.GetReference(destination)); - Unsafe.WriteUnaligned(ref address, this); + Unsafe.WriteUnaligned(ref Unsafe.As(ref MemoryMarshal.GetReference(destination)), this); } /// Returns a boolean indicating whether the given Object is equal to this vector instance. 
@@ -835,8 +822,7 @@ public bool TryCopyTo(Span destination) return false; } - ref byte address = ref MemoryMarshal.GetReference(destination); - Unsafe.WriteUnaligned(ref address, this); + Unsafe.WriteUnaligned(ref MemoryMarshal.GetReference(destination), this); return true; } @@ -846,14 +832,12 @@ public bool TryCopyTo(Span destination) [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool TryCopyTo(Span destination) { - ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); - if ((uint)destination.Length < (uint)Count) { return false; } - Unsafe.WriteUnaligned>(ref Unsafe.As(ref MemoryMarshal.GetReference(destination)), this); + Unsafe.WriteUnaligned(ref Unsafe.As(ref MemoryMarshal.GetReference(destination)), this); return true; } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs index 0079c0aa41318..1bb4847438137 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs @@ -610,8 +610,7 @@ public static void CopyTo(this Vector128 vector, T[] destination) ThrowHelper.ThrowArgumentException_DestinationTooShort(); } - ref byte address = ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(destination)); - Unsafe.WriteUnaligned(ref address, vector); + Unsafe.WriteUnaligned(ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(destination)), vector); } /// Copies a to a given array starting at the specified index. 
@@ -639,8 +638,7 @@ public static unsafe void CopyTo(this Vector128 vector, T[] destination, i ThrowHelper.ThrowArgumentException_DestinationTooShort(); } - ref byte address = ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(destination)); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref address, startIndex), vector); + Unsafe.WriteUnaligned(ref Unsafe.As(ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(destination), startIndex)), vector); } /// Copies a to a given span. @@ -658,8 +656,7 @@ public static void CopyTo(this Vector128 vector, Span destination) ThrowHelper.ThrowArgumentException_DestinationTooShort(); } - ref byte address = ref Unsafe.As(ref MemoryMarshal.GetReference(destination)); - Unsafe.WriteUnaligned(ref address, vector); + Unsafe.WriteUnaligned(ref Unsafe.As(ref MemoryMarshal.GetReference(destination)), vector); } /// Creates a new instance with all elements initialized to the specified value. @@ -793,8 +790,7 @@ public static Vector128 Create(T[] values) ThrowHelper.ThrowArgumentOutOfRange_IndexMustBeLessOrEqualException(); } - ref byte address = ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(values)); - return Unsafe.ReadUnaligned>(ref address); + return Unsafe.ReadUnaligned>(ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(values))); } /// Creates a new from a given array. @@ -816,8 +812,7 @@ public static Vector128 Create(T[] values, int index) ThrowHelper.ThrowArgumentOutOfRange_IndexMustBeLessOrEqualException(); } - ref byte address = ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(values)); - return Unsafe.ReadUnaligned>(ref Unsafe.Add(ref address, index)); + return Unsafe.ReadUnaligned>(ref Unsafe.As(ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(values), index))); } /// Creates a new from a given readonly span. 
@@ -835,8 +830,7 @@ public static Vector128 Create(ReadOnlySpan values) ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.values); } - ref byte address = ref Unsafe.As(ref MemoryMarshal.GetReference(values)); - return Unsafe.ReadUnaligned>(ref address); + return Unsafe.ReadUnaligned>(ref Unsafe.As(ref MemoryMarshal.GetReference(values))); } /// Creates a new instance with each element initialized to the corresponding specified value. @@ -2788,8 +2782,7 @@ public static bool TryCopyTo(this Vector128 vector, Span destination) return false; } - ref byte address = ref Unsafe.As(ref MemoryMarshal.GetReference(destination)); - Unsafe.WriteUnaligned(ref address, vector); + Unsafe.WriteUnaligned(ref Unsafe.As(ref MemoryMarshal.GetReference(destination)), vector); return true; } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs index fd998dcbf6b36..ed889471ebb75 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs @@ -536,8 +536,7 @@ public static void CopyTo(this Vector256 vector, T[] destination) ThrowHelper.ThrowArgumentException_DestinationTooShort(); } - ref byte address = ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(destination)); - Unsafe.WriteUnaligned(ref address, vector); + Unsafe.WriteUnaligned(ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(destination)), vector); } /// Copies a to a given array starting at the specified index. 
@@ -565,8 +564,7 @@ public static void CopyTo(this Vector256 vector, T[] destination, int star ThrowHelper.ThrowArgumentException_DestinationTooShort(); } - ref byte address = ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(destination)); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref address, startIndex), vector); + Unsafe.WriteUnaligned(ref Unsafe.As(ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(destination), startIndex)), vector); } /// Copies a to a given span. @@ -584,8 +582,7 @@ public static void CopyTo(this Vector256 vector, Span destination) ThrowHelper.ThrowArgumentException_DestinationTooShort(); } - ref byte address = ref Unsafe.As(ref MemoryMarshal.GetReference(destination)); - Unsafe.WriteUnaligned(ref address, vector); + Unsafe.WriteUnaligned(ref Unsafe.As(ref MemoryMarshal.GetReference(destination)), vector); } /// Creates a new instance with all elements initialized to the specified value. @@ -719,8 +716,7 @@ public static Vector256 Create(T[] values) ThrowHelper.ThrowArgumentOutOfRange_IndexMustBeLessOrEqualException(); } - ref byte address = ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(values)); - return Unsafe.ReadUnaligned>(ref address); + return Unsafe.ReadUnaligned>(ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(values))); } /// Creates a new from a given array. @@ -742,8 +738,7 @@ public static Vector256 Create(T[] values, int index) ThrowHelper.ThrowArgumentOutOfRange_IndexMustBeLessOrEqualException(); } - ref byte address = ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(values)); - return Unsafe.ReadUnaligned>(ref Unsafe.Add(ref address, index)); + return Unsafe.ReadUnaligned>(ref Unsafe.As(ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(values), index))); } /// Creates a new from a given readonly span. 
@@ -761,8 +756,7 @@ public static Vector256 Create(ReadOnlySpan values) ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.values); } - ref byte address = ref Unsafe.As(ref MemoryMarshal.GetReference(values)); - return Unsafe.ReadUnaligned>(ref address); + return Unsafe.ReadUnaligned>(ref Unsafe.As(ref MemoryMarshal.GetReference(values))); } /// Creates a new instance with each element initialized to the corresponding specified value. @@ -2764,8 +2758,7 @@ public static bool TryCopyTo(this Vector256 vector, Span destination) return false; } - ref byte address = ref Unsafe.As(ref MemoryMarshal.GetReference(destination)); - Unsafe.WriteUnaligned(ref address, vector); + Unsafe.WriteUnaligned(ref Unsafe.As(ref MemoryMarshal.GetReference(destination)), vector); return true; } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs index 159b350836f49..1bd0a6219ecb5 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs @@ -476,8 +476,7 @@ public static void CopyTo(this Vector64 vector, T[] destination) ThrowHelper.ThrowArgumentException_DestinationTooShort(); } - ref byte address = ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(destination)); - Unsafe.WriteUnaligned(ref address, vector); + Unsafe.WriteUnaligned(ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(destination)), vector); } /// Copies a to a given array starting at the specified index. 
@@ -505,8 +504,7 @@ public static unsafe void CopyTo(this Vector64 vector, T[] destination, in ThrowHelper.ThrowArgumentException_DestinationTooShort(); } - ref byte address = ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(destination)); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref address, startIndex), vector); + Unsafe.WriteUnaligned(ref Unsafe.As(ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(destination), startIndex)), vector); } /// Copies a to a given span. @@ -524,8 +522,7 @@ public static void CopyTo(this Vector64 vector, Span destination) ThrowHelper.ThrowArgumentException_DestinationTooShort(); } - ref byte address = ref Unsafe.As(ref MemoryMarshal.GetReference(destination)); - Unsafe.WriteUnaligned(ref address, vector); + Unsafe.WriteUnaligned(ref Unsafe.As(ref MemoryMarshal.GetReference(destination)), vector); } /// Creates a new instance with all elements initialized to the specified value. @@ -660,8 +657,7 @@ public static Vector64 Create(T[] values) ThrowHelper.ThrowArgumentOutOfRange_IndexMustBeLessOrEqualException(); } - ref byte address = ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(values)); - return Unsafe.ReadUnaligned>(ref address); + return Unsafe.ReadUnaligned>(ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(values))); } /// Creates a new from a given array. @@ -683,8 +679,7 @@ public static Vector64 Create(T[] values, int index) ThrowHelper.ThrowArgumentOutOfRange_IndexMustBeLessOrEqualException(); } - ref byte address = ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(values)); - return Unsafe.ReadUnaligned>(ref Unsafe.Add(ref address, index)); + return Unsafe.ReadUnaligned>(ref Unsafe.As(ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(values), index))); } /// Creates a new from a given readonly span. 
@@ -702,8 +697,7 @@ public static Vector64 Create(ReadOnlySpan values) ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.values); } - ref byte address = ref Unsafe.As(ref MemoryMarshal.GetReference(values)); - return Unsafe.ReadUnaligned>(ref address); + return Unsafe.ReadUnaligned>(ref Unsafe.As(ref MemoryMarshal.GetReference(values))); } /// Creates a new instance with each element initialized to the corresponding specified value. @@ -2442,8 +2436,7 @@ public static bool TryCopyTo(this Vector64 vector, Span destination) return false; } - ref byte address = ref Unsafe.As(ref MemoryMarshal.GetReference(destination)); - Unsafe.WriteUnaligned(ref address, vector); + Unsafe.WriteUnaligned(ref Unsafe.As(ref MemoryMarshal.GetReference(destination)), vector); return true; } From e9fe0e15692d2c5049a7681b9a7ef30e1ec859e0 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Thu, 15 Dec 2022 17:57:08 -0800 Subject: [PATCH 13/20] Ensure the Vector APIs that take Span and ROSpan use sizeof(Vector) --- .../src/System/Numerics/Vector2.cs | 4 ++-- .../src/System/Numerics/Vector3.cs | 4 ++-- .../src/System/Numerics/Vector4.cs | 4 ++-- .../src/System/Numerics/Vector_1.cs | 23 +++++++++++-------- .../System/Runtime/Intrinsics/Vector128.cs | 4 ++-- .../System/Runtime/Intrinsics/Vector256.cs | 4 ++-- .../src/System/Runtime/Intrinsics/Vector64.cs | 4 ++-- 7 files changed, 25 insertions(+), 22 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2.cs index ecdc9dee10bec..3f0fc57ac7ce9 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2.cs @@ -612,7 +612,7 @@ public readonly void CopyTo(float[] array, int index) [MethodImpl(MethodImplOptions.AggressiveInlining)] public readonly void CopyTo(Span destination) { - if ((uint)destination.Length < (uint)Count) + if 
(destination.Length < Count) { ThrowHelper.ThrowArgumentException_DestinationTooShort(); } @@ -626,7 +626,7 @@ public readonly void CopyTo(Span destination) [MethodImpl(MethodImplOptions.AggressiveInlining)] public readonly bool TryCopyTo(Span destination) { - if ((uint)destination.Length < (uint)Count) + if (destination.Length < Count) { return false; } diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3.cs index f8326466664cd..d5115f42d53bc 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3.cs @@ -634,7 +634,7 @@ public readonly void CopyTo(float[] array, int index) [MethodImpl(MethodImplOptions.AggressiveInlining)] public readonly void CopyTo(Span destination) { - if ((uint)destination.Length < (uint)Count) + if (destination.Length < Count) { ThrowHelper.ThrowArgumentException_DestinationTooShort(); } @@ -648,7 +648,7 @@ public readonly void CopyTo(Span destination) [MethodImpl(MethodImplOptions.AggressiveInlining)] public readonly bool TryCopyTo(Span destination) { - if ((uint)destination.Length < (uint)Count) + if (destination.Length < Count) { return false; } diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4.cs index 5b2a3ec9e2103..f97dca5e03f02 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4.cs @@ -719,7 +719,7 @@ public readonly void CopyTo(float[] array, int index) [MethodImpl(MethodImplOptions.AggressiveInlining)] public readonly void CopyTo(Span destination) { - if ((uint)destination.Length < (uint)Count) + if (destination.Length < Count) { ThrowHelper.ThrowArgumentException_DestinationTooShort(); } @@ -733,7 +733,7 @@ public readonly void CopyTo(Span 
destination) [MethodImpl(MethodImplOptions.AggressiveInlining)] public readonly bool TryCopyTo(Span destination) { - if ((uint)destination.Length < (uint)Count) + if (destination.Length < Count) { return false; } diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_1.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_1.cs index 874f63bc5b70c..9c813426a873d 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_1.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_1.cs @@ -9,6 +9,9 @@ using System.Runtime.Intrinsics; using System.Text; +// We use sizeof(Vector) in a few places and want to ignore the warning that it could be a managed type +#pragma warning disable 8500 + namespace System.Numerics { /* Note: The following patterns are used throughout the code here and are described here @@ -108,11 +111,11 @@ public Vector(ReadOnlySpan values) /// A new with its elements set to the first sizeof() elements from . /// The length of is less than sizeof(). [MethodImpl(MethodImplOptions.AggressiveInlining)] - public Vector(ReadOnlySpan values) + public unsafe Vector(ReadOnlySpan values) { // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons - if (values.Length < Count) + if (values.Length < sizeof(Vector)) { ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.values); } @@ -151,9 +154,7 @@ public static unsafe int Count get { ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); -#pragma warning disable 8500 // sizeof of managed types return sizeof(Vector) / sizeof(T); -#pragma warning restore 8500 } } @@ -695,9 +696,9 @@ public void CopyTo(T[] destination, int startIndex) /// The span to which the current instance is copied. /// The length of is less than sizeof(). 
[MethodImpl(MethodImplOptions.AggressiveInlining)] - public void CopyTo(Span destination) + public unsafe void CopyTo(Span destination) { - if ((uint)destination.Length < (uint)Count) + if (destination.Length < sizeof(Vector)) { ThrowHelper.ThrowArgumentException_DestinationTooShort(); } @@ -711,7 +712,7 @@ public void CopyTo(Span destination) [MethodImpl(MethodImplOptions.AggressiveInlining)] public void CopyTo(Span destination) { - if ((uint)destination.Length < (uint)Count) + if (destination.Length < Count) { ThrowHelper.ThrowArgumentException_DestinationTooShort(); } @@ -815,9 +816,9 @@ public string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] strin /// The span to which the current instance is copied. /// true if the current instance was successfully copied to ; otherwise, false if the length of is less than sizeof(). [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool TryCopyTo(Span destination) + public unsafe bool TryCopyTo(Span destination) { - if ((uint)destination.Length < (uint)Count) + if (destination.Length < sizeof(Vector)) { return false; } @@ -832,7 +833,7 @@ public bool TryCopyTo(Span destination) [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool TryCopyTo(Span destination) { - if ((uint)destination.Length < (uint)Count) + if (destination.Length < Count) { return false; } @@ -842,3 +843,5 @@ public bool TryCopyTo(Span destination) } } } + +#pragma warning restore CS8500 diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs index 1bb4847438137..f6446076c1b73 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs @@ -651,7 +651,7 @@ public static unsafe void CopyTo(this Vector128 vector, T[] destination, i public static void CopyTo(this Vector128 vector, Span 
destination) where T : struct { - if ((uint)destination.Length < (uint)Vector128.Count) + if (destination.Length < Vector128.Count) { ThrowHelper.ThrowArgumentException_DestinationTooShort(); } @@ -2777,7 +2777,7 @@ public static unsafe Vector256 ToVector256Unsafe(this Vector128 vector) public static bool TryCopyTo(this Vector128 vector, Span destination) where T : struct { - if ((uint)destination.Length < (uint)Vector128.Count) + if (destination.Length < Vector128.Count) { return false; } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs index ed889471ebb75..8fef6d56f3bd1 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs @@ -577,7 +577,7 @@ public static void CopyTo(this Vector256 vector, T[] destination, int star public static void CopyTo(this Vector256 vector, Span destination) where T : struct { - if ((uint)destination.Length < (uint)Vector256.Count) + if (destination.Length < Vector256.Count) { ThrowHelper.ThrowArgumentException_DestinationTooShort(); } @@ -2753,7 +2753,7 @@ public static T ToScalar(this Vector256 vector) public static bool TryCopyTo(this Vector256 vector, Span destination) where T : struct { - if ((uint)destination.Length < (uint)Vector256.Count) + if (destination.Length < Vector256.Count) { return false; } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs index 1bd0a6219ecb5..34eff21909104 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs @@ -517,7 +517,7 @@ public static unsafe void CopyTo(this Vector64 vector, T[] destination, in public static 
void CopyTo(this Vector64 vector, Span destination) where T : struct { - if ((uint)destination.Length < (uint)Vector64.Count) + if (destination.Length < Vector64.Count) { ThrowHelper.ThrowArgumentException_DestinationTooShort(); } @@ -2431,7 +2431,7 @@ public static unsafe Vector128 ToVector128Unsafe(this Vector64 vector) public static bool TryCopyTo(this Vector64 vector, Span destination) where T : struct { - if ((uint)destination.Length < (uint)Vector64.Count) + if (destination.Length < Vector64.Count) { return false; } From 2299bd7dc23bdcc57820d793632c86baf90df82f Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Thu, 15 Dec 2022 19:08:24 -0800 Subject: [PATCH 14/20] Ensure the Vector APIs that take Span/ROSpan check for unsupported types --- .../System.Private.CoreLib/src/System/Numerics/Vector_1.cs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_1.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_1.cs index 9c813426a873d..966cc2468dd77 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_1.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_1.cs @@ -114,6 +114,7 @@ public Vector(ReadOnlySpan values) public unsafe Vector(ReadOnlySpan values) { // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons + ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); if (values.Length < sizeof(Vector)) { @@ -698,6 +699,8 @@ public void CopyTo(T[] destination, int startIndex) [MethodImpl(MethodImplOptions.AggressiveInlining)] public unsafe void CopyTo(Span destination) { + ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); + if (destination.Length < sizeof(Vector)) { ThrowHelper.ThrowArgumentException_DestinationTooShort(); @@ -818,6 +821,8 @@ public string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] strin 
[MethodImpl(MethodImplOptions.AggressiveInlining)] public unsafe bool TryCopyTo(Span destination) { + ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); + if (destination.Length < sizeof(Vector)) { return false; From 14203ab890d0a718b43b1ad7660b827730c66cd7 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 16 Dec 2022 08:27:54 -0800 Subject: [PATCH 15/20] Workaround an aliasing bug in GetArrayDataReference --- .../System.Private.CoreLib/src/System/Numerics/Vector2.cs | 4 ++-- .../System.Private.CoreLib/src/System/Numerics/Vector3.cs | 4 ++-- .../System.Private.CoreLib/src/System/Numerics/Vector4.cs | 4 ++-- .../src/System/Numerics/Vector_1.cs | 8 ++++---- .../src/System/Runtime/Intrinsics/Vector128.cs | 8 ++++---- .../src/System/Runtime/Intrinsics/Vector256.cs | 8 ++++---- .../src/System/Runtime/Intrinsics/Vector64.cs | 8 ++++---- 7 files changed, 22 insertions(+), 22 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2.cs index 3f0fc57ac7ce9..bb6d235cbffbe 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2.cs @@ -575,7 +575,7 @@ public readonly void CopyTo(float[] array) ThrowHelper.ThrowArgumentException_DestinationTooShort(); } - Unsafe.WriteUnaligned(ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(array)), this); + Unsafe.WriteUnaligned(ref Unsafe.As(ref array[0]), this); } /// Copies the elements of the vector to a specified array starting at a specified index position. 
@@ -603,7 +603,7 @@ public readonly void CopyTo(float[] array, int index) ThrowHelper.ThrowArgumentException_DestinationTooShort(); } - Unsafe.WriteUnaligned(ref Unsafe.As(ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(array), index)), this); + Unsafe.WriteUnaligned(ref Unsafe.As(ref array[index]), this); } /// Copies the vector to the given .The length of the destination span must be at least 2. diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3.cs index d5115f42d53bc..e7aa69589b29f 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3.cs @@ -597,7 +597,7 @@ public readonly void CopyTo(float[] array) ThrowHelper.ThrowArgumentException_DestinationTooShort(); } - Unsafe.WriteUnaligned(ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(array)), this); + Unsafe.WriteUnaligned(ref Unsafe.As(ref array[0]), this); } /// Copies the elements of the vector to a specified array starting at a specified index position. @@ -625,7 +625,7 @@ public readonly void CopyTo(float[] array, int index) ThrowHelper.ThrowArgumentException_DestinationTooShort(); } - Unsafe.WriteUnaligned(ref Unsafe.As(ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(array), index)), this); + Unsafe.WriteUnaligned(ref Unsafe.As(ref array[index]), this); } /// Copies the vector to the given . The length of the destination span must be at least 3. 
diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4.cs index f97dca5e03f02..dbb62e749bb36 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4.cs @@ -682,7 +682,7 @@ public readonly void CopyTo(float[] array) ThrowHelper.ThrowArgumentException_DestinationTooShort(); } - Unsafe.WriteUnaligned(ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(array)), this); + Unsafe.WriteUnaligned(ref Unsafe.As(ref array[0]), this); } /// Copies the elements of the vector to a specified array starting at a specified index position. @@ -710,7 +710,7 @@ public readonly void CopyTo(float[] array, int index) ThrowHelper.ThrowArgumentException_DestinationTooShort(); } - Unsafe.WriteUnaligned(ref Unsafe.As(ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(array), index)), this); + Unsafe.WriteUnaligned(ref Unsafe.As(ref array[index]), this); } /// Copies the vector to the given . The length of the destination span must be at least 4. diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_1.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_1.cs index 966cc2468dd77..cd102ac3a720c 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_1.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_1.cs @@ -67,7 +67,7 @@ public Vector(T[] values) ThrowHelper.ThrowArgumentOutOfRange_IndexMustBeLessOrEqualException(); } - this = Unsafe.ReadUnaligned>(ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(values))); + this = Unsafe.ReadUnaligned>(ref Unsafe.As(ref values[0])); } /// Creates a new from a given array. 
@@ -86,7 +86,7 @@ public Vector(T[] values, int index) ThrowHelper.ThrowArgumentOutOfRange_IndexMustBeLessOrEqualException(); } - this = Unsafe.ReadUnaligned>(ref Unsafe.As(ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(values), index))); + this = Unsafe.ReadUnaligned>(ref Unsafe.As(ref values[index])); } /// Creates a new from a given readonly span. @@ -666,7 +666,7 @@ public void CopyTo(T[] destination) ThrowHelper.ThrowArgumentException_DestinationTooShort(); } - Unsafe.WriteUnaligned(ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(destination)), this); + Unsafe.WriteUnaligned(ref Unsafe.As(ref destination[0]), this); } /// Copies a to a given array starting at the specified index. @@ -690,7 +690,7 @@ public void CopyTo(T[] destination, int startIndex) ThrowHelper.ThrowArgumentException_DestinationTooShort(); } - Unsafe.WriteUnaligned(ref Unsafe.As(ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(destination), startIndex)), this); + Unsafe.WriteUnaligned(ref Unsafe.As(ref destination[startIndex]), this); } /// Copies a to a given span. diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs index f6446076c1b73..88db7bd90f960 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs @@ -610,7 +610,7 @@ public static void CopyTo(this Vector128 vector, T[] destination) ThrowHelper.ThrowArgumentException_DestinationTooShort(); } - Unsafe.WriteUnaligned(ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(destination)), vector); + Unsafe.WriteUnaligned(ref Unsafe.As(ref destination[0]), vector); } /// Copies a to a given array starting at the specified index. 
@@ -638,7 +638,7 @@ public static unsafe void CopyTo(this Vector128 vector, T[] destination, i ThrowHelper.ThrowArgumentException_DestinationTooShort(); } - Unsafe.WriteUnaligned(ref Unsafe.As(ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(destination), startIndex)), vector); + Unsafe.WriteUnaligned(ref Unsafe.As(ref destination[startIndex]), vector); } /// Copies a to a given span. @@ -790,7 +790,7 @@ public static Vector128 Create(T[] values) ThrowHelper.ThrowArgumentOutOfRange_IndexMustBeLessOrEqualException(); } - return Unsafe.ReadUnaligned>(ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(values))); + return Unsafe.ReadUnaligned>(ref Unsafe.As(ref values[0])); } /// Creates a new from a given array. @@ -812,7 +812,7 @@ public static Vector128 Create(T[] values, int index) ThrowHelper.ThrowArgumentOutOfRange_IndexMustBeLessOrEqualException(); } - return Unsafe.ReadUnaligned>(ref Unsafe.As(ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(values), index))); + return Unsafe.ReadUnaligned>(ref Unsafe.As(ref values[index])); } /// Creates a new from a given readonly span. diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs index 8fef6d56f3bd1..14b8e5bdfd1b0 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs @@ -536,7 +536,7 @@ public static void CopyTo(this Vector256 vector, T[] destination) ThrowHelper.ThrowArgumentException_DestinationTooShort(); } - Unsafe.WriteUnaligned(ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(destination)), vector); + Unsafe.WriteUnaligned(ref Unsafe.As(ref destination[0]), vector); } /// Copies a to a given array starting at the specified index. 
@@ -564,7 +564,7 @@ public static void CopyTo(this Vector256 vector, T[] destination, int star ThrowHelper.ThrowArgumentException_DestinationTooShort(); } - Unsafe.WriteUnaligned(ref Unsafe.As(ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(destination), startIndex)), vector); + Unsafe.WriteUnaligned(ref Unsafe.As(ref destination[startIndex]), vector); } /// Copies a to a given span. @@ -716,7 +716,7 @@ public static Vector256 Create(T[] values) ThrowHelper.ThrowArgumentOutOfRange_IndexMustBeLessOrEqualException(); } - return Unsafe.ReadUnaligned>(ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(values))); + return Unsafe.ReadUnaligned>(ref Unsafe.As(ref values[0])); } /// Creates a new from a given array. @@ -738,7 +738,7 @@ public static Vector256 Create(T[] values, int index) ThrowHelper.ThrowArgumentOutOfRange_IndexMustBeLessOrEqualException(); } - return Unsafe.ReadUnaligned>(ref Unsafe.As(ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(values), index))); + return Unsafe.ReadUnaligned>(ref Unsafe.As(ref values[index])); } /// Creates a new from a given readonly span. diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs index 34eff21909104..42028ce728b02 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs @@ -476,7 +476,7 @@ public static void CopyTo(this Vector64 vector, T[] destination) ThrowHelper.ThrowArgumentException_DestinationTooShort(); } - Unsafe.WriteUnaligned(ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(destination)), vector); + Unsafe.WriteUnaligned(ref Unsafe.As(ref destination[0]), vector); } /// Copies a to a given array starting at the specified index. 
@@ -504,7 +504,7 @@ public static unsafe void CopyTo(this Vector64 vector, T[] destination, in ThrowHelper.ThrowArgumentException_DestinationTooShort(); } - Unsafe.WriteUnaligned(ref Unsafe.As(ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(destination), startIndex)), vector); + Unsafe.WriteUnaligned(ref Unsafe.As(ref destination[startIndex]), vector); } /// Copies a to a given span. @@ -657,7 +657,7 @@ public static Vector64 Create(T[] values) ThrowHelper.ThrowArgumentOutOfRange_IndexMustBeLessOrEqualException(); } - return Unsafe.ReadUnaligned>(ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(values))); + return Unsafe.ReadUnaligned>(ref Unsafe.As(ref values[0])); } /// Creates a new from a given array. @@ -679,7 +679,7 @@ public static Vector64 Create(T[] values, int index) ThrowHelper.ThrowArgumentOutOfRange_IndexMustBeLessOrEqualException(); } - return Unsafe.ReadUnaligned>(ref Unsafe.As(ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(values), index))); + return Unsafe.ReadUnaligned>(ref Unsafe.As(ref values[index])); } /// Creates a new from a given readonly span. 
From ca171fc8002d6b780ce61adca88ddb411c188fa7 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 16 Dec 2022 11:55:12 -0800 Subject: [PATCH 16/20] Ensure the right size/type is used for Vector###_Create contiguous args handling --- src/coreclr/jit/hwintrinsicarm64.cpp | 4 ++-- src/coreclr/jit/hwintrinsicxarch.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 40421f1170d55..0bd7c286e9df5 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -712,8 +712,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, if (areArgsContiguous) { op1 = nodeBuilder.GetOperand(0); - GenTree* op1Address = CreateAddressNodeForSimdHWIntrinsicCreate(op1, simdBaseType, 16); - retNode = gtNewOperNode(GT_IND, TYP_SIMD16, op1Address); + GenTree* op1Address = CreateAddressNodeForSimdHWIntrinsicCreate(op1, simdBaseType, simdSize); + retNode = gtNewOperNode(GT_IND, retType, op1Address); } else { diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index 3b32895ef6508..3427756f1cc26 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -1084,8 +1084,8 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic, if (areArgsContiguous) { op1 = nodeBuilder.GetOperand(0); - GenTree* op1Address = CreateAddressNodeForSimdHWIntrinsicCreate(op1, simdBaseType, 16); - retNode = gtNewOperNode(GT_IND, TYP_SIMD16, op1Address); + GenTree* op1Address = CreateAddressNodeForSimdHWIntrinsicCreate(op1, simdBaseType, simdSize); + retNode = gtNewOperNode(GT_IND, retType, op1Address); } else { From 24e9525f9573386b8d71815ed885aaff2c48e0a1 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 16 Dec 2022 18:01:37 -0800 Subject: [PATCH 17/20] Ensure that jitdiff --diff --pmi doesn't assert --- src/coreclr/jit/hwintrinsicarm64.cpp | 12 +++++-- 
src/coreclr/jit/hwintrinsicxarch.cpp | 13 ++++++-- src/coreclr/jit/lsraxarch.cpp | 48 +++++++++++++++++++++++++--- 3 files changed, 65 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 0bd7c286e9df5..78357b2918cb5 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -764,8 +764,16 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, if (varTypeIsFloating(simdBaseType)) { - op2 = impSIMDPopStack(retType); - op1 = impSIMDPopStack(retType); + CORINFO_ARG_LIST_HANDLE arg1 = sig->args; + CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1); + var_types argType = TYP_UNKNOWN; + CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE; + + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); + op2 = getArgForHWIntrinsic(argType, argClass); + + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass))); + op1 = getArgForHWIntrinsic(argType, argClass); retNode = gtNewSimdBinOpNode(GT_DIV, retType, op1, op2, simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ false); diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index 3427756f1cc26..03c59de09a0e0 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -1144,11 +1144,20 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic, if (varTypeIsFloating(simdBaseType)) { - op2 = impSIMDPopStack(retType); - op1 = impSIMDPopStack(retType); + CORINFO_ARG_LIST_HANDLE arg1 = sig->args; + CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1); + var_types argType = TYP_UNKNOWN; + CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE; + + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); + op2 = getArgForHWIntrinsic(argType, argClass); + + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, 
&argClass))); + op1 = getArgForHWIntrinsic(argType, argClass); retNode = gtNewSimdBinOpNode(GT_DIV, retType, op1, op2, simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ false); + break; } break; } diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 3d6633e8bf32c..65afe8870f4b5 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -1900,6 +1900,41 @@ int LinearScan::BuildIntrinsic(GenTree* tree) } #ifdef FEATURE_HW_INTRINSICS +//------------------------------------------------------------------------ +// SkipContainedCreateScalarUnsafe: Skips a contained CreateScalarUnsafe node +// and gets the underlying op1 instead +// +// Arguments: +// node - The node to handle +// +// Return Value: +// If node is a contained CreateScalarUnsafe, it's op1 is returned; +// otherwise node is returned unchanged. +static GenTree* SkipContainedCreateScalarUnsafe(GenTree* node) +{ + if (!node->OperIsHWIntrinsic() || !node->isContained()) + { + return node; + } + + GenTreeHWIntrinsic* hwintrinsic = node->AsHWIntrinsic(); + NamedIntrinsic intrinsicId = hwintrinsic->GetHWIntrinsicId(); + + switch (intrinsicId) + { + case NI_Vector128_CreateScalarUnsafe: + case NI_Vector256_CreateScalarUnsafe: + { + return hwintrinsic->Op(1); + } + + default: + { + return node; + } + } +} + //------------------------------------------------------------------------ // BuildHWIntrinsic: Set the NodeInfo for a GT_HWINTRINSIC tree. // @@ -1938,10 +1973,15 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou } else { - GenTree* op1 = intrinsicTree->Op(1); - GenTree* op2 = (numArgs >= 2) ? intrinsicTree->Op(2) : nullptr; - GenTree* op3 = (numArgs >= 3) ? intrinsicTree->Op(3) : nullptr; - GenTree* lastOp = intrinsicTree->Op(numArgs); + // A contained CreateScalarUnsafe is special in that we're not containing it to load from + // memory and it isn't a constant. 
Instead, it's essentially a "transparent" node we're ignoring + // to simplify the overall IR handling. As such, we need to "skip" such nodes when present and + // get the underlying op1 so that delayFreeUse and other preferencing remains correct. + + GenTree* op1 = SkipContainedCreateScalarUnsafe(intrinsicTree->Op(1)); + GenTree* op2 = (numArgs >= 2) ? SkipContainedCreateScalarUnsafe(intrinsicTree->Op(2)) : nullptr; + GenTree* op3 = (numArgs >= 3) ? SkipContainedCreateScalarUnsafe(intrinsicTree->Op(3)) : nullptr; + GenTree* lastOp = SkipContainedCreateScalarUnsafe(intrinsicTree->Op(numArgs)); bool buildUses = true; From 28e2366ed6d9036ca82f384ad5e4b288dd6a6ae1 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 16 Dec 2022 18:35:12 -0800 Subject: [PATCH 18/20] Applying formatting patch --- src/coreclr/jit/lsraxarch.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 65afe8870f4b5..29205bd4b98c5 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -1903,10 +1903,10 @@ int LinearScan::BuildIntrinsic(GenTree* tree) //------------------------------------------------------------------------ // SkipContainedCreateScalarUnsafe: Skips a contained CreateScalarUnsafe node // and gets the underlying op1 instead -// +// // Arguments: // node - The node to handle -// +// // Return Value: // If node is a contained CreateScalarUnsafe, it's op1 is returned; // otherwise node is returned unchanged. 
From 38149d5a20e4633826c72e4b3f725c9643931008 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 16 Dec 2022 22:56:56 -0800 Subject: [PATCH 19/20] Ensure we don't return nullptr for a lowered node --- src/coreclr/jit/lowerxarch.cpp | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 4ab0ede4465da..9bb98072a2b13 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -1125,7 +1125,7 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) } node->ChangeHWIntrinsicId(NI_Vector128_GetElement); - LowerNode(node); + return LowerNode(node); } break; } @@ -1186,6 +1186,8 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) // zero. This ensures we don't need to handle a case where op2 is zero // but not contained. + GenTree* nextNode = node->gtNext; + LIR::Use use; if (BlockRange().TryGetUse(node, &use)) @@ -1201,7 +1203,7 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) op3->SetUnusedValue(); BlockRange().Remove(node); - return op1->gtNext; + return nextNode; } if (!op3->IsCnsIntOrI()) @@ -3525,17 +3527,23 @@ GenTree* Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) } assert(result->GetHWIntrinsicId() != intrinsicId); + GenTree* nextNode = LowerNode(result); - LowerNode(result); if (intrinsicId == NI_Vector256_WithElement) { // Now that we have finalized the shape of the tree, lower the insertion node as well. 
+ assert(node->GetHWIntrinsicId() == NI_AVX_InsertVector128); assert(node != result); - LowerNode(node); + + nextNode = LowerNode(node); + } + else + { + assert(node == result); } - return node->gtNext; + return nextNode; } //---------------------------------------------------------------------------------------------- From 1332a014eadc019ef5a633f8ef9b402f9cb6a2e1 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Tue, 27 Dec 2022 22:10:57 -0800 Subject: [PATCH 20/20] Ensure TYP_SIMD8 bitcast is handled in VN --- src/coreclr/jit/valuenum.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index 805ef29bcd867..b6c74e5fcc679 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -3690,6 +3690,7 @@ ValueNum ValueNumStore::EvalBitCastForConstantArgs(var_types dstType, ValueNum a target_size_t nuint = 0; float float32 = 0; double float64 = 0; + simd8_t simd8 = {}; unsigned char bytes[8] = {}; switch (srcType) @@ -3719,6 +3720,12 @@ ValueNum ValueNumStore::EvalBitCastForConstantArgs(var_types dstType, ValueNum a float64 = ConstantValue(arg0VN); memcpy(bytes, &float64, sizeof(float64)); break; +#if defined(FEATURE_SIMD) + case TYP_SIMD8: + simd8 = ConstantValue(arg0VN); + memcpy(bytes, &simd8, sizeof(simd8)); + break; +#endif // FEATURE_SIMD default: unreached(); } @@ -3759,6 +3766,11 @@ ValueNum ValueNumStore::EvalBitCastForConstantArgs(var_types dstType, ValueNum a case TYP_DOUBLE: memcpy(&float64, bytes, sizeof(float64)); return VNForDoubleCon(float64); +#if defined(FEATURE_SIMD) + case TYP_SIMD8: + memcpy(&simd8, bytes, sizeof(simd8)); + return VNForSimd8Con(simd8); +#endif // FEATURE_SIMD default: unreached(); }