From 8c406af4aad0cba8ae36baa02a92fb6bbb0b1c49 Mon Sep 17 00:00:00 2001 From: Will Smith Date: Wed, 16 Feb 2022 23:53:58 -0800 Subject: [PATCH] Morph `Vector.Create(0)` to `Vector.Zero` (#63821) * Initial work * Added a comma to display * Cleanup * Fixing build * More cleanup * Update comment * Update comment * Added CompareEqual Vector64/128 with Zero tests * Do not contain op1 for now * Wrong intrinsic id used * Removing generated tests * Removing generated tests * Added CompareEqual tests * Supporting containment for first operand * Fix test build * Passing correct register * Check IsVectorZero before not allocing a register * Update comment * Fixing test * Minor format change * Fixed formatting * Renamed test * Adding AdvSimd_Arm64 tests: * Adding support for rest of 'cmeq' and 'fcmeq' instructions * Removing github csproj * Minor test fix * Fixed tests * Fix print * Minor format change * Fixing test * Initial commit for Vector.Create to Vector.Zero normalization * Added some emitter tests * Feedback * Update emitarm64.cpp * Feedback * Handling variations of Vector.Create * Use Operands iterator instead of edges * Fix condition * Simplify * format * Fixed IsFloatPositiveZero * Uncomment * Updated tests to include Vector64.Create/Vector128.Create for ARM64 * Making implementation of IsFloatPositiveZero explicit * Update src/coreclr/jit/gentree.cpp Co-authored-by: SingleAccretion <62474226+SingleAccretion@users.noreply.github.com> * Feedback * Update comment * Update comment * Do not perform optimization when VN CSE phase * use ResetHWIntrinsicId * Assert !optValnumCSE_phase * Simplify IsVectorZero * Simplify IsVectorZero * Simplify some uses of Vector*_get_Zero * Added another test * Fixed formatting * Revert lowering removal * Update gentree.h * Feedback Co-authored-by: SingleAccretion <62474226+SingleAccretion@users.noreply.github.com> --- src/coreclr/jit/compiler.h | 3 + src/coreclr/jit/gentree.h | 54 +++------- src/coreclr/jit/morph.cpp | 90 ++++++++++++++++ 
.../JitBlue/Runtime_33972/Runtime_33972.cs | 102 ++++++++++++++++++ 4 files changed, 211 insertions(+), 38 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 44c8834933d36..fa5bd6087220c 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6421,6 +6421,9 @@ class Compiler GenTree* fgOptimizeCast(GenTreeCast* cast); GenTree* fgOptimizeEqualityComparisonWithConst(GenTreeOp* cmp); GenTree* fgOptimizeRelationalComparisonWithConst(GenTreeOp* cmp); +#ifdef FEATURE_HW_INTRINSICS + GenTree* fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node); +#endif GenTree* fgOptimizeCommutativeArithmetic(GenTreeOp* tree); GenTree* fgOptimizeAddition(GenTreeOp* add); GenTree* fgOptimizeMultiply(GenTreeOp* mul); diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 5d37fb3656265..2647cd62f3621 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -7702,16 +7702,23 @@ inline bool GenTree::IsSIMDZero() const // inline bool GenTree::IsFloatPositiveZero() const { - return IsCnsFltOrDbl() && !IsCnsNonZeroFltOrDbl(); + if (IsCnsFltOrDbl()) + { + // This implementation is almost identical to IsCnsNonZeroFltOrDbl + // but it is easier to parse out + // rather than using !IsCnsNonZeroFltOrDbl. + double constValue = AsDblCon()->gtDconVal; + return *(__int64*)&constValue == 0; + } + + return false; } //------------------------------------------------------------------- -// IsVectorZero: returns true if this is an integral or floating-point (SIMD or HW intrinsic) vector -// with all its elements equal to zero. +// IsVectorZero: returns true if this node is a HWIntrinsic that is Vector*_get_Zero. // // Returns: -// True if this represents an integral or floating-point const (SIMD or HW intrinsic) vector with all its elements -// equal to zero. +// True if this represents a HWIntrinsic node that is Vector*_get_Zero. 
// // TODO: We already have IsSIMDZero() and IsIntegralConstVector(0), // however, IsSIMDZero() does not cover hardware intrinsics, and IsIntegralConstVector(0) does not cover floating @@ -7720,46 +7727,17 @@ inline bool GenTree::IsFloatPositiveZero() const // separate ones; preferably this one. inline bool GenTree::IsVectorZero() const { -#ifdef FEATURE_SIMD - if (gtOper == GT_SIMD) - { - const GenTreeSIMD* node = AsSIMD(); - - if (node->GetSIMDIntrinsicId() == SIMDIntrinsicInit) - { - return (node->Op(1)->IsIntegralConst(0) || node->Op(1)->IsFloatPositiveZero()); - } - } -#endif - #ifdef FEATURE_HW_INTRINSICS if (gtOper == GT_HWINTRINSIC) { - const GenTreeHWIntrinsic* node = AsHWIntrinsic(); - const var_types simdBaseType = node->GetSimdBaseType(); - - if (varTypeIsIntegral(simdBaseType) || varTypeIsFloating(simdBaseType)) - { - const NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); + const GenTreeHWIntrinsic* node = AsHWIntrinsic(); + const NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); - if (node->GetOperandCount() == 0) - { -#if defined(TARGET_XARCH) - return (intrinsicId == NI_Vector128_get_Zero) || (intrinsicId == NI_Vector256_get_Zero); -#elif defined(TARGET_ARM64) - return (intrinsicId == NI_Vector64_get_Zero) || (intrinsicId == NI_Vector128_get_Zero); -#endif // !TARGET_XARCH && !TARGET_ARM64 - } - else if ((node->GetOperandCount() == 1) && - (node->Op(1)->IsIntegralConst(0) || node->Op(1)->IsFloatPositiveZero())) - { #if defined(TARGET_XARCH) - return (intrinsicId == NI_Vector128_Create) || (intrinsicId == NI_Vector256_Create); + return (intrinsicId == NI_Vector128_get_Zero) || (intrinsicId == NI_Vector256_get_Zero); #elif defined(TARGET_ARM64) - return (intrinsicId == NI_Vector64_Create) || (intrinsicId == NI_Vector128_Create); + return (intrinsicId == NI_Vector64_get_Zero) || (intrinsicId == NI_Vector128_get_Zero); #endif // !TARGET_XARCH && !TARGET_ARM64 - } - } } #endif // FEATURE_HW_INTRINSICS diff --git a/src/coreclr/jit/morph.cpp 
b/src/coreclr/jit/morph.cpp index 414b82f5480c5..ed702b872e8e8 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -13516,6 +13516,89 @@ GenTree* Compiler::fgOptimizeRelationalComparisonWithConst(GenTreeOp* cmp) return cmp; } +#ifdef FEATURE_HW_INTRINSICS + +//------------------------------------------------------------------------ +// fgOptimizeHWIntrinsic: optimize a HW intrinsic node +// +// Arguments: +// node - HWIntrinsic node to examine +// +// Returns: +// The original node if no optimization happened or if tree bashing occurred. +// An alternative tree if an optimization happened. +// +// Notes: +// Checks for HWIntrinsic nodes: Vector64.Create/Vector128.Create/Vector256.Create, +// and if the call is one of these, attempt to optimize. +// This is post-order, meaning that it will not morph the children. +// +GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node) +{ + assert(!optValnumCSE_phase); + + if (opts.OptimizationDisabled()) + { + return node; + } + + switch (node->GetHWIntrinsicId()) + { + case NI_Vector128_Create: +#if defined(TARGET_XARCH) + case NI_Vector256_Create: +#elif defined(TARGET_ARM64) + case NI_Vector64_Create: +#endif + { + bool hwAllArgsAreConstZero = true; + for (GenTree* arg : node->Operands()) + { + if (!arg->IsIntegralConst(0) && !arg->IsFloatPositiveZero()) + { + hwAllArgsAreConstZero = false; + break; + } + } + + if (hwAllArgsAreConstZero) + { + switch (node->GetHWIntrinsicId()) + { + case NI_Vector128_Create: + { + node->ResetHWIntrinsicId(NI_Vector128_get_Zero); + break; + } +#if defined(TARGET_XARCH) + case NI_Vector256_Create: + { + node->ResetHWIntrinsicId(NI_Vector256_get_Zero); + break; + } +#elif defined(TARGET_ARM64) + case NI_Vector64_Create: + { + node->ResetHWIntrinsicId(NI_Vector64_get_Zero); + break; + } +#endif + default: + unreached(); + } + } + break; + } + + default: + break; + } + + return node; +} + +#endif + 
//------------------------------------------------------------------------ // fgOptimizeCommutativeArithmetic: Optimizes commutative operations. // @@ -14447,6 +14530,13 @@ GenTree* Compiler::fgMorphMultiOp(GenTreeMultiOp* multiOp) } #endif // defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH) +#ifdef FEATURE_HW_INTRINSICS + if (multiOp->OperIsHWIntrinsic() && !optValnumCSE_phase) + { + return fgOptimizeHWIntrinsic(multiOp->AsHWIntrinsic()); + } +#endif + return multiOp; } #endif // defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS) diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_33972/Runtime_33972.cs b/src/tests/JIT/Regression/JitBlue/Runtime_33972/Runtime_33972.cs index 6e995575f534c..9524e8d666add 100644 --- a/src/tests/JIT/Regression/JitBlue/Runtime_33972/Runtime_33972.cs +++ b/src/tests/JIT/Regression/JitBlue/Runtime_33972/Runtime_33972.cs @@ -52,6 +52,30 @@ static Vector64 AdvSimd_CompareEqual_Vector64_Single_Zero(Vector64 return AdvSimd.CompareEqual(left, Vector64.Zero); } + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector64 AdvSimd_CompareEqual_Vector64_Int32_CreateZero(Vector64 left) + { + return AdvSimd.CompareEqual(left, Vector64.Create(0)); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector64 AdvSimd_CompareEqual_Vector64_Int32_CreateZeroZero(Vector64 left) + { + return AdvSimd.CompareEqual(left, Vector64.Create(0, 0)); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector64 AdvSimd_CompareEqual_Vector64_Single_CreateZero(Vector64 left) + { + return AdvSimd.CompareEqual(left, Vector64.Create(0f)); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector64 AdvSimd_CompareEqual_Vector64_Single_CreateZeroZero(Vector64 left) + { + return AdvSimd.CompareEqual(left, Vector64.Create(0f, 0f)); + } + [MethodImpl(MethodImplOptions.NoInlining)] static Vector128 AdvSimd_CompareEqual_Vector128_Byte_Zero(Vector128 left) { @@ -94,6 +118,37 @@ static Vector128 
AdvSimd_CompareEqual_Vector128_Single_Zero(Vector128.Zero); } + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector128 AdvSimd_CompareEqual_Vector128_Int32_CreateZero(Vector128 left) + { + return AdvSimd.CompareEqual(left, Vector128.Create(0)); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector128 AdvSimd_CompareEqual_Vector128_Int32_CreateZeroZeroZeroZero(Vector128 left) + { + return AdvSimd.CompareEqual(left, Vector128.Create(0, 0, 0, 0)); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector128 AdvSimd_CompareEqual_Vector128_Single_CreateZero(Vector128 left) + { + return AdvSimd.CompareEqual(left, Vector128.Create(0f)); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector128 AdvSimd_CompareEqual_Vector128_Single_CreateZeroZeroZeroZero(Vector128 left) + { + return AdvSimd.CompareEqual(left, Vector128.Create(0f, 0f, 0f, 0f)); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector128 AdvSimd_CompareEqual_Vector128_Single_CreateZeroZeroZeroZero_AsVariable(Vector128 left) + { + var asVar = Vector128.Create(0f, 0f, 0f, 0f); + return AdvSimd.CompareEqual(left, asVar); + } + [MethodImpl(MethodImplOptions.NoInlining)] static Vector128 AdvSimd_Arm64_CompareEqual_Vector128_Double_Zero(Vector128 left) { @@ -432,6 +487,8 @@ static int Tests_AdvSimd() // Begin CompareEqual Tests + // Vector64 + if (!ValidateResult_Vector64(AdvSimd_CompareEqual_Vector64_Byte_Zero(Vector64.Zero), Byte.MaxValue)) result = -1; @@ -453,6 +510,22 @@ static int Tests_AdvSimd() if (!ValidateResult_Vector64(AdvSimd_CompareEqual_Vector64_Single_Zero(Vector64.Zero), Single.NaN)) result = -1; + // Vector64.Create + + if (!ValidateResult_Vector64(AdvSimd_CompareEqual_Vector64_Int32_CreateZero(Vector64.Zero), -1)) + result = -1; + + if (!ValidateResult_Vector64(AdvSimd_CompareEqual_Vector64_Single_CreateZero(Vector64.Zero), Single.NaN)) + result = -1; + + if 
(!ValidateResult_Vector64(AdvSimd_CompareEqual_Vector64_Int32_CreateZeroZero(Vector64.Zero), -1)) + result = -1; + + if (!ValidateResult_Vector64(AdvSimd_CompareEqual_Vector64_Single_CreateZeroZero(Vector64.Zero), Single.NaN)) + result = -1; + + // Vector128 + if (!ValidateResult_Vector128(AdvSimd_CompareEqual_Vector128_Byte_Zero(Vector128.Zero), Byte.MaxValue)) result = -1; @@ -474,6 +547,23 @@ static int Tests_AdvSimd() if (!ValidateResult_Vector128(AdvSimd_CompareEqual_Vector128_Single_Zero(Vector128.Zero), Single.NaN)) result = -1; + // Vector128.Create + + if (!ValidateResult_Vector128(AdvSimd_CompareEqual_Vector128_Int32_CreateZero(Vector128.Zero), -1)) + result = -1; + + if (!ValidateResult_Vector128(AdvSimd_CompareEqual_Vector128_Single_CreateZero(Vector128.Zero), Single.NaN)) + result = -1; + + if (!ValidateResult_Vector128(AdvSimd_CompareEqual_Vector128_Int32_CreateZeroZeroZeroZero(Vector128.Zero), -1)) + result = -1; + + if (!ValidateResult_Vector128(AdvSimd_CompareEqual_Vector128_Single_CreateZeroZeroZeroZero(Vector128.Zero), Single.NaN)) + result = -1; + + if (!ValidateResult_Vector128(AdvSimd_CompareEqual_Vector128_Single_CreateZeroZeroZeroZero_AsVariable(Vector128.Zero), Single.NaN)) + result = -1; + // End CompareEqual Tests // Begin CompareGreaterThan Tests @@ -589,6 +679,8 @@ static int Tests_AdvSimd_Swapped() // Begin CompareEqual Tests + // Vector64 + if (!ValidateResult_Vector64(AdvSimd_CompareEqual_Vector64_Byte_Zero_Swapped(Vector64.Zero), Byte.MaxValue)) result = -1; @@ -610,6 +702,8 @@ static int Tests_AdvSimd_Swapped() if (!ValidateResult_Vector64(AdvSimd_CompareEqual_Vector64_Single_Zero_Swapped(Vector64.Zero), Single.NaN)) result = -1; + // Vector128 + if (!ValidateResult_Vector128(AdvSimd_CompareEqual_Vector128_Byte_Zero_Swapped(Vector128.Zero), Byte.MaxValue)) result = -1; @@ -642,6 +736,8 @@ static int Tests_AdvSimd_Arm64() // Begin CompareEqual Tests + // Vector128 + if 
(!ValidateResult_Vector128(AdvSimd_Arm64_CompareEqual_Vector128_Double_Zero(Vector128.Zero), Double.NaN)) result = -1; @@ -651,6 +747,8 @@ static int Tests_AdvSimd_Arm64() if (!ValidateResult_Vector128(AdvSimd_Arm64_CompareEqual_Vector128_Int64_Zero(Vector128.Zero), -1)) result = -1; + // Vector64 + if (!ValidateResult_Vector64(AdvSimd_Arm64_CompareEqualScalar_Vector64_Single_Zero(Vector64.Zero), Vector64.CreateScalar(Single.NaN))) result = -1; @@ -674,6 +772,8 @@ static int Tests_AdvSimd_Arm64_Swapped() // Begin CompareEqual Tests + // Vector128 + if (!ValidateResult_Vector128(AdvSimd_Arm64_CompareEqual_Vector128_Double_Zero_Swapped(Vector128.Zero), Double.NaN)) result = -1; @@ -683,6 +783,8 @@ static int Tests_AdvSimd_Arm64_Swapped() if (!ValidateResult_Vector128(AdvSimd_Arm64_CompareEqual_Vector128_Int64_Zero_Swapped(Vector128.Zero), -1)) result = -1; + // Vector64 + if (!ValidateResult_Vector64(AdvSimd_Arm64_CompareEqualScalar_Vector64_Single_Zero_Swapped(Vector64.Zero), Vector64.CreateScalar(Single.NaN))) result = -1;