From eafb15b5c1a40584551cad8eda52563e0801077c Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 1 May 2020 09:51:42 -0700 Subject: [PATCH 01/24] Porting Ceiling and Floor to use SimdAsHWIntrinsic --- src/coreclr/src/jit/hwintrinsiclistarm64.h | 2 ++ src/coreclr/src/jit/simdashwintrinsiclistarm64.h | 2 ++ src/coreclr/src/jit/simdashwintrinsiclistxarch.h | 4 ++++ 3 files changed, 8 insertions(+) diff --git a/src/coreclr/src/jit/hwintrinsiclistarm64.h b/src/coreclr/src/jit/hwintrinsiclistarm64.h index 80a5f487e2ff4..f42881ccff5a7 100644 --- a/src/coreclr/src/jit/hwintrinsiclistarm64.h +++ b/src/coreclr/src/jit/hwintrinsiclistarm64.h @@ -90,6 +90,7 @@ HARDWARE_INTRINSIC(AdvSimd, AddWideningUpper, 1 HARDWARE_INTRINSIC(AdvSimd, And, -1, 2, {INS_and, INS_and, INS_and, INS_and, INS_and, INS_and, INS_and, INS_and, INS_and, INS_and}, HW_Category_SimpleSIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(AdvSimd, BitwiseClear, -1, 2, {INS_bic, INS_bic, INS_bic, INS_bic, INS_bic, INS_bic, INS_bic, INS_bic, INS_bic, INS_bic}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AdvSimd, BitwiseSelect, -1, 3, {INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd, Ceiling, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintp, INS_frintp}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AdvSimd, CompareEqual, -1, 2, {INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_invalid, INS_invalid, INS_fcmeq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(AdvSimd, CompareGreaterThan, -1, 2, {INS_cmgt, INS_cmhi, INS_cmgt, INS_cmhi, INS_cmgt, INS_cmhi, INS_invalid, INS_invalid, INS_fcmgt, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AdvSimd, CompareGreaterThanOrEqual, -1, 2, {INS_cmge, INS_cmhs, INS_cmge, INS_cmhs, INS_cmge, 
INS_cmhs, INS_invalid, INS_invalid, INS_fcmge, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) @@ -106,6 +107,7 @@ HARDWARE_INTRINSIC(AdvSimd, ExtractAndNarrowHigh, 1 HARDWARE_INTRINSIC(AdvSimd, ExtractAndNarrowLow, 8, 1, {INS_xtn, INS_xtn, INS_xtn, INS_xtn, INS_xtn, INS_xtn, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AdvSimd, ExtractVector64, 8, 3, {INS_ext, INS_ext, INS_ext, INS_ext, INS_ext, INS_ext, INS_invalid, INS_invalid, INS_ext, INS_invalid}, HW_Category_IMM, HW_Flag_SupportsContainment|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AdvSimd, ExtractVector128, 16, 3, {INS_ext, INS_ext, INS_ext, INS_ext, INS_ext, INS_ext, INS_ext, INS_ext, INS_ext, INS_ext}, HW_Category_IMM, HW_Flag_SupportsContainment|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd, Floor, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintm, INS_frintm}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AdvSimd, FusedAddHalving, -1, 2, {INS_shadd, INS_uhadd, INS_shadd, INS_uhadd, INS_shadd, INS_uhadd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(AdvSimd, FusedAddRoundedHalving, -1, 2, {INS_srhadd, INS_urhadd, INS_srhadd, INS_urhadd, INS_srhadd, INS_urhadd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(AdvSimd, FusedMultiplyAdd, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmla, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_HasRMWSemantics) diff --git a/src/coreclr/src/jit/simdashwintrinsiclistarm64.h b/src/coreclr/src/jit/simdashwintrinsiclistarm64.h index cfd47939cf3dc..8d392f42bbf52 100644 --- a/src/coreclr/src/jit/simdashwintrinsiclistarm64.h +++ b/src/coreclr/src/jit/simdashwintrinsiclistarm64.h @@ -67,7 +67,9 @@ 
SIMD_AS_HWINTRINSIC(Vector4, op_Subtraction, 2, {NI_Ille // Vector Intrinsics SIMD_AS_HWINTRINSIC(VectorT128, Abs, 1, {NI_AdvSimd_Abs, NI_VectorT128_Abs, NI_AdvSimd_Abs, NI_VectorT128_Abs, NI_AdvSimd_Abs, NI_VectorT128_Abs, NI_AdvSimd_Arm64_Abs, NI_VectorT128_Abs, NI_AdvSimd_Abs, NI_AdvSimd_Arm64_Abs}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, AndNot, 2, {NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, Ceiling, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Ceiling, NI_AdvSimd_Ceiling}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, Equals, 2, {NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_Arm64_CompareEqual, NI_AdvSimd_Arm64_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_Arm64_CompareEqual}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, Floor, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Floor, NI_AdvSimd_Floor}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, GreaterThan, 2, {NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_Arm64_CompareGreaterThan, NI_AdvSimd_Arm64_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_Arm64_CompareGreaterThan}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, GreaterThanOrEqual, 2, {NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, 
NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_Arm64_CompareGreaterThanOrEqual, NI_AdvSimd_Arm64_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_Arm64_CompareGreaterThanOrEqual}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, LessThan, 2, {NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_Arm64_CompareLessThan, NI_AdvSimd_Arm64_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_Arm64_CompareLessThan}, SimdAsHWIntrinsicFlag::None) diff --git a/src/coreclr/src/jit/simdashwintrinsiclistxarch.h b/src/coreclr/src/jit/simdashwintrinsiclistxarch.h index 8f2ac6264041c..8e369257ac382 100644 --- a/src/coreclr/src/jit/simdashwintrinsiclistxarch.h +++ b/src/coreclr/src/jit/simdashwintrinsiclistxarch.h @@ -67,7 +67,9 @@ SIMD_AS_HWINTRINSIC(Vector4, op_Subtraction, 2, {NI_Ille // Vector Intrinsics SIMD_AS_HWINTRINSIC(VectorT128, Abs, 1, {NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, AndNot, 2, {NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE_AndNot, NI_SSE2_AndNot}, SimdAsHWIntrinsicFlag::NeedsOperandsSwapped) +SIMD_AS_HWINTRINSIC(VectorT128, Ceiling, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE41_Ceiling, NI_SSE41_Ceiling}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, Equals, 2, {NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_VectorT128_Equals, NI_VectorT128_Equals, 
NI_SSE_CompareEqual, NI_SSE2_CompareEqual}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, Floor, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE41_Floor, NI_SSE41_Floor}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, GreaterThan, 2, {NI_SSE2_CompareGreaterThan, NI_VectorT128_GreaterThan, NI_SSE2_CompareGreaterThan, NI_VectorT128_GreaterThan, NI_SSE2_CompareGreaterThan, NI_VectorT128_GreaterThan, NI_VectorT128_GreaterThan, NI_VectorT128_GreaterThan, NI_SSE_CompareGreaterThan, NI_SSE2_CompareGreaterThan}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, GreaterThanOrEqual, 2, {NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_SSE_CompareGreaterThanOrEqual, NI_SSE2_CompareGreaterThanOrEqual}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, LessThan, 2, {NI_SSE2_CompareLessThan, NI_VectorT128_LessThan, NI_SSE2_CompareLessThan, NI_VectorT128_LessThan, NI_SSE2_CompareLessThan, NI_VectorT128_LessThan, NI_VectorT128_LessThan, NI_VectorT128_LessThan, NI_SSE_CompareLessThan, NI_SSE2_CompareLessThan}, SimdAsHWIntrinsicFlag::None) @@ -89,7 +91,9 @@ SIMD_AS_HWINTRINSIC(VectorT128, op_Subtraction, 2, {NI_SSE2 // Vector Intrinsics SIMD_AS_HWINTRINSIC(VectorT256, Abs, 1, {NI_AVX2_Abs, NI_VectorT256_Abs, NI_AVX2_Abs, NI_VectorT256_Abs, NI_AVX2_Abs, NI_VectorT256_Abs, NI_VectorT256_Abs, NI_VectorT256_Abs, NI_VectorT256_Abs, NI_VectorT256_Abs}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT256, AndNot, 2, {NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX_AndNot, NI_AVX_AndNot}, SimdAsHWIntrinsicFlag::NeedsOperandsSwapped) +SIMD_AS_HWINTRINSIC(VectorT256, 
Ceiling, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AVX_Ceiling, NI_AVX_Ceiling}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT256, Equals, 2, {NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX_CompareEqual, NI_AVX_CompareEqual}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT256, Floor, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AVX_Floor, NI_AVX_Floor}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT256, GreaterThan, 2, {NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX_CompareGreaterThan, NI_AVX_CompareGreaterThan}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT256, GreaterThanOrEqual, 2, {NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_AVX_CompareGreaterThanOrEqual, NI_AVX_CompareGreaterThanOrEqual}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT256, LessThan, 2, {NI_AVX2_CompareLessThan, NI_VectorT256_LessThan, NI_AVX2_CompareLessThan, NI_VectorT256_LessThan, NI_AVX2_CompareLessThan, NI_VectorT256_LessThan, NI_AVX2_CompareLessThan, NI_VectorT256_LessThan, NI_AVX_CompareLessThan, NI_AVX_CompareLessThan}, SimdAsHWIntrinsicFlag::None) From 3e6e89746faf395a9af735ed7006e4a9338f19a5 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 1 May 2020 10:25:00 -0700 Subject: [PATCH 02/24] Porting SquareRoot to use SimdAsHWIntrinsic --- 
src/coreclr/src/jit/simdashwintrinsiclistarm64.h | 4 ++++ src/coreclr/src/jit/simdashwintrinsiclistxarch.h | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/src/coreclr/src/jit/simdashwintrinsiclistarm64.h b/src/coreclr/src/jit/simdashwintrinsiclistarm64.h index 8d392f42bbf52..5f6d3c67be623 100644 --- a/src/coreclr/src/jit/simdashwintrinsiclistarm64.h +++ b/src/coreclr/src/jit/simdashwintrinsiclistarm64.h @@ -33,6 +33,7 @@ SIMD_AS_HWINTRINSIC(Vector2, op_Addition, 2, {NI_Ille SIMD_AS_HWINTRINSIC(Vector2, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Divide, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector2, SquareRoot, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Sqrt, NI_Illegal}, SimdAsHWIntrinsicFlag::None) // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // ISA Function name NumArg Instructions Flags @@ -46,6 +47,7 @@ SIMD_AS_HWINTRINSIC(Vector3, 
op_Addition, 2, {NI_Ille SIMD_AS_HWINTRINSIC(Vector3, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Divide, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector3, SquareRoot, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Sqrt, NI_Illegal}, SimdAsHWIntrinsicFlag::None) // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // ISA Function name NumArg Instructions Flags @@ -59,6 +61,7 @@ SIMD_AS_HWINTRINSIC(Vector4, op_Addition, 2, {NI_Ille SIMD_AS_HWINTRINSIC(Vector4, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Divide, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, 
op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector4, SquareRoot, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Sqrt, NI_Illegal}, SimdAsHWIntrinsicFlag::None) // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // ISA Function name NumArg Instructions Flags @@ -83,6 +86,7 @@ SIMD_AS_HWINTRINSIC(VectorT128, op_Division, 2, {NI_Ille SIMD_AS_HWINTRINSIC(VectorT128, op_ExclusiveOr, 2, {NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, op_Multiply, 2, {NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_Illegal, NI_Illegal, NI_AdvSimd_Multiply, NI_AdvSimd_Arm64_Multiply}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, op_Subtraction, 2, {NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Arm64_Subtract}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, SquareRoot, 1, {NI_Illegal, NI_Illegal, NI_Illegal, 
NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Sqrt, NI_AdvSimd_Arm64_Sqrt}, SimdAsHWIntrinsicFlag::None) #endif // FEATURE_HW_INTRINSICS diff --git a/src/coreclr/src/jit/simdashwintrinsiclistxarch.h b/src/coreclr/src/jit/simdashwintrinsiclistxarch.h index 8e369257ac382..4c0e809cd4fe2 100644 --- a/src/coreclr/src/jit/simdashwintrinsiclistxarch.h +++ b/src/coreclr/src/jit/simdashwintrinsiclistxarch.h @@ -33,6 +33,7 @@ SIMD_AS_HWINTRINSIC(Vector2, op_Addition, 2, {NI_Ille SIMD_AS_HWINTRINSIC(Vector2, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_op_Division, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector2, SquareRoot, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Sqrt, NI_Illegal}, SimdAsHWIntrinsicFlag::None) // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // ISA Function name NumArg Instructions Flags @@ -46,6 +47,7 @@ SIMD_AS_HWINTRINSIC(Vector3, op_Addition, 2, {NI_Ille 
SIMD_AS_HWINTRINSIC(Vector3, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_op_Division, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector3, SquareRoot, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Sqrt, NI_Illegal}, SimdAsHWIntrinsicFlag::None) // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // ISA Function name NumArg Instructions Flags @@ -59,6 +61,7 @@ SIMD_AS_HWINTRINSIC(Vector4, op_Addition, 2, {NI_Ille SIMD_AS_HWINTRINSIC(Vector4, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Divide, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, 
NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector4, SquareRoot, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Sqrt, NI_Illegal}, SimdAsHWIntrinsicFlag::None) // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // ISA Function name NumArg Instructions Flags @@ -83,6 +86,7 @@ SIMD_AS_HWINTRINSIC(VectorT128, op_Division, 2, {NI_Ille SIMD_AS_HWINTRINSIC(VectorT128, op_ExclusiveOr, 2, {NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE_Xor, NI_SSE2_Xor}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_SSE2_MultiplyLow, NI_Illegal, NI_VectorT128_op_Multiply, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Multiply, NI_SSE2_Multiply}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, op_Subtraction, 2, {NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE_Subtract, NI_SSE2_Subtract}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, SquareRoot, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Sqrt, NI_SSE2_Sqrt}, SimdAsHWIntrinsicFlag::None) // 
************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // ISA Function name NumArg Instructions Flags @@ -107,6 +111,7 @@ SIMD_AS_HWINTRINSIC(VectorT256, op_Division, 2, {NI_Ille SIMD_AS_HWINTRINSIC(VectorT256, op_ExclusiveOr, 2, {NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX_Xor, NI_AVX_Xor}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT256, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_AVX2_MultiplyLow, NI_Illegal, NI_AVX2_MultiplyLow, NI_Illegal, NI_Illegal, NI_Illegal, NI_AVX_Multiply, NI_AVX_Multiply}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT256, op_Subtraction, 2, {NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX_Subtract, NI_AVX_Subtract}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT256, SquareRoot, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AVX_Sqrt, NI_AVX_Sqrt}, SimdAsHWIntrinsicFlag::None) #endif // FEATURE_HW_INTRINSICS From c2de6275647d00c0656a11aff260d305e41c7529 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 1 May 2020 11:10:23 -0700 Subject: [PATCH 03/24] Porting ConditionalSelect to use SimdAsHWIntrinsic --- src/coreclr/src/jit/simdashwintrinsic.cpp | 45 ++++++++++++++++++- .../src/jit/simdashwintrinsiclistarm64.h | 1 + .../src/jit/simdashwintrinsiclistxarch.h | 2 + 3 files changed, 47 
insertions(+), 1 deletion(-) diff --git a/src/coreclr/src/jit/simdashwintrinsic.cpp b/src/coreclr/src/jit/simdashwintrinsic.cpp index 1463ed30ca407..f15ef3c85236a 100644 --- a/src/coreclr/src/jit/simdashwintrinsic.cpp +++ b/src/coreclr/src/jit/simdashwintrinsic.cpp @@ -320,6 +320,7 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, GenTree* op1 = nullptr; GenTree* op2 = nullptr; + GenTree* op3 = nullptr; SimdAsHWIntrinsicClassId classId = SimdAsHWIntrinsicInfo::lookupClassId(intrinsic); bool isInstanceMethod = SimdAsHWIntrinsicInfo::IsInstanceMethod(intrinsic); @@ -692,6 +693,48 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, } break; } + + case 3: + { + CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(argList); + CORINFO_ARG_LIST_HANDLE arg3 = info.compCompHnd->getArgNext(arg2); + + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg3, &argClass))); + op3 = getArgForHWIntrinsic(argType, argClass); + + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); + op2 = getArgForHWIntrinsic(argType, argClass); + + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass))); + op1 = getArgForHWIntrinsic(argType, argClass, isInstanceMethod); + + assert(!SimdAsHWIntrinsicInfo::NeedsOperandsSwapped(intrinsic)); + + switch (intrinsic) + { +#if defined(TARGET_XARCH) + case NI_VectorT128_ConditionalSelect: + case NI_VectorT256_ConditionalSelect: + { + return impSimdAsHWIntrinsicCndSel(clsHnd, retType, baseType, simdSize, op1, op2, op3); + } +#elif defined(TARGET_ARM64) + case NI_VectorT128_ConditionalSelect: + { + return impSimdAsHWIntrinsicCndSel(clsHnd, retType, baseType, simdSize, op1, op2, op3); + } +#else +#error Unsupported platform +#endif // !TARGET_XARCH && !TARGET_ARM64 + + default: + { + // Some platforms warn about unhandled switch cases + // We handle it more generally via the assert and nullptr return below. 
+ break; + } + } + } } assert(!"Unexpected SimdAsHWIntrinsic"); @@ -723,7 +766,7 @@ GenTree* Compiler::impSimdAsHWIntrinsicCndSel(CORINFO_CLASS_HANDLE clsHnd, { assert(featureSIMD); assert(retType != TYP_UNKNOWN); - assert(varTypeIsIntegral(baseType)); + assert(varTypeIsArithmetic(baseType)); assert(simdSize != 0); assert(varTypeIsSIMD(getSIMDTypeForSize(simdSize))); assert(op1 != nullptr); diff --git a/src/coreclr/src/jit/simdashwintrinsiclistarm64.h b/src/coreclr/src/jit/simdashwintrinsiclistarm64.h index 5f6d3c67be623..108f8349e929b 100644 --- a/src/coreclr/src/jit/simdashwintrinsiclistarm64.h +++ b/src/coreclr/src/jit/simdashwintrinsiclistarm64.h @@ -71,6 +71,7 @@ SIMD_AS_HWINTRINSIC(Vector4, SquareRoot, 1, {NI_Ille SIMD_AS_HWINTRINSIC(VectorT128, Abs, 1, {NI_AdvSimd_Abs, NI_VectorT128_Abs, NI_AdvSimd_Abs, NI_VectorT128_Abs, NI_AdvSimd_Abs, NI_VectorT128_Abs, NI_AdvSimd_Arm64_Abs, NI_VectorT128_Abs, NI_AdvSimd_Abs, NI_AdvSimd_Arm64_Abs}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, AndNot, 2, {NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, Ceiling, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Ceiling, NI_AdvSimd_Ceiling}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, ConditionalSelect, 3, {NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, Equals, 2, 
{NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_Arm64_CompareEqual, NI_AdvSimd_Arm64_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_Arm64_CompareEqual}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, Floor, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Floor, NI_AdvSimd_Floor}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, GreaterThan, 2, {NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_Arm64_CompareGreaterThan, NI_AdvSimd_Arm64_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_Arm64_CompareGreaterThan}, SimdAsHWIntrinsicFlag::None) diff --git a/src/coreclr/src/jit/simdashwintrinsiclistxarch.h b/src/coreclr/src/jit/simdashwintrinsiclistxarch.h index 4c0e809cd4fe2..2fe2e7c778431 100644 --- a/src/coreclr/src/jit/simdashwintrinsiclistxarch.h +++ b/src/coreclr/src/jit/simdashwintrinsiclistxarch.h @@ -71,6 +71,7 @@ SIMD_AS_HWINTRINSIC(Vector4, SquareRoot, 1, {NI_Ille SIMD_AS_HWINTRINSIC(VectorT128, Abs, 1, {NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, AndNot, 2, {NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE_AndNot, NI_SSE2_AndNot}, SimdAsHWIntrinsicFlag::NeedsOperandsSwapped) SIMD_AS_HWINTRINSIC(VectorT128, Ceiling, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE41_Ceiling, NI_SSE41_Ceiling}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, 
ConditionalSelect, 3, {NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, Equals, 2, {NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_VectorT128_Equals, NI_VectorT128_Equals, NI_SSE_CompareEqual, NI_SSE2_CompareEqual}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, Floor, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE41_Floor, NI_SSE41_Floor}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, GreaterThan, 2, {NI_SSE2_CompareGreaterThan, NI_VectorT128_GreaterThan, NI_SSE2_CompareGreaterThan, NI_VectorT128_GreaterThan, NI_SSE2_CompareGreaterThan, NI_VectorT128_GreaterThan, NI_VectorT128_GreaterThan, NI_VectorT128_GreaterThan, NI_SSE_CompareGreaterThan, NI_SSE2_CompareGreaterThan}, SimdAsHWIntrinsicFlag::None) @@ -96,6 +97,7 @@ SIMD_AS_HWINTRINSIC(VectorT128, SquareRoot, 1, {NI_Ille SIMD_AS_HWINTRINSIC(VectorT256, Abs, 1, {NI_AVX2_Abs, NI_VectorT256_Abs, NI_AVX2_Abs, NI_VectorT256_Abs, NI_AVX2_Abs, NI_VectorT256_Abs, NI_VectorT256_Abs, NI_VectorT256_Abs, NI_VectorT256_Abs, NI_VectorT256_Abs}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT256, AndNot, 2, {NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX_AndNot, NI_AVX_AndNot}, SimdAsHWIntrinsicFlag::NeedsOperandsSwapped) SIMD_AS_HWINTRINSIC(VectorT256, Ceiling, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AVX_Ceiling, NI_AVX_Ceiling}, SimdAsHWIntrinsicFlag::None) 
+SIMD_AS_HWINTRINSIC(VectorT256, ConditionalSelect, 3, {NI_VectorT256_ConditionalSelect, NI_VectorT256_ConditionalSelect, NI_VectorT256_ConditionalSelect, NI_VectorT256_ConditionalSelect, NI_VectorT256_ConditionalSelect, NI_VectorT256_ConditionalSelect, NI_VectorT256_ConditionalSelect, NI_VectorT256_ConditionalSelect, NI_VectorT256_ConditionalSelect, NI_VectorT256_ConditionalSelect}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT256, Equals, 2, {NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX_CompareEqual, NI_AVX_CompareEqual}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT256, Floor, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AVX_Floor, NI_AVX_Floor}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT256, GreaterThan, 2, {NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX_CompareGreaterThan, NI_AVX_CompareGreaterThan}, SimdAsHWIntrinsicFlag::None) From 7d92eae699942e01da093e81c086a9be935f43bd Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 15 May 2020 10:33:31 -0700 Subject: [PATCH 04/24] Porting get_AllBitsSet, get_Count, and get_Zero to use SimdAsHWIntrinsic --- src/coreclr/src/jit/compiler.h | 8 ++++++++ src/coreclr/src/jit/simdashwintrinsic.cpp | 6 ++++++ src/coreclr/src/jit/simdashwintrinsiclistarm64.h | 6 ++++++ src/coreclr/src/jit/simdashwintrinsiclistxarch.h | 9 +++++++++ .../System.Private.CoreLib/src/System/Numerics/Vector.cs | 8 ++++---- .../System.Private.CoreLib/src/System/Numerics/Vector.tt | 8 ++++---- .../src/System/Numerics/Vector_Operations.cs | 8 ++++---- 7 files changed, 41 insertions(+), 12 deletions(-) diff --git 
a/src/coreclr/src/jit/compiler.h b/src/coreclr/src/jit/compiler.h index 5a2dfcfbfb598..33e2dd380d6ad 100644 --- a/src/coreclr/src/jit/compiler.h +++ b/src/coreclr/src/jit/compiler.h @@ -2621,6 +2621,14 @@ class Compiler var_types baseType, unsigned size); + GenTreeHWIntrinsic* gtNewSimdAsHWIntrinsicNode( + var_types type, NamedIntrinsic hwIntrinsicID, var_types baseType, unsigned size) + { + GenTreeHWIntrinsic* node = gtNewSimdHWIntrinsicNode(type, hwIntrinsicID, baseType, size); + node->gtFlags |= GTF_SIMDASHW_OP; + return node; + } + GenTreeHWIntrinsic* gtNewSimdAsHWIntrinsicNode( var_types type, GenTree* op1, NamedIntrinsic hwIntrinsicID, var_types baseType, unsigned size) { diff --git a/src/coreclr/src/jit/simdashwintrinsic.cpp b/src/coreclr/src/jit/simdashwintrinsic.cpp index f15ef3c85236a..d20ac143f6cb8 100644 --- a/src/coreclr/src/jit/simdashwintrinsic.cpp +++ b/src/coreclr/src/jit/simdashwintrinsic.cpp @@ -254,6 +254,12 @@ GenTree* Compiler::impSimdAsHWIntrinsic(NamedIntrinsic intrinsic, switch (sig->numArgs) { + case 0: + { + assert(!SimdAsHWIntrinsicInfo::NeedsOperandsSwapped(intrinsic)); + return gtNewSimdAsHWIntrinsicNode(retType, hwIntrinsic, baseType, simdSize); + } + case 1: { argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass))); diff --git a/src/coreclr/src/jit/simdashwintrinsiclistarm64.h b/src/coreclr/src/jit/simdashwintrinsiclistarm64.h index 108f8349e929b..47b80dc99fe78 100644 --- a/src/coreclr/src/jit/simdashwintrinsiclistarm64.h +++ b/src/coreclr/src/jit/simdashwintrinsiclistarm64.h @@ -27,6 +27,7 @@ // 
************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector2 Intrinsics SIMD_AS_HWINTRINSIC(Vector2, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector2, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector64_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) @@ -41,6 +42,7 @@ SIMD_AS_HWINTRINSIC(Vector2, SquareRoot, 1, {NI_Ille // 
************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector3 Intrinsics SIMD_AS_HWINTRINSIC(Vector3, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector3, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) @@ -55,6 +57,7 @@ SIMD_AS_HWINTRINSIC(Vector3, SquareRoot, 1, {NI_Ille // 
************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector4 Intrinsics SIMD_AS_HWINTRINSIC(Vector4, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector4, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) @@ -74,6 +77,9 @@ SIMD_AS_HWINTRINSIC(VectorT128, Ceiling, 1, {NI_Ille SIMD_AS_HWINTRINSIC(VectorT128, ConditionalSelect, 3, {NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, 
NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, Equals, 2, {NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_Arm64_CompareEqual, NI_AdvSimd_Arm64_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_Arm64_CompareEqual}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, Floor, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Floor, NI_AdvSimd_Floor}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, get_AllBitsSet, 0, {NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, get_Count, 0, {NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, get_Zero, 0, {NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, GreaterThan, 2, {NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_Arm64_CompareGreaterThan, NI_AdvSimd_Arm64_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, 
NI_AdvSimd_Arm64_CompareGreaterThan}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, GreaterThanOrEqual, 2, {NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_Arm64_CompareGreaterThanOrEqual, NI_AdvSimd_Arm64_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_Arm64_CompareGreaterThanOrEqual}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, LessThan, 2, {NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_Arm64_CompareLessThan, NI_AdvSimd_Arm64_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_Arm64_CompareLessThan}, SimdAsHWIntrinsicFlag::None) diff --git a/src/coreclr/src/jit/simdashwintrinsiclistxarch.h b/src/coreclr/src/jit/simdashwintrinsiclistxarch.h index 2fe2e7c778431..d1029a7905bfe 100644 --- a/src/coreclr/src/jit/simdashwintrinsiclistxarch.h +++ b/src/coreclr/src/jit/simdashwintrinsiclistxarch.h @@ -27,6 +27,7 @@ // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector2 Intrinsics SIMD_AS_HWINTRINSIC(Vector2, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) 
+SIMD_AS_HWINTRINSIC(Vector2, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) @@ -41,6 +42,7 @@ SIMD_AS_HWINTRINSIC(Vector2, SquareRoot, 1, {NI_Ille // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector3 Intrinsics SIMD_AS_HWINTRINSIC(Vector3, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector3, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Max, NI_Illegal}, 
SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) @@ -55,6 +57,7 @@ SIMD_AS_HWINTRINSIC(Vector3, SquareRoot, 1, {NI_Ille // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector4 Intrinsics SIMD_AS_HWINTRINSIC(Vector4, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector4, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Add, 
NI_Illegal}, SimdAsHWIntrinsicFlag::None) @@ -74,6 +77,9 @@ SIMD_AS_HWINTRINSIC(VectorT128, Ceiling, 1, {NI_Ille SIMD_AS_HWINTRINSIC(VectorT128, ConditionalSelect, 3, {NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, Equals, 2, {NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_VectorT128_Equals, NI_VectorT128_Equals, NI_SSE_CompareEqual, NI_SSE2_CompareEqual}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, Floor, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE41_Floor, NI_SSE41_Floor}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, get_AllBitsSet, 0, {NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, get_Count, 0, {NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, get_Zero, 0, {NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero}, 
SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, GreaterThan, 2, {NI_SSE2_CompareGreaterThan, NI_VectorT128_GreaterThan, NI_SSE2_CompareGreaterThan, NI_VectorT128_GreaterThan, NI_SSE2_CompareGreaterThan, NI_VectorT128_GreaterThan, NI_VectorT128_GreaterThan, NI_VectorT128_GreaterThan, NI_SSE_CompareGreaterThan, NI_SSE2_CompareGreaterThan}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, GreaterThanOrEqual, 2, {NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_SSE_CompareGreaterThanOrEqual, NI_SSE2_CompareGreaterThanOrEqual}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, LessThan, 2, {NI_SSE2_CompareLessThan, NI_VectorT128_LessThan, NI_SSE2_CompareLessThan, NI_VectorT128_LessThan, NI_SSE2_CompareLessThan, NI_VectorT128_LessThan, NI_VectorT128_LessThan, NI_VectorT128_LessThan, NI_SSE_CompareLessThan, NI_SSE2_CompareLessThan}, SimdAsHWIntrinsicFlag::None) @@ -100,6 +106,9 @@ SIMD_AS_HWINTRINSIC(VectorT256, Ceiling, 1, {NI_Ille SIMD_AS_HWINTRINSIC(VectorT256, ConditionalSelect, 3, {NI_VectorT256_ConditionalSelect, NI_VectorT256_ConditionalSelect, NI_VectorT256_ConditionalSelect, NI_VectorT256_ConditionalSelect, NI_VectorT256_ConditionalSelect, NI_VectorT256_ConditionalSelect, NI_VectorT256_ConditionalSelect, NI_VectorT256_ConditionalSelect, NI_VectorT256_ConditionalSelect, NI_VectorT256_ConditionalSelect}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT256, Equals, 2, {NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX_CompareEqual, NI_AVX_CompareEqual}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT256, Floor, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, 
NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AVX_Floor, NI_AVX_Floor}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT256, get_AllBitsSet, 0, {NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT256, get_Count, 0, {NI_Vector256_get_Count, NI_Vector256_get_Count, NI_Vector256_get_Count, NI_Vector256_get_Count, NI_Vector256_get_Count, NI_Vector256_get_Count, NI_Vector256_get_Count, NI_Vector256_get_Count, NI_Vector256_get_Count, NI_Vector256_get_Count}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT256, get_Zero, 0, {NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT256, GreaterThan, 2, {NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX_CompareGreaterThan, NI_AVX_CompareGreaterThan}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT256, GreaterThanOrEqual, 2, {NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_AVX_CompareGreaterThanOrEqual, NI_AVX_CompareGreaterThanOrEqual}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT256, LessThan, 2, {NI_AVX2_CompareLessThan, NI_VectorT256_LessThan, 
NI_AVX2_CompareLessThan, NI_VectorT256_LessThan, NI_AVX2_CompareLessThan, NI_VectorT256_LessThan, NI_AVX2_CompareLessThan, NI_VectorT256_LessThan, NI_AVX_CompareLessThan, NI_AVX_CompareLessThan}, SimdAsHWIntrinsicFlag::None) diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs index e27b5bf883d1c..cba0d4acc00e4 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs @@ -84,12 +84,12 @@ public static Vector One } private static readonly Vector s_one = new Vector(GetOneValue()); - internal static Vector AllOnes + internal static Vector AllBitsSet { [Intrinsic] - get => s_allOnes; + get => s_allBitsSet; } - private static readonly Vector s_allOnes = new Vector(GetAllBitsSetValue()); + private static readonly Vector s_allBitsSet = new Vector(GetAllBitsSetValue()); #endregion Static Members #region Constructors @@ -1722,7 +1722,7 @@ public readonly bool TryCopyTo(Span destination) /// The one's complement vector. 
[MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector operator ~(Vector value) => - s_allOnes ^ value; + AllBitsSet ^ value; #endregion Bitwise Operators #region Logical Operators diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.tt b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.tt index 86bd98984a947..bcfb968480438 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.tt +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.tt @@ -89,12 +89,12 @@ namespace System.Numerics } private static readonly Vector s_one = new Vector(GetOneValue()); - internal static Vector AllOnes + internal static Vector AllBitsSet { [Intrinsic] - get => s_allOnes; + get => s_allBitsSet; } - private static readonly Vector s_allOnes = new Vector(GetAllBitsSetValue()); + private static readonly Vector s_allBitsSet = new Vector(GetAllBitsSetValue()); #endregion Static Members #region Constructors @@ -884,7 +884,7 @@ namespace System.Numerics /// The one's complement vector. 
[MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector operator ~(Vector value) => - s_allOnes ^ value; + AllBitsSet ^ value; #endregion Bitwise Operators #region Logical Operators diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_Operations.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_Operations.cs index f16d1c4fc5777..4d97d1ad70caa 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_Operations.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_Operations.cs @@ -230,7 +230,7 @@ public static Vector LessThan(Vector left, Vector right) public static bool LessThanAll(Vector left, Vector right) where T : struct { Vector cond = (Vector)Vector.LessThan(left, right); - return cond.Equals(Vector.AllOnes); + return cond.Equals(Vector.AllBitsSet); } /// @@ -328,7 +328,7 @@ public static Vector LessThanOrEqual(Vector left, Vector r public static bool LessThanOrEqualAll(Vector left, Vector right) where T : struct { Vector cond = (Vector)Vector.LessThanOrEqual(left, right); - return cond.Equals(Vector.AllOnes); + return cond.Equals(Vector.AllBitsSet); } /// @@ -427,7 +427,7 @@ public static Vector GreaterThan(Vector left, Vector right) public static bool GreaterThanAll(Vector left, Vector right) where T : struct { Vector cond = (Vector)Vector.GreaterThan(left, right); - return cond.Equals(Vector.AllOnes); + return cond.Equals(Vector.AllBitsSet); } /// @@ -526,7 +526,7 @@ public static Vector GreaterThanOrEqual(Vector left, Vector(Vector left, Vector right) where T : struct { Vector cond = (Vector)Vector.GreaterThanOrEqual(left, right); - return cond.Equals(Vector.AllOnes); + return cond.Equals(Vector.AllBitsSet); } /// From 33c744ff3428ed399ce27b884490855eb0a62f6b Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 15 May 2020 10:51:18 -0700 Subject: [PATCH 05/24] Porting op_Explicit to use SimdAsHWIntrinsic --- src/coreclr/src/jit/simdashwintrinsic.cpp | 19 
+++++++++++++++++++ .../src/jit/simdashwintrinsiclistarm64.h | 1 + .../src/jit/simdashwintrinsiclistxarch.h | 2 ++ 3 files changed, 22 insertions(+) diff --git a/src/coreclr/src/jit/simdashwintrinsic.cpp b/src/coreclr/src/jit/simdashwintrinsic.cpp index d20ac143f6cb8..ad8c5dc63db92 100644 --- a/src/coreclr/src/jit/simdashwintrinsic.cpp +++ b/src/coreclr/src/jit/simdashwintrinsic.cpp @@ -353,6 +353,25 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, { case 1: { + bool isOpExplicit = (intrinsic == NI_VectorT128_op_Explicit); + +#if defined(TARGET_XARCH) + isOpExplicit |= (intrinsic == NI_VectorT256_op_Explicit); +#endif + + if (isOpExplicit) + { + // We fold away the cast here, as it only exists to satisfy the + // type system. It is safe to do this here since the op1 type + // and the signature return type are both the same TYP_SIMD. + + op1 = impSIMDPopStack(retType, /* expectAddr: */ false, sig->retTypeClass); + SetOpLclRelatedToSIMDIntrinsic(op1); + assert(op1->gtType == getSIMDTypeForSize(getSIMDTypeSizeInBytes(sig->retTypeSigClass))); + + return op1; + } + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass))); op1 = getArgForHWIntrinsic(argType, argClass, isInstanceMethod); diff --git a/src/coreclr/src/jit/simdashwintrinsiclistarm64.h b/src/coreclr/src/jit/simdashwintrinsiclistarm64.h index 47b80dc99fe78..2a6f380d2694a 100644 --- a/src/coreclr/src/jit/simdashwintrinsiclistarm64.h +++ b/src/coreclr/src/jit/simdashwintrinsiclistarm64.h @@ -91,6 +91,7 @@ SIMD_AS_HWINTRINSIC(VectorT128, op_BitwiseAnd, 2, {NI_AdvS SIMD_AS_HWINTRINSIC(VectorT128, op_BitwiseOr, 2, {NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Divide, 
NI_AdvSimd_Arm64_Divide}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, op_ExclusiveOr, 2, {NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, op_Explicit, 1, {NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, op_Multiply, 2, {NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_Illegal, NI_Illegal, NI_AdvSimd_Multiply, NI_AdvSimd_Arm64_Multiply}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, op_Subtraction, 2, {NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Arm64_Subtract}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, SquareRoot, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Sqrt, NI_AdvSimd_Arm64_Sqrt}, SimdAsHWIntrinsicFlag::None) diff --git a/src/coreclr/src/jit/simdashwintrinsiclistxarch.h b/src/coreclr/src/jit/simdashwintrinsiclistxarch.h index d1029a7905bfe..aa8f4cb0a3078 100644 --- a/src/coreclr/src/jit/simdashwintrinsiclistxarch.h +++ b/src/coreclr/src/jit/simdashwintrinsiclistxarch.h @@ -91,6 +91,7 @@ SIMD_AS_HWINTRINSIC(VectorT128, op_BitwiseAnd, 2, {NI_SSE2 SIMD_AS_HWINTRINSIC(VectorT128, op_BitwiseOr, 2, {NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE_Or, NI_SSE2_Or}, SimdAsHWIntrinsicFlag::None) 
SIMD_AS_HWINTRINSIC(VectorT128, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Divide, NI_SSE2_Divide}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, op_ExclusiveOr, 2, {NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE_Xor, NI_SSE2_Xor}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, op_Explicit, 1, {NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_SSE2_MultiplyLow, NI_Illegal, NI_VectorT128_op_Multiply, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Multiply, NI_SSE2_Multiply}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, op_Subtraction, 2, {NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE_Subtract, NI_SSE2_Subtract}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, SquareRoot, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Sqrt, NI_SSE2_Sqrt}, SimdAsHWIntrinsicFlag::None) @@ -120,6 +121,7 @@ SIMD_AS_HWINTRINSIC(VectorT256, op_BitwiseAnd, 2, {NI_AVX2 SIMD_AS_HWINTRINSIC(VectorT256, op_BitwiseOr, 2, {NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX_Or, NI_AVX_Or}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT256, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AVX_Divide, NI_AVX_Divide}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT256, op_ExclusiveOr, 2, 
{NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX_Xor, NI_AVX_Xor}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT256, op_Explicit, 1, {NI_VectorT256_op_Explicit, NI_VectorT256_op_Explicit, NI_VectorT256_op_Explicit, NI_VectorT256_op_Explicit, NI_VectorT256_op_Explicit, NI_VectorT256_op_Explicit, NI_VectorT256_op_Explicit, NI_VectorT256_op_Explicit, NI_VectorT256_op_Explicit, NI_VectorT256_op_Explicit}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT256, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_AVX2_MultiplyLow, NI_Illegal, NI_AVX2_MultiplyLow, NI_Illegal, NI_Illegal, NI_Illegal, NI_AVX_Multiply, NI_AVX_Multiply}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT256, op_Subtraction, 2, {NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX_Subtract, NI_AVX_Subtract}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT256, SquareRoot, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AVX_Sqrt, NI_AVX_Sqrt}, SimdAsHWIntrinsicFlag::None) From 2c2984ad6d09fd32627a0d991080cf02bd758d67 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sat, 16 May 2020 13:27:44 -0700 Subject: [PATCH 06/24] Porting op_Equality and op_Inequality to use SimdAsHWIntrinsic --- src/coreclr/src/jit/gentree.h | 45 +++- src/coreclr/src/jit/hwintrinsiclistarm64.h | 4 + src/coreclr/src/jit/hwintrinsiclistxarch.h | 8 +- src/coreclr/src/jit/lower.h | 1 + src/coreclr/src/jit/lowerarmarch.cpp | 120 +++++++++ src/coreclr/src/jit/lowerxarch.cpp | 232 ++++++++++++++++++ src/coreclr/src/jit/simdashwintrinsic.cpp | 6 +- .../src/jit/simdashwintrinsiclistarm64.h | 8 + .../src/jit/simdashwintrinsiclistxarch.h | 10 + src/coreclr/src/jit/vartype.h | 32 +++ 10 files changed, 460 insertions(+), 6 deletions(-) diff --git a/src/coreclr/src/jit/gentree.h 
b/src/coreclr/src/jit/gentree.h index 7b7b5af861d21..7ba139dfa8893 100644 --- a/src/coreclr/src/jit/gentree.h +++ b/src/coreclr/src/jit/gentree.h @@ -6794,7 +6794,50 @@ inline bool GenTree::IsIntegralConstVector(ssize_t constVal) assert(gtGetOp2IfPresent() == nullptr); return true; } -#endif +#endif // FEATURE_SIMD + +#ifdef FEATURE_HW_INTRINSICS + if (gtOper == GT_HWINTRINSIC) + { + GenTreeHWIntrinsic* node = AsHWIntrinsic(); + + if (!varTypeIsIntegral(node->gtSIMDBaseType)) + { + // Can't be an integral constant + return false; + } + + GenTree* op1 = gtGetOp1(); + GenTree* op2 = gtGetOp2(); + + NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; + + if (op1 == nullptr) + { + assert(op2 == nullptr); + + if (constVal == 0) + { +#if defined(TARGET_XARCH) + return (intrinsicId == NI_Vector128_get_Zero) || (intrinsicId == NI_Vector256_get_Zero); +#elif defined(TARGET_ARM64) + return (intrinsicId == NI_Vector64_get_Zero) || (intrinsicId == NI_Vector128_get_Zero); +#endif // !TARGET_XARCH && !TARGET_ARM64 + } + } + else if ((op2 == nullptr) && !op1->OperIsList()) + { + if (op1->IsIntegralConst(constVal)) + { +#if defined(TARGET_XARCH) + return (intrinsicId == NI_Vector128_Create) || (intrinsicId == NI_Vector256_Create); +#elif defined(TARGET_ARM64) + return (intrinsicId == NI_Vector64_Create) || (intrinsicId == NI_Vector128_Create); +#endif // !TARGET_XARCH && !TARGET_ARM64 + } + } + } +#endif // FEATURE_HW_INTRINSICS return false; } diff --git a/src/coreclr/src/jit/hwintrinsiclistarm64.h b/src/coreclr/src/jit/hwintrinsiclistarm64.h index f42881ccff5a7..e66bc6fee30da 100644 --- a/src/coreclr/src/jit/hwintrinsiclistarm64.h +++ b/src/coreclr/src/jit/hwintrinsiclistarm64.h @@ -29,6 +29,8 @@ HARDWARE_INTRINSIC(Vector64, get_AllBitsSet, HARDWARE_INTRINSIC(Vector64, get_Count, 8, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) 
HARDWARE_INTRINSIC(Vector64, get_Zero, 8, 0, {INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector64, GetElement, 8, 2, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_IMM, HW_Flag_NoJmpTableIMM|HW_Flag_SupportsContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Vector64, op_Equality, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector64, op_Inequality, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector64, ToScalar, 8, 1, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** @@ -53,6 +55,8 @@ HARDWARE_INTRINSIC(Vector128, get_AllBitsSet, 1 HARDWARE_INTRINSIC(Vector128, get_Count, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector128, get_Zero, 16, 0, {INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, 
INS_movi, INS_movi, INS_movi}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector128, GetElement, 16, 2, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_IMM, HW_Flag_NoJmpTableIMM|HW_Flag_SupportsContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Vector128, op_Equality, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector128, op_Inequality, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector128, ToScalar, 16, 1, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** diff --git a/src/coreclr/src/jit/hwintrinsiclistxarch.h b/src/coreclr/src/jit/hwintrinsiclistxarch.h index c041364c966bf..c6017fb12c44c 100644 --- a/src/coreclr/src/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/src/jit/hwintrinsiclistxarch.h @@ -50,10 +50,12 @@ HARDWARE_INTRINSIC(Vector128, get_AllBitsSet, HARDWARE_INTRINSIC(Vector128, get_Count, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, 
HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Vector128, get_Zero, 16, 0, {INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Vector128, GetElement, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, WithElement, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, op_Equality, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector128, op_Inequality, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector128, ToScalar, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_movsdsse2}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Vector128, ToVector256, 16, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Vector128, ToVector256Unsafe, 16, 1, {INS_movdqu, INS_movdqu, 
INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Vector128, WithElement, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags @@ -80,9 +82,11 @@ HARDWARE_INTRINSIC(Vector256, get_Zero, HARDWARE_INTRINSIC(Vector256, Create, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector256, CreateScalarUnsafe, 32, 1, {INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_movss, INS_movsdsse2}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Vector256, GetElement, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, WithElement, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector256, GetLower, 32, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Vector256, op_Equality, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector256, op_Inequality, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector256, ToScalar, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_movsdsse2}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Vector256, WithElement, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags diff --git a/src/coreclr/src/jit/lower.h 
b/src/coreclr/src/jit/lower.h index ae19cd29c3c3b..b69e44b459fa7 100644 --- a/src/coreclr/src/jit/lower.h +++ b/src/coreclr/src/jit/lower.h @@ -315,6 +315,7 @@ class Lowering final : public Phase #ifdef FEATURE_HW_INTRINSICS void LowerHWIntrinsic(GenTreeHWIntrinsic* node); void LowerHWIntrinsicCC(GenTreeHWIntrinsic* node, NamedIntrinsic newIntrinsicId, GenCondition condition); + void LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp); void LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node); void LowerFusedMultiplyAdd(GenTreeHWIntrinsic* node); diff --git a/src/coreclr/src/jit/lowerarmarch.cpp b/src/coreclr/src/jit/lowerarmarch.cpp index 899531b896d21..1f20b1a5cd227 100644 --- a/src/coreclr/src/jit/lowerarmarch.cpp +++ b/src/coreclr/src/jit/lowerarmarch.cpp @@ -553,6 +553,20 @@ void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) return; } + case NI_Vector64_op_Equality: + case NI_Vector128_op_Equality: + { + LowerHWIntrinsicCmpOp(node, GT_EQ); + return; + } + + case NI_Vector64_op_Inequality: + case NI_Vector128_op_Inequality: + { + LowerHWIntrinsicCmpOp(node, GT_NE); + return; + } + default: break; } @@ -560,6 +574,112 @@ void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) ContainCheckHWIntrinsic(node); } +//---------------------------------------------------------------------------------------------- +// Lowering::LowerHWIntrinsicCmpOp: Lowers a Vector64 or Vector128 comparison intrinsic +// +// Arguments: +// node - The hardware intrinsic node. 
+// cmpOp - The comparison operation, currently must be GT_EQ or GT_NE +// +void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp) +{ + NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; + var_types baseType = node->gtSIMDBaseType; + unsigned simdSize = node->gtSIMDSize; + var_types simdType = Compiler::getSIMDTypeForSize(simdSize); + + assert((intrinsicId == NI_Vector64_op_Equality) || + (intrinsicId == NI_Vector64_op_Inequality) || + (intrinsicId == NI_Vector128_op_Equality) || + (intrinsicId == NI_Vector128_op_Inequality)); + + assert(varTypeIsSIMD(simdType)); + assert(varTypeIsArithmetic(baseType)); + assert(simdSize != 0); + assert(node->gtType == TYP_BOOL); + assert((cmpOp == GT_EQ) || (cmpOp == GT_NE)); + + // We have the following (with the appropriate simd size and where the intrinsic could be op_Inequality): + // /--* op2 simd + // /--* op1 simd + // node = * HWINTRINSIC simd T op_Equality + + GenTree* op1 = node->gtGetOp1(); + GenTree* op2 = node->gtGetOp2(); + + NamedIntrinsic cmpIntrinsic; + + switch (baseType) + { + case TYP_BYTE: + case TYP_UBYTE: + case TYP_SHORT: + case TYP_USHORT: + case TYP_INT: + case TYP_UINT: + case TYP_FLOAT: + { + cmpIntrinsic = NI_AdvSimd_CompareEqual; + break; + } + + case TYP_LONG: + case TYP_ULONG: + case TYP_DOUBLE: + { + cmpIntrinsic = NI_AdvSimd_Arm64_CompareEqual; + break; + } + + default: + { + unreached(); + } + } + + GenTree* cmp = comp->gtNewSimdHWIntrinsicNode(simdType, op1, op2, cmpIntrinsic, baseType, simdSize); + BlockRange().InsertAfter(op2, cmp); + LowerNode(cmp); + + if ((baseType == TYP_FLOAT) && (simdSize == 12)) + { + // For TYP_SIMD12 the unused upper element must not take part in the comparison, so it is forced to all-ones ("equal") and cannot lower the MinAcross result + + GenTree* idxCns = comp->gtNewIconNode(3, TYP_INT); + BlockRange().InsertAfter(cmp, idxCns); + + GenTree* insCns = comp->gtNewIconNode(
-1, TYP_INT); + BlockRange().InsertAfter(idxCns, insCns); + + GenTree* tmp = comp->gtNewSimdAsHWIntrinsicNode(simdType, cmp, idxCns, insCns, NI_AdvSimd_Insert, TYP_INT, simdSize); + BlockRange().InsertAfter(insCns, tmp); + LowerNode(tmp); + + cmp = tmp; + } + + GenTree* msk = comp->gtNewSimdHWIntrinsicNode(simdType, cmp, NI_AdvSimd_Arm64_MinAcross, TYP_UBYTE, simdSize); + BlockRange().InsertAfter(cmp, msk); + LowerNode(msk); + + GenTree* zroCns = comp->gtNewIconNode(0, TYP_INT); + BlockRange().InsertAfter(msk, zroCns); + + GenTree* val = comp->gtNewSimdAsHWIntrinsicNode(TYP_UBYTE, msk, zroCns, NI_AdvSimd_Extract, TYP_UBYTE, simdSize); + BlockRange().InsertAfter(zroCns, val); + LowerNode(val); + zroCns = comp->gtNewIconNode(0, TYP_INT); + BlockRange().InsertAfter(val, zroCns); + // CompareEqual sets every bit of an element that matched, so the byte-wise MinAcross is non-zero only when all elements matched; the test against zero is therefore the inverse of cmpOp + node->ChangeOper((cmpOp == GT_EQ) ? GT_NE : GT_EQ); + + node->gtType = TYP_INT; + node->gtOp1 = val; + node->gtOp2 = zroCns; + + LowerNode(node); +} + //---------------------------------------------------------------------------------------------- // Lowering::LowerHWIntrinsicCreate: Lowers a Vector64 or Vector128 Create call // diff --git a/src/coreclr/src/jit/lowerxarch.cpp b/src/coreclr/src/jit/lowerxarch.cpp index f2a66d1166f76..4d0428d5b9e45 100644 --- a/src/coreclr/src/jit/lowerxarch.cpp +++ b/src/coreclr/src/jit/lowerxarch.cpp @@ -947,6 +947,20 @@ void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) return; } + case NI_Vector128_op_Equality: + case NI_Vector256_op_Equality: + { + LowerHWIntrinsicCmpOp(node, GT_EQ); + return; + } + + case NI_Vector128_op_Inequality: + case NI_Vector256_op_Inequality: + { + LowerHWIntrinsicCmpOp(node, GT_NE); + return; + } + case NI_SSE2_CompareGreaterThan: { if (node->gtSIMDBaseType != TYP_DOUBLE) @@ -1100,6 +1114,224 @@ void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) ContainCheckHWIntrinsic(node); } +//---------------------------------------------------------------------------------------------- +// Lowering::LowerHWIntrinsicCmpOp: Lowers a Vector128 or Vector256 comparison 
intrinsic +// +// Arguments: +// node - The hardware intrinsic node. +// cmpOp - The comparison operation, currently must be GT_EQ or GT_NE +// +void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp) +{ + NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; + var_types baseType = node->gtSIMDBaseType; + unsigned simdSize = node->gtSIMDSize; + var_types simdType = Compiler::getSIMDTypeForSize(simdSize); + + assert((intrinsicId == NI_Vector128_op_Equality) || + (intrinsicId == NI_Vector128_op_Inequality) || + (intrinsicId == NI_Vector256_op_Equality) || + (intrinsicId == NI_Vector256_op_Inequality)); + + assert(varTypeIsSIMD(simdType)); + assert(varTypeIsArithmetic(baseType)); + assert(simdSize != 0); + assert(node->gtType == TYP_BOOL); + assert((cmpOp == GT_EQ) || (cmpOp == GT_NE)); + + // We have the following (with the appropriate simd size and where the intrinsic could be op_Inequality): + // /--* op2 simd + // /--* op1 simd + // node = * HWINTRINSIC simd T op_Equality + + GenTree* op1 = node->gtGetOp1(); + GenTree* op2 = node->gtGetOp2(); + + if (op2->IsIntegralConstVector(0) && comp->compOpportunisticallyDependsOn(InstructionSet_SSE41)) + { + // On SSE4.1 or higher we can optimize comparisons against zero to + // just use PTEST. We can't support it for floating-point, however, + // as it has both +0.0 and -0.0 where +0.0 == -0.0 + // NOTE(review): node still has (op1, zero) as operands here; PTEST sets ZF from (op1 AND op2) == 0, which looks always true with a zero operand - confirm whether op2 should be replaced by a copy of op1 + GenCondition cmpCnd = (cmpOp == GT_EQ) ? 
GenCondition::EQ : GenCondition::NE; + + if (simdSize == 32) + { + node->gtHWIntrinsicId = NI_AVX_TestZ; + LowerHWIntrinsicCC(node, NI_AVX_PTEST, cmpCnd); + } + else + { + node->gtHWIntrinsicId = NI_SSE41_TestZ; + LowerHWIntrinsicCC(node, NI_SSE41_PTEST, cmpCnd); + } + + return; + } + + NamedIntrinsic cmpIntrinsic; + var_types cmpType; + NamedIntrinsic mskIntrinsic; + var_types mskType; + int mskConstant; + + switch (baseType) + { + case TYP_BYTE: + case TYP_UBYTE: + case TYP_SHORT: + case TYP_USHORT: + case TYP_INT: + case TYP_UINT: + { + cmpType = baseType; + mskType = TYP_UBYTE; + + if (simdSize == 32) + { + cmpIntrinsic = NI_AVX2_CompareEqual; + mskIntrinsic = NI_AVX2_MoveMask; + mskConstant = -1; + } + else + { + assert(simdSize == 16); + + cmpIntrinsic = NI_SSE2_CompareEqual; + mskIntrinsic = NI_SSE2_MoveMask; + mskConstant = 0xFFFF; + } + break; + } + + case TYP_LONG: + case TYP_ULONG: + { + mskType = TYP_UBYTE; + + if (simdSize == 32) + { + cmpIntrinsic = NI_AVX2_CompareEqual; + cmpType = baseType; + mskIntrinsic = NI_AVX2_MoveMask; + mskConstant = -1; + } + else + { + assert(simdSize == 16); + + if (comp->compOpportunisticallyDependsOn(InstructionSet_SSE41)) + { + cmpIntrinsic = NI_SSE41_CompareEqual; + cmpType = baseType; + } + else + { + cmpIntrinsic = NI_SSE2_CompareEqual; + cmpType = TYP_UINT; + } + + mskIntrinsic = NI_SSE2_MoveMask; + mskConstant = 0xFFFF; + } + break; + } + + case TYP_FLOAT: + { + cmpType = baseType; + mskType = baseType; + + if (simdSize == 32) + { + cmpIntrinsic = NI_AVX_CompareEqual; + mskIntrinsic = NI_AVX_MoveMask; + mskConstant = 0xFF; + } + else + { + cmpIntrinsic = NI_SSE_CompareEqual; + mskIntrinsic = NI_SSE_MoveMask; + + if (simdSize == 16) + { + mskConstant = 0xF; + } + else if (simdSize == 12) + { + mskConstant = 0x7; + } + else + { + assert(simdSize == 8); + mskConstant = 0x3; + } + } + break; + } + + case TYP_DOUBLE: + { + cmpType = baseType; + mskType = baseType; + + if (simdSize == 32) + { + cmpIntrinsic = 
NI_AVX_CompareEqual; + mskIntrinsic = NI_AVX_MoveMask; + mskConstant = 0xF; + } + else + { + assert(simdSize == 16); + + cmpIntrinsic = NI_SSE2_CompareEqual; + mskIntrinsic = NI_SSE2_MoveMask; + mskConstant = 0x3; + } + break; + } + + default: + { + unreached(); + } + } + + GenTree* cmp = comp->gtNewSimdHWIntrinsicNode(simdType, op1, op2, cmpIntrinsic, cmpType, simdSize); + BlockRange().InsertAfter(op2, cmp); + LowerNode(cmp); + + GenTree* msk = comp->gtNewSimdHWIntrinsicNode(TYP_INT, cmp, mskIntrinsic, mskType, simdSize); + BlockRange().InsertAfter(cmp, msk); + LowerNode(msk); + + GenTree* mskCns = comp->gtNewIconNode(mskConstant, TYP_INT); + BlockRange().InsertAfter(msk, mskCns); + + if ((baseType == TYP_FLOAT) && (simdSize < 16)) + { + // For TYP_SIMD8 and TYP_SIMD12 we need to clear the upper bits and can't assume their value; the new AND node (not msk, which was already lowered above) is the one that must be lowered + + GenTree* tmp = comp->gtNewOperNode(GT_AND, TYP_INT, msk, mskCns); + BlockRange().InsertAfter(mskCns, tmp); + LowerNode(tmp); + + msk = tmp; + + mskCns = comp->gtNewIconNode(mskConstant, TYP_INT); + BlockRange().InsertAfter(msk, mskCns); + } + + node->ChangeOper(cmpOp); + + node->gtType = TYP_INT; + node->gtOp1 = msk; + node->gtOp2 = mskCns; + + LowerNode(node); +} + //---------------------------------------------------------------------------------------------- // Lowering::LowerHWIntrinsicCreate: Lowers a Vector128 or Vector256 Create call // diff --git a/src/coreclr/src/jit/simdashwintrinsic.cpp b/src/coreclr/src/jit/simdashwintrinsic.cpp index ad8c5dc63db92..2b7113e354b0a 100644 --- a/src/coreclr/src/jit/simdashwintrinsic.cpp +++ b/src/coreclr/src/jit/simdashwintrinsic.cpp @@ -164,6 +164,8 @@ GenTree* Compiler::impSimdAsHWIntrinsic(NamedIntrinsic intrinsic, { assert(!mustExpand); + CORINFO_CLASS_HANDLE argClass; + if (!featureSIMD) { // We can't support SIMD intrinsics if the JIT doesn't support the feature @@ -180,8 +182,6 @@ GenTree* Compiler::impSimdAsHWIntrinsic(NamedIntrinsic intrinsic, baseType = 
getBaseTypeAndSizeOfSIMDType(clsHnd, &simdSize); assert(simdSize != 0); - CORINFO_CLASS_HANDLE argClass; - if (retType == TYP_STRUCT) { baseType = getBaseTypeAndSizeOfSIMDType(sig->retTypeSigClass, &simdSize); @@ -707,7 +707,7 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, } #else #error Unsupported platform -#endif // TARGET_XARCH +#endif // !TARGET_XARCH && !TARGET_ARM64 default: { diff --git a/src/coreclr/src/jit/simdashwintrinsiclistarm64.h b/src/coreclr/src/jit/simdashwintrinsiclistarm64.h index 2a6f380d2694a..9de5ce65c8d17 100644 --- a/src/coreclr/src/jit/simdashwintrinsiclistarm64.h +++ b/src/coreclr/src/jit/simdashwintrinsiclistarm64.h @@ -32,6 +32,8 @@ SIMD_AS_HWINTRINSIC(Vector2, Max, 2, {NI_Ille SIMD_AS_HWINTRINSIC(Vector2, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Divide, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector2, op_Equality, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector64_op_Equality, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector2, op_Inequality, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector64_op_Inequality, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) 
SIMD_AS_HWINTRINSIC(Vector2, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, SquareRoot, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Sqrt, NI_Illegal}, SimdAsHWIntrinsicFlag::None) @@ -47,6 +49,8 @@ SIMD_AS_HWINTRINSIC(Vector3, Max, 2, {NI_Ille SIMD_AS_HWINTRINSIC(Vector3, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Divide, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector3, op_Equality, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_op_Equality, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector3, op_Inequality, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_op_Inequality, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, SquareRoot, 1, {NI_Illegal, NI_Illegal, NI_Illegal, 
NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Sqrt, NI_Illegal}, SimdAsHWIntrinsicFlag::None) @@ -62,6 +66,8 @@ SIMD_AS_HWINTRINSIC(Vector4, Max, 2, {NI_Ille SIMD_AS_HWINTRINSIC(Vector4, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Divide, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector4, op_Equality, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_op_Equality, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector4, op_Inequality, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_op_Inequality, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, SquareRoot, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Sqrt, NI_Illegal}, SimdAsHWIntrinsicFlag::None) @@ -90,8 +96,10 @@ SIMD_AS_HWINTRINSIC(VectorT128, op_Addition, 2, {NI_AdvS SIMD_AS_HWINTRINSIC(VectorT128, op_BitwiseAnd, 2, {NI_AdvSimd_And, NI_AdvSimd_And, 
NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, op_BitwiseOr, 2, {NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Divide, NI_AdvSimd_Arm64_Divide}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, op_Equality, 2, {NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, op_ExclusiveOr, 2, {NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, op_Explicit, 1, {NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, op_Inequality, 2, {NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, op_Multiply, 2, {NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, 
NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_Illegal, NI_Illegal, NI_AdvSimd_Multiply, NI_AdvSimd_Arm64_Multiply}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, op_Subtraction, 2, {NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Arm64_Subtract}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, SquareRoot, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Sqrt, NI_AdvSimd_Arm64_Sqrt}, SimdAsHWIntrinsicFlag::None) diff --git a/src/coreclr/src/jit/simdashwintrinsiclistxarch.h b/src/coreclr/src/jit/simdashwintrinsiclistxarch.h index aa8f4cb0a3078..fef502163ee34 100644 --- a/src/coreclr/src/jit/simdashwintrinsiclistxarch.h +++ b/src/coreclr/src/jit/simdashwintrinsiclistxarch.h @@ -32,6 +32,8 @@ SIMD_AS_HWINTRINSIC(Vector2, Max, 2, {NI_Ille SIMD_AS_HWINTRINSIC(Vector2, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_op_Division, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector2, op_Equality, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_op_Equality, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector2, op_Inequality, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, 
NI_Vector128_op_Inequality, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, SquareRoot, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Sqrt, NI_Illegal}, SimdAsHWIntrinsicFlag::None) @@ -47,6 +49,8 @@ SIMD_AS_HWINTRINSIC(Vector3, Max, 2, {NI_Ille SIMD_AS_HWINTRINSIC(Vector3, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_op_Division, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector3, op_Equality, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_op_Equality, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector3, op_Inequality, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_op_Inequality, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, op_Subtraction, 
2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, SquareRoot, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Sqrt, NI_Illegal}, SimdAsHWIntrinsicFlag::None) @@ -62,6 +66,8 @@ SIMD_AS_HWINTRINSIC(Vector4, Max, 2, {NI_Ille SIMD_AS_HWINTRINSIC(Vector4, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Divide, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector4, op_Equality, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_op_Equality, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector4, op_Inequality, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_op_Inequality, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, SquareRoot, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Sqrt, NI_Illegal}, 
SimdAsHWIntrinsicFlag::None) @@ -90,8 +96,10 @@ SIMD_AS_HWINTRINSIC(VectorT128, op_Addition, 2, {NI_SSE2 SIMD_AS_HWINTRINSIC(VectorT128, op_BitwiseAnd, 2, {NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE_And, NI_SSE2_And}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, op_BitwiseOr, 2, {NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE_Or, NI_SSE2_Or}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Divide, NI_SSE2_Divide}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, op_Equality, 2, {NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, op_ExclusiveOr, 2, {NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE_Xor, NI_SSE2_Xor}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, op_Explicit, 1, {NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, op_Inequality, 2, {NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality}, SimdAsHWIntrinsicFlag::None) 
SIMD_AS_HWINTRINSIC(VectorT128, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_SSE2_MultiplyLow, NI_Illegal, NI_VectorT128_op_Multiply, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Multiply, NI_SSE2_Multiply}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, op_Subtraction, 2, {NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE_Subtract, NI_SSE2_Subtract}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, SquareRoot, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Sqrt, NI_SSE2_Sqrt}, SimdAsHWIntrinsicFlag::None) @@ -120,8 +128,10 @@ SIMD_AS_HWINTRINSIC(VectorT256, op_Addition, 2, {NI_AVX2 SIMD_AS_HWINTRINSIC(VectorT256, op_BitwiseAnd, 2, {NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX_And, NI_AVX_And}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT256, op_BitwiseOr, 2, {NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX_Or, NI_AVX_Or}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT256, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AVX_Divide, NI_AVX_Divide}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT256, op_Equality, 2, {NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT256, op_ExclusiveOr, 2, {NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX_Xor, NI_AVX_Xor}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT256, op_Explicit, 1, 
{NI_VectorT256_op_Explicit, NI_VectorT256_op_Explicit, NI_VectorT256_op_Explicit, NI_VectorT256_op_Explicit, NI_VectorT256_op_Explicit, NI_VectorT256_op_Explicit, NI_VectorT256_op_Explicit, NI_VectorT256_op_Explicit, NI_VectorT256_op_Explicit, NI_VectorT256_op_Explicit}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT256, op_Inequality, 2, {NI_Vector256_op_Inequality, NI_Vector256_op_Inequality, NI_Vector256_op_Inequality, NI_Vector256_op_Inequality, NI_Vector256_op_Inequality, NI_Vector256_op_Inequality, NI_Vector256_op_Inequality, NI_Vector256_op_Inequality, NI_Vector256_op_Inequality, NI_Vector256_op_Inequality}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT256, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_AVX2_MultiplyLow, NI_Illegal, NI_AVX2_MultiplyLow, NI_Illegal, NI_Illegal, NI_Illegal, NI_AVX_Multiply, NI_AVX_Multiply}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT256, op_Subtraction, 2, {NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX_Subtract, NI_AVX_Subtract}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT256, SquareRoot, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AVX_Sqrt, NI_AVX_Sqrt}, SimdAsHWIntrinsicFlag::None) diff --git a/src/coreclr/src/jit/vartype.h b/src/coreclr/src/jit/vartype.h index d69a75e5bfb54..e34ee7e5a8df8 100644 --- a/src/coreclr/src/jit/vartype.h +++ b/src/coreclr/src/jit/vartype.h @@ -113,6 +113,12 @@ inline bool varTypeIsUnsigned(T vt) return ((varTypeClassification[TypeGet(vt)] & (VTF_UNS)) != 0); } +template +inline bool varTypeIsSigned(T vt) +{ + return varTypeIsIntegralOrI(vt) && !varTypeIsUnsigned(vt); +} + // If "vt" is an unsigned integral type, returns the corresponding signed integral type, otherwise // return "vt". 
inline var_types varTypeUnsignedToSigned(var_types vt) @@ -140,6 +146,32 @@ inline var_types varTypeUnsignedToSigned(var_types vt) } } +// If "vt" is a signed integral type, returns the corresponding unsigned integral type, otherwise +// return "vt". +inline var_types varTypeSignedToUnsigned(var_types vt) +{ + if (varTypeIsSigned(vt)) + { + switch (vt) + { + case TYP_BYTE: + return TYP_UBYTE; + case TYP_SHORT: + return TYP_USHORT; + case TYP_INT: + return TYP_UINT; + case TYP_LONG: + return TYP_ULONG; + default: + unreached(); + } + } + else + { + return vt; + } +} + template inline bool varTypeIsFloating(T vt) { From 01084b477c04c4d2b7af5fd5f4c79e849f0539ac Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sun, 3 May 2020 11:57:36 -0700 Subject: [PATCH 07/24] Changing Vector2/3/4 and Vector.Equals to forward to operator == --- .../src/System/Numerics/Vector.cs | 260 +++++++++--------- .../src/System/Numerics/Vector.tt | 113 ++++---- .../src/System/Numerics/Vector2.cs | 4 +- .../src/System/Numerics/Vector2_Intrinsics.cs | 6 +- .../src/System/Numerics/Vector3.cs | 4 +- .../src/System/Numerics/Vector3_Intrinsics.cs | 15 +- .../src/System/Numerics/Vector4.cs | 4 +- .../src/System/Numerics/Vector4_Intrinsics.cs | 11 +- 8 files changed, 200 insertions(+), 217 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs index cba0d4acc00e4..4cfebe72cab69 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs @@ -478,11 +478,7 @@ public readonly unsafe T this[int index] [MethodImpl(MethodImplOptions.AggressiveInlining)] public override readonly bool Equals(object? obj) { - if (!(obj is Vector)) - { - return false; - } - return Equals((Vector)obj); + return (obj is Vector other) && Equals(other); } /// @@ -493,130 +489,7 @@ public override readonly bool Equals(object? 
obj) [Intrinsic] public readonly bool Equals(Vector other) { - if (Vector.IsHardwareAccelerated) - { - for (int g = 0; g < Count; g++) - { - if (!ScalarEquals(this[g], other[g])) - { - return false; - } - } - return true; - } - else - { - if (typeof(T) == typeof(byte)) - { - return - this.register.byte_0 == other.register.byte_0 - && this.register.byte_1 == other.register.byte_1 - && this.register.byte_2 == other.register.byte_2 - && this.register.byte_3 == other.register.byte_3 - && this.register.byte_4 == other.register.byte_4 - && this.register.byte_5 == other.register.byte_5 - && this.register.byte_6 == other.register.byte_6 - && this.register.byte_7 == other.register.byte_7 - && this.register.byte_8 == other.register.byte_8 - && this.register.byte_9 == other.register.byte_9 - && this.register.byte_10 == other.register.byte_10 - && this.register.byte_11 == other.register.byte_11 - && this.register.byte_12 == other.register.byte_12 - && this.register.byte_13 == other.register.byte_13 - && this.register.byte_14 == other.register.byte_14 - && this.register.byte_15 == other.register.byte_15; - } - else if (typeof(T) == typeof(sbyte)) - { - return - this.register.sbyte_0 == other.register.sbyte_0 - && this.register.sbyte_1 == other.register.sbyte_1 - && this.register.sbyte_2 == other.register.sbyte_2 - && this.register.sbyte_3 == other.register.sbyte_3 - && this.register.sbyte_4 == other.register.sbyte_4 - && this.register.sbyte_5 == other.register.sbyte_5 - && this.register.sbyte_6 == other.register.sbyte_6 - && this.register.sbyte_7 == other.register.sbyte_7 - && this.register.sbyte_8 == other.register.sbyte_8 - && this.register.sbyte_9 == other.register.sbyte_9 - && this.register.sbyte_10 == other.register.sbyte_10 - && this.register.sbyte_11 == other.register.sbyte_11 - && this.register.sbyte_12 == other.register.sbyte_12 - && this.register.sbyte_13 == other.register.sbyte_13 - && this.register.sbyte_14 == other.register.sbyte_14 - && this.register.sbyte_15 == 
other.register.sbyte_15; - } - else if (typeof(T) == typeof(ushort)) - { - return - this.register.uint16_0 == other.register.uint16_0 - && this.register.uint16_1 == other.register.uint16_1 - && this.register.uint16_2 == other.register.uint16_2 - && this.register.uint16_3 == other.register.uint16_3 - && this.register.uint16_4 == other.register.uint16_4 - && this.register.uint16_5 == other.register.uint16_5 - && this.register.uint16_6 == other.register.uint16_6 - && this.register.uint16_7 == other.register.uint16_7; - } - else if (typeof(T) == typeof(short)) - { - return - this.register.int16_0 == other.register.int16_0 - && this.register.int16_1 == other.register.int16_1 - && this.register.int16_2 == other.register.int16_2 - && this.register.int16_3 == other.register.int16_3 - && this.register.int16_4 == other.register.int16_4 - && this.register.int16_5 == other.register.int16_5 - && this.register.int16_6 == other.register.int16_6 - && this.register.int16_7 == other.register.int16_7; - } - else if (typeof(T) == typeof(uint)) - { - return - this.register.uint32_0 == other.register.uint32_0 - && this.register.uint32_1 == other.register.uint32_1 - && this.register.uint32_2 == other.register.uint32_2 - && this.register.uint32_3 == other.register.uint32_3; - } - else if (typeof(T) == typeof(int)) - { - return - this.register.int32_0 == other.register.int32_0 - && this.register.int32_1 == other.register.int32_1 - && this.register.int32_2 == other.register.int32_2 - && this.register.int32_3 == other.register.int32_3; - } - else if (typeof(T) == typeof(ulong)) - { - return - this.register.uint64_0 == other.register.uint64_0 - && this.register.uint64_1 == other.register.uint64_1; - } - else if (typeof(T) == typeof(long)) - { - return - this.register.int64_0 == other.register.int64_0 - && this.register.int64_1 == other.register.int64_1; - } - else if (typeof(T) == typeof(float)) - { - return - this.register.single_0 == other.register.single_0 - && this.register.single_1 == 
other.register.single_1 - && this.register.single_2 == other.register.single_2 - && this.register.single_3 == other.register.single_3; - } - else if (typeof(T) == typeof(double)) - { - return - this.register.double_0 == other.register.double_0 - && this.register.double_1 == other.register.double_1; - } - else - { - throw new NotSupportedException(SR.Arg_TypeNotSupported); - } - } + return this == other; } /// @@ -1734,8 +1607,133 @@ public readonly bool TryCopyTo(Span destination) /// True if all elements are equal; False otherwise. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static bool operator ==(Vector left, Vector right) => - left.Equals(right); + public static bool operator ==(Vector left, Vector right) + { + if (Vector.IsHardwareAccelerated) + { + for (int g = 0; g < Count; g++) + { + if (!ScalarEquals(left[g], right[g])) + { + return false; + } + } + return true; + } + else + { + if (typeof(T) == typeof(byte)) + { + return + left.register.byte_0 == right.register.byte_0 + && left.register.byte_1 == right.register.byte_1 + && left.register.byte_2 == right.register.byte_2 + && left.register.byte_3 == right.register.byte_3 + && left.register.byte_4 == right.register.byte_4 + && left.register.byte_5 == right.register.byte_5 + && left.register.byte_6 == right.register.byte_6 + && left.register.byte_7 == right.register.byte_7 + && left.register.byte_8 == right.register.byte_8 + && left.register.byte_9 == right.register.byte_9 + && left.register.byte_10 == right.register.byte_10 + && left.register.byte_11 == right.register.byte_11 + && left.register.byte_12 == right.register.byte_12 + && left.register.byte_13 == right.register.byte_13 + && left.register.byte_14 == right.register.byte_14 + && left.register.byte_15 == right.register.byte_15; + } + else if (typeof(T) == typeof(sbyte)) + { + return + left.register.sbyte_0 == right.register.sbyte_0 + && left.register.sbyte_1 == right.register.sbyte_1 + && left.register.sbyte_2 == 
right.register.sbyte_2 + && left.register.sbyte_3 == right.register.sbyte_3 + && left.register.sbyte_4 == right.register.sbyte_4 + && left.register.sbyte_5 == right.register.sbyte_5 + && left.register.sbyte_6 == right.register.sbyte_6 + && left.register.sbyte_7 == right.register.sbyte_7 + && left.register.sbyte_8 == right.register.sbyte_8 + && left.register.sbyte_9 == right.register.sbyte_9 + && left.register.sbyte_10 == right.register.sbyte_10 + && left.register.sbyte_11 == right.register.sbyte_11 + && left.register.sbyte_12 == right.register.sbyte_12 + && left.register.sbyte_13 == right.register.sbyte_13 + && left.register.sbyte_14 == right.register.sbyte_14 + && left.register.sbyte_15 == right.register.sbyte_15; + } + else if (typeof(T) == typeof(ushort)) + { + return + left.register.uint16_0 == right.register.uint16_0 + && left.register.uint16_1 == right.register.uint16_1 + && left.register.uint16_2 == right.register.uint16_2 + && left.register.uint16_3 == right.register.uint16_3 + && left.register.uint16_4 == right.register.uint16_4 + && left.register.uint16_5 == right.register.uint16_5 + && left.register.uint16_6 == right.register.uint16_6 + && left.register.uint16_7 == right.register.uint16_7; + } + else if (typeof(T) == typeof(short)) + { + return + left.register.int16_0 == right.register.int16_0 + && left.register.int16_1 == right.register.int16_1 + && left.register.int16_2 == right.register.int16_2 + && left.register.int16_3 == right.register.int16_3 + && left.register.int16_4 == right.register.int16_4 + && left.register.int16_5 == right.register.int16_5 + && left.register.int16_6 == right.register.int16_6 + && left.register.int16_7 == right.register.int16_7; + } + else if (typeof(T) == typeof(uint)) + { + return + left.register.uint32_0 == right.register.uint32_0 + && left.register.uint32_1 == right.register.uint32_1 + && left.register.uint32_2 == right.register.uint32_2 + && left.register.uint32_3 == right.register.uint32_3; + } + else if (typeof(T) == 
typeof(int)) + { + return + left.register.int32_0 == right.register.int32_0 + && left.register.int32_1 == right.register.int32_1 + && left.register.int32_2 == right.register.int32_2 + && left.register.int32_3 == right.register.int32_3; + } + else if (typeof(T) == typeof(ulong)) + { + return + left.register.uint64_0 == right.register.uint64_0 + && left.register.uint64_1 == right.register.uint64_1; + } + else if (typeof(T) == typeof(long)) + { + return + left.register.int64_0 == right.register.int64_0 + && left.register.int64_1 == right.register.int64_1; + } + else if (typeof(T) == typeof(float)) + { + return + left.register.single_0 == right.register.single_0 + && left.register.single_1 == right.register.single_1 + && left.register.single_2 == right.register.single_2 + && left.register.single_3 == right.register.single_3; + } + else if (typeof(T) == typeof(double)) + { + return + left.register.double_0 == right.register.double_0 + && left.register.double_1 == right.register.double_1; + } + else + { + throw new NotSupportedException(SR.Arg_TypeNotSupported); + } + } + } /// /// Returns a boolean indicating whether any single pair of elements in the given vectors are not equal. diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.tt b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.tt index bcfb968480438..d0d379b727ae0 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.tt +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.tt @@ -322,11 +322,7 @@ namespace System.Numerics [MethodImpl(MethodImplOptions.AggressiveInlining)] public override readonly bool Equals(object? obj) { - if (!(obj is Vector)) - { - return false; - } - return Equals((Vector)obj); + return (obj is Vector other) && Equals(other); } /// @@ -334,59 +330,9 @@ namespace System.Numerics /// /// The vector to compare this instance to. /// True if the other vector is equal to this instance; False otherwise. 
- [Intrinsic] public readonly bool Equals(Vector other) { - if (Vector.IsHardwareAccelerated) - { - for (int g = 0; g < Count; g++) - { - if (!ScalarEquals(this[g], other[g])) - { - return false; - } - } - return true; - } - else - { -<# - foreach (Type type in supportedTypes) - { -#> - <#=GenerateIfStatementHeader(type)#> - { - return -<# - for (int g = 0; g < GetNumFields(type, totalSize); g++) - { -#> -<# - if (g == 0) - { -#> - this.<#=GetRegisterFieldName(type, g)#> == other.<#=GetRegisterFieldName(type, g)#> -<# - } - else - { -#> - && this.<#=GetRegisterFieldName(type, g)#> == other.<#=GetRegisterFieldName(type, g)#><#=(g == (GetNumFields(type, totalSize) -1)) ? ";" : ""#> -<# - } -#> -<# - } -#> - } -<# - } -#> - else - { - throw new NotSupportedException(SR.Arg_TypeNotSupported); - } - } + return this == other; } /// @@ -896,8 +842,59 @@ namespace System.Numerics /// True if all elements are equal; False otherwise. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static bool operator ==(Vector left, Vector right) => - left.Equals(right); + public static bool operator ==(Vector left, Vector right) + { + if (Vector.IsHardwareAccelerated) + { + for (int g = 0; g < Count; g++) + { + if (!ScalarEquals(left[g], right[g])) + { + return false; + } + } + return true; + } + else + { +<# + foreach (Type type in supportedTypes) + { +#> + <#=GenerateIfStatementHeader(type)#> + { + return +<# + for (int g = 0; g < GetNumFields(type, totalSize); g++) + { +#> +<# + if (g == 0) + { +#> + left.<#=GetRegisterFieldName(type, g)#> == right.<#=GetRegisterFieldName(type, g)#> +<# + } + else + { +#> + && left.<#=GetRegisterFieldName(type, g)#> == right.<#=GetRegisterFieldName(type, g)#><#=(g == (GetNumFields(type, totalSize) -1)) ? 
";" : ""#> +<# + } +#> +<# + } +#> + } +<# + } +#> + else + { + throw new NotSupportedException(SR.Arg_TypeNotSupported); + } + } + } /// /// Returns a boolean indicating whether any single pair of elements in the given vectors are not equal. diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2.cs index 38b572207d916..cee4a0e4ca66a 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2.cs @@ -65,9 +65,7 @@ public override readonly int GetHashCode() [MethodImpl(MethodImplOptions.AggressiveInlining)] public override readonly bool Equals(object? obj) { - if (!(obj is Vector2)) - return false; - return Equals((Vector2)obj); + return (obj is Vector2 other) && Equals(other); } /// diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2_Intrinsics.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2_Intrinsics.cs index 27d3469bfb92c..f2bfc88180731 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2_Intrinsics.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2_Intrinsics.cs @@ -88,10 +88,9 @@ public readonly void CopyTo(float[] array, int index) /// /// The Vector2 to compare this instance to. /// True if the other Vector2 is equal to this instance; False otherwise. 
- [Intrinsic] public readonly bool Equals(Vector2 other) { - return this.X == other.X && this.Y == other.Y; + return this == other; } #endregion Public Instance Methods @@ -275,7 +274,8 @@ public static Vector2 SquareRoot(Vector2 value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool operator ==(Vector2 left, Vector2 right) { - return left.Equals(right); + return left.X == right.X && + left.Y == right.Y; } /// diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3.cs index af110ffcc56d5..1ca945caa5bf5 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3.cs @@ -70,9 +70,7 @@ public override readonly int GetHashCode() [MethodImpl(MethodImplOptions.AggressiveInlining)] public override readonly bool Equals(object? obj) { - if (!(obj is Vector3)) - return false; - return Equals((Vector3)obj); + return (obj is Vector3 other) && Equals(other); } /// diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3_Intrinsics.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3_Intrinsics.cs index df32e8331d70c..52877c54b16a8 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3_Intrinsics.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3_Intrinsics.cs @@ -103,12 +103,9 @@ public readonly void CopyTo(float[] array, int index) /// /// The Vector3 to compare this instance to. /// True if the other Vector3 is equal to this instance; False otherwise. 
- [Intrinsic] public readonly bool Equals(Vector3 other) { - return X == other.X && - Y == other.Y && - Z == other.Z; + return this == other; } #endregion Public Instance Methods @@ -294,9 +291,9 @@ public static Vector3 SquareRoot(Vector3 value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool operator ==(Vector3 left, Vector3 right) { - return (left.X == right.X && - left.Y == right.Y && - left.Z == right.Z); + return left.X == right.X && + left.Y == right.Y && + left.Z == right.Z; } /// @@ -309,9 +306,7 @@ public static Vector3 SquareRoot(Vector3 value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool operator !=(Vector3 left, Vector3 right) { - return (left.X != right.X || - left.Y != right.Y || - left.Z != right.Z); + return !(left == right); } #endregion Public Static Operators } diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4.cs index 45a9f42264b5e..c6dc6a8de8c4f 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4.cs @@ -73,9 +73,7 @@ public override readonly int GetHashCode() [MethodImpl(MethodImplOptions.AggressiveInlining)] public override readonly bool Equals(object? obj) { - if (!(obj is Vector4)) - return false; - return Equals((Vector4)obj); + return (obj is Vector4 other) && Equals(other); } /// diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4_Intrinsics.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4_Intrinsics.cs index 70d692457e1a5..ee2e54842df38 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4_Intrinsics.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4_Intrinsics.cs @@ -133,13 +133,9 @@ public readonly void CopyTo(float[] array, int index) /// /// The Vector4 to compare this instance to. 
/// True if the other Vector4 is equal to this instance; False otherwise. - [Intrinsic] public readonly bool Equals(Vector4 other) { - return this.X == other.X - && this.Y == other.Y - && this.Z == other.Z - && this.W == other.W; + return this == other; } #endregion Public Instance Methods @@ -329,7 +325,10 @@ public static Vector4 SquareRoot(Vector4 value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool operator ==(Vector4 left, Vector4 right) { - return left.Equals(right); + return left.X == right.X + && left.Y == right.Y + && left.Z == right.Z + && left.W == right.W; } /// From 66417db6ec582b979ca46e3342b05b1b8c3b8877 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sat, 16 May 2020 12:40:29 -0700 Subject: [PATCH 08/24] Removing SIMDIntrinsicAbs --- src/coreclr/src/jit/codegenarm64.cpp | 9 -- src/coreclr/src/jit/compiler.h | 3 - src/coreclr/src/jit/lsraarm64.cpp | 1 - src/coreclr/src/jit/lsraxarch.cpp | 10 -- src/coreclr/src/jit/simd.cpp | 164 ----------------------- src/coreclr/src/jit/simdcodegenxarch.cpp | 22 +-- src/coreclr/src/jit/simdintrinsiclist.h | 1 - 7 files changed, 1 insertion(+), 209 deletions(-) diff --git a/src/coreclr/src/jit/codegenarm64.cpp b/src/coreclr/src/jit/codegenarm64.cpp index c3f7c07bbfcf8..39fd44953a9f0 100644 --- a/src/coreclr/src/jit/codegenarm64.cpp +++ b/src/coreclr/src/jit/codegenarm64.cpp @@ -3822,7 +3822,6 @@ void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode) break; case SIMDIntrinsicSqrt: - case SIMDIntrinsicAbs: case SIMDIntrinsicCast: case SIMDIntrinsicConvertToSingle: case SIMDIntrinsicConvertToInt32: @@ -3949,9 +3948,6 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type { switch (intrinsicId) { - case SIMDIntrinsicAbs: - result = INS_fabs; - break; case SIMDIntrinsicAdd: result = INS_fadd; break; @@ -4038,10 +4034,6 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type switch (intrinsicId) { - case SIMDIntrinsicAbs: - 
assert(!isUnsigned); - result = INS_abs; - break; case SIMDIntrinsicAdd: result = INS_add; break; @@ -4257,7 +4249,6 @@ void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode) void CodeGen::genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode) { assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicSqrt || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicCast || - simdNode->gtSIMDIntrinsicID == SIMDIntrinsicAbs || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicConvertToSingle || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicConvertToInt32 || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicConvertToDouble || diff --git a/src/coreclr/src/jit/compiler.h b/src/coreclr/src/jit/compiler.h index 33e2dd380d6ad..13f45c3bc5a99 100644 --- a/src/coreclr/src/jit/compiler.h +++ b/src/coreclr/src/jit/compiler.h @@ -8109,9 +8109,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX GenTree** op1, GenTree** op2); - // Creates a GT_SIMD tree for Abs intrinsic. - GenTree* impSIMDAbs(CORINFO_CLASS_HANDLE typeHnd, var_types baseType, unsigned simdVectorSize, GenTree* op1); - #if defined(TARGET_XARCH) // Transforms operands and returns the SIMD intrinsic to be applied on diff --git a/src/coreclr/src/jit/lsraarm64.cpp b/src/coreclr/src/jit/lsraarm64.cpp index c69a6cfab03cd..49f34608d82c2 100644 --- a/src/coreclr/src/jit/lsraarm64.cpp +++ b/src/coreclr/src/jit/lsraarm64.cpp @@ -803,7 +803,6 @@ int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) case SIMDIntrinsicInit: case SIMDIntrinsicCast: case SIMDIntrinsicSqrt: - case SIMDIntrinsicAbs: case SIMDIntrinsicConvertToSingle: case SIMDIntrinsicConvertToInt32: case SIMDIntrinsicConvertToDouble: diff --git a/src/coreclr/src/jit/lsraxarch.cpp b/src/coreclr/src/jit/lsraxarch.cpp index 772ab8c12c208..0e37a014059cb 100644 --- a/src/coreclr/src/jit/lsraxarch.cpp +++ b/src/coreclr/src/jit/lsraxarch.cpp @@ -1960,16 +1960,6 @@ int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) noway_assert(varTypeIsFloating(simdTree->gtSIMDBaseType)); break; - case 
SIMDIntrinsicAbs: - // float/double vectors: This gets implemented as bitwise-And operation - // with a mask and hence should never see here. - // - // Must be a Vector<int> or Vector<short> Vector<sbyte> - assert(simdTree->gtSIMDBaseType == TYP_INT || simdTree->gtSIMDBaseType == TYP_SHORT || - simdTree->gtSIMDBaseType == TYP_BYTE); - assert(compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported); - break; - case SIMDIntrinsicSqrt: // SSE2 has no instruction support for sqrt on integer vectors. noway_assert(varTypeIsFloating(simdTree->gtSIMDBaseType)); diff --git a/src/coreclr/src/jit/simd.cpp b/src/coreclr/src/jit/simd.cpp index bfd8c04f76789..1150fdefca697 100644 --- a/src/coreclr/src/jit/simd.cpp +++ b/src/coreclr/src/jit/simd.cpp @@ -1078,7 +1078,6 @@ const SIMDIntrinsicInfo* Compiler::getSIMDIntrinsicInfo(CORINFO_CLASS_HANDLE* in case SIMDIntrinsicSqrt: case SIMDIntrinsicMin: case SIMDIntrinsicMax: - case SIMDIntrinsicAbs: case SIMDIntrinsicEqual: case SIMDIntrinsicLessThan: case SIMDIntrinsicLessThanOrEqual: @@ -1666,164 +1665,6 @@ SIMDIntrinsicID Compiler::impSIMDRelOp(SIMDIntrinsicID relOpIntrinsicId, return intrinsicID; } -//------------------------------------------------------------------------- -// impSIMDAbs: creates GT_SIMD node to compute Abs value of a given vector. 
-// -// Arguments: -// typeHnd - type handle of SIMD vector -// baseType - base type of vector -// size - vector size in bytes -// op1 - operand of Abs intrinsic -// -GenTree* Compiler::impSIMDAbs(CORINFO_CLASS_HANDLE typeHnd, var_types baseType, unsigned size, GenTree* op1) -{ - assert(varTypeIsSIMD(op1)); - - var_types simdType = op1->TypeGet(); - GenTree* retVal = nullptr; - -#ifdef TARGET_XARCH - // When there is no direct support, Abs(v) could be computed - // on integer vectors as follows: - // BitVector = v < vector.Zero - // result = ConditionalSelect(BitVector, vector.Zero - v, v) - - bool useConditionalSelect = false; - if (getSIMDSupportLevel() == SIMD_SSE2_Supported) - { - // SSE2 doesn't support abs on signed integer type vectors. - if (baseType == TYP_LONG || baseType == TYP_INT || baseType == TYP_SHORT || baseType == TYP_BYTE) - { - useConditionalSelect = true; - } - } - else - { - assert(getSIMDSupportLevel() >= SIMD_SSE4_Supported); - if (baseType == TYP_LONG) - { - // SSE4/AVX2 don't support abs on long type vector. - useConditionalSelect = true; - } - } - - if (useConditionalSelect) - { - // This works only on integer vectors not on float/double vectors. 
- assert(varTypeIsIntegral(baseType)); - - GenTree* op1Assign; - unsigned op1LclNum; - - if (op1->OperGet() == GT_LCL_VAR) - { - op1LclNum = op1->AsLclVarCommon()->GetLclNum(); - op1Assign = nullptr; - } - else - { - op1LclNum = lvaGrabTemp(true DEBUGARG("SIMD Abs op1")); - lvaSetStruct(op1LclNum, typeHnd, false); - op1Assign = gtNewTempAssign(op1LclNum, op1); - op1 = gtNewLclvNode(op1LclNum, op1->TypeGet()); - } - - // Assign Vector.Zero to a temp since it is needed more than once - GenTree* vecZero = gtNewSIMDVectorZero(simdType, baseType, size); - unsigned vecZeroLclNum = lvaGrabTemp(true DEBUGARG("SIMD Abs VecZero")); - lvaSetStruct(vecZeroLclNum, typeHnd, false); - GenTree* vecZeroAssign = gtNewTempAssign(vecZeroLclNum, vecZero); - - // Construct BitVector = v < vector.Zero - GenTree* bitVecOp1 = op1; - GenTree* bitVecOp2 = gtNewLclvNode(vecZeroLclNum, vecZero->TypeGet()); - var_types relOpBaseType = baseType; - SIMDIntrinsicID relOpIntrinsic = - impSIMDRelOp(SIMDIntrinsicLessThan, typeHnd, size, &relOpBaseType, &bitVecOp1, &bitVecOp2); - GenTree* bitVec = gtNewSIMDNode(simdType, bitVecOp1, bitVecOp2, relOpIntrinsic, relOpBaseType, size); - unsigned bitVecLclNum = lvaGrabTemp(true DEBUGARG("SIMD Abs bitVec")); - lvaSetStruct(bitVecLclNum, typeHnd, false); - GenTree* bitVecAssign = gtNewTempAssign(bitVecLclNum, bitVec); - bitVec = gtNewLclvNode(bitVecLclNum, bitVec->TypeGet()); - - // Construct condSelectOp1 = vector.Zero - v - GenTree* subOp1 = gtNewLclvNode(vecZeroLclNum, vecZero->TypeGet()); - GenTree* subOp2 = gtNewLclvNode(op1LclNum, op1->TypeGet()); - GenTree* negVec = gtNewSIMDNode(simdType, subOp1, subOp2, SIMDIntrinsicSub, baseType, size); - - // Construct ConditionalSelect(bitVec, vector.Zero - v, v) - GenTree* vec = gtNewLclvNode(op1LclNum, op1->TypeGet()); - retVal = impSIMDSelect(typeHnd, baseType, size, bitVec, negVec, vec); - - // Prepend bitVec assignment to retVal. 
- // retVal = (tmp2 = v < tmp1), CondSelect(tmp2, tmp1 - v, v) - retVal = gtNewOperNode(GT_COMMA, simdType, bitVecAssign, retVal); - - // Prepend vecZero assignment to retVal. - // retVal = (tmp1 = vector.Zero), (tmp2 = v < tmp1), CondSelect(tmp2, tmp1 - v, v) - retVal = gtNewOperNode(GT_COMMA, simdType, vecZeroAssign, retVal); - - // If op1 was assigned to a temp, prepend that to retVal. - if (op1Assign != nullptr) - { - // retVal = (v=op1), (tmp1 = vector.Zero), (tmp2 = v < tmp1), CondSelect(tmp2, tmp1 - v, v) - retVal = gtNewOperNode(GT_COMMA, simdType, op1Assign, retVal); - } - } - else if (varTypeIsFloating(baseType)) - { - // Abs(vf) = vf & new SIMDVector<float>(0x7fffffff); - // Abs(vd) = vf & new SIMDVector<double>(0x7fffffffffffffff); - GenTree* bitMask = nullptr; - if (baseType == TYP_FLOAT) - { - float f; - static_assert_no_msg(sizeof(float) == sizeof(int)); - *((int*)&f) = 0x7fffffff; - bitMask = gtNewDconNode(f); - } - else if (baseType == TYP_DOUBLE) - { - double d; - static_assert_no_msg(sizeof(double) == sizeof(__int64)); - *((__int64*)&d) = 0x7fffffffffffffffLL; - bitMask = gtNewDconNode(d); - } - - assert(bitMask != nullptr); - bitMask->gtType = baseType; - GenTree* bitMaskVector = gtNewSIMDNode(simdType, bitMask, SIMDIntrinsicInit, baseType, size); - retVal = gtNewSIMDNode(simdType, op1, bitMaskVector, SIMDIntrinsicBitwiseAnd, baseType, size); - } - else if (baseType == TYP_USHORT || baseType == TYP_UBYTE || baseType == TYP_UINT || baseType == TYP_ULONG) - { - // Abs is a no-op on unsigned integer type vectors - retVal = op1; - } - else - { - assert(getSIMDSupportLevel() >= SIMD_SSE4_Supported); - assert(baseType != TYP_LONG); - - retVal = gtNewSIMDNode(simdType, op1, SIMDIntrinsicAbs, baseType, size); - } -#elif defined(TARGET_ARM64) - if (varTypeIsUnsigned(baseType)) - { - // Abs is a no-op on unsigned integer type vectors - retVal = op1; - } - else - { - retVal = gtNewSIMDNode(simdType, op1, SIMDIntrinsicAbs, baseType, size); - } -#else // 
!defined(TARGET_XARCH)_ && !defined(TARGET_ARM64) - assert(!"Abs intrinsic on non-xarch target not implemented"); -#endif // !TARGET_XARCH - - return retVal; -} - // Creates a GT_SIMD tree for Select operation // // Arguments: @@ -3150,11 +2991,6 @@ GenTree* Compiler::impSIMDIntrinsic(OPCODE opcode, retVal = gtNewSIMDNode(genActualType(callType), op1, simdIntrinsicID, baseType, size); break; - case SIMDIntrinsicAbs: - op1 = impSIMDPopStack(simdType); - retVal = impSIMDAbs(clsHnd, baseType, size, op1); - break; - case SIMDIntrinsicGetW: retVal = impSIMDGetFixed(simdType, baseType, size, 3); break; diff --git a/src/coreclr/src/jit/simdcodegenxarch.cpp b/src/coreclr/src/jit/simdcodegenxarch.cpp index b777d1da4eeed..608a769adf986 100644 --- a/src/coreclr/src/jit/simdcodegenxarch.cpp +++ b/src/coreclr/src/jit/simdcodegenxarch.cpp @@ -317,24 +317,6 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type } break; - case SIMDIntrinsicAbs: - if (compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported) - { - if (baseType == TYP_INT) - { - result = INS_pabsd; - } - else if (baseType == TYP_SHORT) - { - result = INS_pabsw; - } - else if (baseType == TYP_BYTE) - { - result = INS_pabsb; - } - } - break; - case SIMDIntrinsicEqual: if (baseType == TYP_FLOAT) { @@ -1062,8 +1044,7 @@ void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode) // void CodeGen::genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode) { - assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicSqrt || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicCast || - simdNode->gtSIMDIntrinsicID == SIMDIntrinsicAbs); + assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicSqrt || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicCast); GenTree* op1 = simdNode->gtGetOp1(); var_types baseType = simdNode->gtSIMDBaseType; @@ -3226,7 +3207,6 @@ void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode) case SIMDIntrinsicSqrt: case SIMDIntrinsicCast: - case SIMDIntrinsicAbs: genSIMDIntrinsicUnOp(simdNode); break; diff --git 
a/src/coreclr/src/jit/simdintrinsiclist.h b/src/coreclr/src/jit/simdintrinsiclist.h index 7b535c0112dc8..c048b87cf3734 100644 --- a/src/coreclr/src/jit/simdintrinsiclist.h +++ b/src/coreclr/src/jit/simdintrinsiclist.h @@ -104,7 +104,6 @@ SIMD_INTRINSIC("Floor", false, Floor, SIMD_INTRINSIC("Min", false, Min, "min", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) SIMD_INTRINSIC("Max", false, Max, "max", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) -SIMD_INTRINSIC("Abs", false, Abs, "abs", TYP_STRUCT, 1, {TYP_STRUCT, TYP_UNDEF, TYP_UNDEF }, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) // Vector Relational operators SIMD_INTRINSIC("Equals", false, Equal, "eq", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) From 56db5f9e9c539ecb3a44f242d960853b946f026c Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sat, 16 May 2020 12:44:18 -0700 Subject: [PATCH 09/24] Removing SIMDIntrinsicMax and SIMDIntrinsicMin --- src/coreclr/src/jit/codegenarm64.cpp | 17 +- src/coreclr/src/jit/compiler.h | 8 - src/coreclr/src/jit/gentree.cpp | 2 - src/coreclr/src/jit/lsraarm64.cpp | 2 - src/coreclr/src/jit/lsraxarch.cpp | 2 - src/coreclr/src/jit/simd.cpp | 201 ----------------------- src/coreclr/src/jit/simdcodegenxarch.cpp | 89 +--------- src/coreclr/src/jit/simdintrinsiclist.h | 3 - 8 files changed, 2 insertions(+), 322 deletions(-) diff --git a/src/coreclr/src/jit/codegenarm64.cpp b/src/coreclr/src/jit/codegenarm64.cpp index 39fd44953a9f0..cb494428b3b71 100644 --- a/src/coreclr/src/jit/codegenarm64.cpp +++ b/src/coreclr/src/jit/codegenarm64.cpp @@ -3849,8 +3849,6 @@ void 
CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode) case SIMDIntrinsicBitwiseAndNot: case SIMDIntrinsicBitwiseOr: case SIMDIntrinsicBitwiseXor: - case SIMDIntrinsicMin: - case SIMDIntrinsicMax: case SIMDIntrinsicEqual: case SIMDIntrinsicLessThan: case SIMDIntrinsicGreaterThan: @@ -3988,12 +3986,6 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type case SIMDIntrinsicLessThanOrEqual: result = INS_fcmle; break; - case SIMDIntrinsicMax: - result = INS_fmax; - break; - case SIMDIntrinsicMin: - result = INS_fmin; - break; case SIMDIntrinsicMul: result = INS_fmul; break; @@ -4073,12 +4065,6 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type assert(!isUnsigned); result = INS_cmle; break; - case SIMDIntrinsicMax: - result = isUnsigned ? INS_umax : INS_smax; - break; - case SIMDIntrinsicMin: - result = isUnsigned ? INS_umin : INS_smin; - break; case SIMDIntrinsicMul: result = INS_mul; break; @@ -4411,8 +4397,7 @@ void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode) simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseAnd || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseAndNot || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseOr || - simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseXor || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMin || - simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMax || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicEqual || + simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseXor || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicEqual || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicLessThan || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicGreaterThan || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicLessThanOrEqual || diff --git a/src/coreclr/src/jit/compiler.h b/src/coreclr/src/jit/compiler.h index 13f45c3bc5a99..81b1da528a6d3 100644 --- a/src/coreclr/src/jit/compiler.h +++ b/src/coreclr/src/jit/compiler.h @@ -8092,14 +8092,6 @@ 
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX GenTree* op2, GenTree* op3); - // Creates a GT_SIMD tree for Min/Max operation - GenTree* impSIMDMinMax(SIMDIntrinsicID intrinsicId, - CORINFO_CLASS_HANDLE typeHnd, - var_types baseType, - unsigned simdVectorSize, - GenTree* op1, - GenTree* op2); - // Transforms operands and returns the SIMD intrinsic to be applied on // transformed operands to obtain given relop result. SIMDIntrinsicID impSIMDRelOp(SIMDIntrinsicID relOpIntrinsicId, diff --git a/src/coreclr/src/jit/gentree.cpp b/src/coreclr/src/jit/gentree.cpp index f3208926e3bed..194d44436b0c0 100644 --- a/src/coreclr/src/jit/gentree.cpp +++ b/src/coreclr/src/jit/gentree.cpp @@ -18444,8 +18444,6 @@ bool GenTree::isCommutativeSIMDIntrinsic() case SIMDIntrinsicBitwiseOr: case SIMDIntrinsicBitwiseXor: case SIMDIntrinsicEqual: - case SIMDIntrinsicMax: - case SIMDIntrinsicMin: case SIMDIntrinsicMul: case SIMDIntrinsicOpEquality: case SIMDIntrinsicOpInEquality: diff --git a/src/coreclr/src/jit/lsraarm64.cpp b/src/coreclr/src/jit/lsraarm64.cpp index 49f34608d82c2..863f09ee2e63b 100644 --- a/src/coreclr/src/jit/lsraarm64.cpp +++ b/src/coreclr/src/jit/lsraarm64.cpp @@ -860,8 +860,6 @@ int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) case SIMDIntrinsicBitwiseAndNot: case SIMDIntrinsicBitwiseOr: case SIMDIntrinsicBitwiseXor: - case SIMDIntrinsicMin: - case SIMDIntrinsicMax: case SIMDIntrinsicEqual: case SIMDIntrinsicLessThan: case SIMDIntrinsicGreaterThan: diff --git a/src/coreclr/src/jit/lsraxarch.cpp b/src/coreclr/src/jit/lsraxarch.cpp index 0e37a014059cb..544f3ef2884df 100644 --- a/src/coreclr/src/jit/lsraxarch.cpp +++ b/src/coreclr/src/jit/lsraxarch.cpp @@ -1977,8 +1977,6 @@ int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) case SIMDIntrinsicBitwiseAndNot: case SIMDIntrinsicBitwiseOr: case SIMDIntrinsicBitwiseXor: - case SIMDIntrinsicMin: - case SIMDIntrinsicMax: // SSE2 32-bit integer multiplication requires two temp regs if 
(simdTree->gtSIMDIntrinsicID == SIMDIntrinsicMul && simdTree->gtSIMDBaseType == TYP_INT && compiler->getSIMDSupportLevel() == SIMD_SSE2_Supported) diff --git a/src/coreclr/src/jit/simd.cpp b/src/coreclr/src/jit/simd.cpp index 1150fdefca697..09212bfa0dbcd 100644 --- a/src/coreclr/src/jit/simd.cpp +++ b/src/coreclr/src/jit/simd.cpp @@ -1076,8 +1076,6 @@ const SIMDIntrinsicInfo* Compiler::getSIMDIntrinsicInfo(CORINFO_CLASS_HANDLE* in case SIMDIntrinsicMul: case SIMDIntrinsicDiv: case SIMDIntrinsicSqrt: - case SIMDIntrinsicMin: - case SIMDIntrinsicMax: case SIMDIntrinsicEqual: case SIMDIntrinsicLessThan: case SIMDIntrinsicLessThanOrEqual: @@ -1725,193 +1723,6 @@ GenTree* Compiler::impSIMDSelect( return simdTree; } -// Creates a GT_SIMD tree for Min/Max operation -// -// Arguments: -// IntrinsicId - SIMD intrinsic Id, either Min or Max -// typeHnd - type handle of SIMD vector -// baseType - base type of SIMD vector -// size - SIMD vector size -// op1 - first operand = va -// op2 - second operand = vb -// -// Return Value: -// Returns GT_SIMD tree that computes Max(va, vb) -// -GenTree* Compiler::impSIMDMinMax(SIMDIntrinsicID intrinsicId, - CORINFO_CLASS_HANDLE typeHnd, - var_types baseType, - unsigned size, - GenTree* op1, - GenTree* op2) -{ - assert(intrinsicId == SIMDIntrinsicMin || intrinsicId == SIMDIntrinsicMax); - assert(varTypeIsSIMD(op1)); - var_types simdType = op1->TypeGet(); - assert(op2->TypeGet() == simdType); - -#if defined(TARGET_XARCH) || defined(TARGET_ARM64) - GenTree* simdTree = nullptr; - -#ifdef TARGET_XARCH - // SSE2 has direct support for float/double/signed word/unsigned byte. - // SSE4.1 has direct support for int32/uint32/signed byte/unsigned word. 
- // For other integer types we compute min/max as follows - // - // int32/uint32 (SSE2) - // int64/uint64 (SSE2&SSE4): - // compResult = (op1 < op2) in case of Min - // (op1 > op2) in case of Max - // Min/Max(op1, op2) = Select(compResult, op1, op2) - // - // unsigned word (SSE2): - // op1 = op1 - 2^15 ; to make it fit within a signed word - // op2 = op2 - 2^15 ; to make it fit within a signed word - // result = SSE2 signed word Min/Max(op1, op2) - // result = result + 2^15 ; readjust it back - // - // signed byte (SSE2): - // op1 = op1 + 2^7 ; to make it unsigned - // op1 = op1 + 2^7 ; to make it unsigned - // result = SSE2 unsigned byte Min/Max(op1, op2) - // result = result - 2^15 ; readjust it back - - if (varTypeIsFloating(baseType) || baseType == TYP_SHORT || baseType == TYP_UBYTE || - (getSIMDSupportLevel() >= SIMD_SSE4_Supported && - (baseType == TYP_BYTE || baseType == TYP_INT || baseType == TYP_UINT || baseType == TYP_USHORT))) - { - // SSE2 or SSE4.1 has direct support - simdTree = gtNewSIMDNode(simdType, op1, op2, intrinsicId, baseType, size); - } - else if (baseType == TYP_USHORT || baseType == TYP_BYTE) - { - assert(getSIMDSupportLevel() == SIMD_SSE2_Supported); - int constVal; - SIMDIntrinsicID operIntrinsic; - SIMDIntrinsicID adjustIntrinsic; - var_types minMaxOperBaseType; - if (baseType == TYP_USHORT) - { - constVal = 0x80008000; - operIntrinsic = SIMDIntrinsicSub; - adjustIntrinsic = SIMDIntrinsicAdd; - minMaxOperBaseType = TYP_SHORT; - } - else - { - assert(baseType == TYP_BYTE); - constVal = 0x80808080; - operIntrinsic = SIMDIntrinsicAdd; - adjustIntrinsic = SIMDIntrinsicSub; - minMaxOperBaseType = TYP_UBYTE; - } - - GenTree* initVal = gtNewIconNode(constVal); - GenTree* constVector = gtNewSIMDNode(simdType, initVal, nullptr, SIMDIntrinsicInit, TYP_INT, size); - - // Assign constVector to a temp, since we intend to use it more than once - // TODO-CQ: We have quite a few such constant vectors constructed during - // the importation of SIMD 
intrinsics. Make sure that we have a single - // temp per distinct constant per method. - GenTree* tmp = fgInsertCommaFormTemp(&constVector, typeHnd); - - // op1 = op1 - constVector - // op2 = op2 - constVector - op1 = gtNewSIMDNode(simdType, op1, constVector, operIntrinsic, baseType, size); - op2 = gtNewSIMDNode(simdType, op2, tmp, operIntrinsic, baseType, size); - - // compute min/max of op1 and op2 considering them as if minMaxOperBaseType - simdTree = gtNewSIMDNode(simdType, op1, op2, intrinsicId, minMaxOperBaseType, size); - - // re-adjust the value by adding or subtracting constVector - tmp = gtNewLclvNode(tmp->AsLclVarCommon()->GetLclNum(), tmp->TypeGet()); - simdTree = gtNewSIMDNode(simdType, simdTree, tmp, adjustIntrinsic, baseType, size); - } -#elif defined(TARGET_ARM64) - // Arm64 has direct support for all types except int64/uint64 - // For which we compute min/max as follows - // - // int64/uint64 - // compResult = (op1 < op2) in case of Min - // (op1 > op2) in case of Max - // Min/Max(op1, op2) = Select(compResult, op1, op2) - if (baseType != TYP_ULONG && baseType != TYP_LONG) - { - simdTree = gtNewSIMDNode(simdType, op1, op2, intrinsicId, baseType, size); - } -#endif - else - { - GenTree* dupOp1 = nullptr; - GenTree* dupOp2 = nullptr; - GenTree* op1Assign = nullptr; - GenTree* op2Assign = nullptr; - unsigned op1LclNum; - unsigned op2LclNum; - - if ((op1->gtFlags & GTF_SIDE_EFFECT) != 0) - { - op1LclNum = lvaGrabTemp(true DEBUGARG("SIMD Min/Max")); - lvaSetStruct(op1LclNum, typeHnd, false); - dupOp1 = gtNewLclvNode(op1LclNum, op1->TypeGet()); - op1Assign = gtNewTempAssign(op1LclNum, op1); - op1 = gtNewLclvNode(op1LclNum, op1->TypeGet()); - } - else - { - dupOp1 = gtCloneExpr(op1); - } - - if ((op2->gtFlags & GTF_SIDE_EFFECT) != 0) - { - op2LclNum = lvaGrabTemp(true DEBUGARG("SIMD Min/Max")); - lvaSetStruct(op2LclNum, typeHnd, false); - dupOp2 = gtNewLclvNode(op2LclNum, op2->TypeGet()); - op2Assign = gtNewTempAssign(op2LclNum, op2); - op2 = 
gtNewLclvNode(op2LclNum, op2->TypeGet()); - } - else - { - dupOp2 = gtCloneExpr(op2); - } - - SIMDIntrinsicID relOpIntrinsic = - (intrinsicId == SIMDIntrinsicMin) ? SIMDIntrinsicLessThan : SIMDIntrinsicGreaterThan; - var_types relOpBaseType = baseType; - - // compResult = op1 relOp op2 - // simdTree = Select(compResult, op1, op2); - assert(dupOp1 != nullptr); - assert(dupOp2 != nullptr); - relOpIntrinsic = impSIMDRelOp(relOpIntrinsic, typeHnd, size, &relOpBaseType, &dupOp1, &dupOp2); - GenTree* compResult = gtNewSIMDNode(simdType, dupOp1, dupOp2, relOpIntrinsic, relOpBaseType, size); - unsigned compResultLclNum = lvaGrabTemp(true DEBUGARG("SIMD Min/Max")); - lvaSetStruct(compResultLclNum, typeHnd, false); - GenTree* compResultAssign = gtNewTempAssign(compResultLclNum, compResult); - compResult = gtNewLclvNode(compResultLclNum, compResult->TypeGet()); - simdTree = impSIMDSelect(typeHnd, baseType, size, compResult, op1, op2); - simdTree = gtNewOperNode(GT_COMMA, simdTree->TypeGet(), compResultAssign, simdTree); - - // Now create comma trees if we have created assignments of op1/op2 to temps - if (op2Assign != nullptr) - { - simdTree = gtNewOperNode(GT_COMMA, simdTree->TypeGet(), op2Assign, simdTree); - } - - if (op1Assign != nullptr) - { - simdTree = gtNewOperNode(GT_COMMA, simdTree->TypeGet(), op1Assign, simdTree); - } - } - - assert(simdTree != nullptr); - return simdTree; -#else // !(defined(TARGET_XARCH) || defined(TARGET_ARM64)) - assert(!"impSIMDMinMax() unimplemented on target arch"); - unreached(); -#endif // !(defined(TARGET_XARCH) || defined(TARGET_ARM64)) -} - //------------------------------------------------------------------------ // getOp1ForConstructor: Get the op1 for a constructor call. 
// @@ -2881,18 +2692,6 @@ GenTree* Compiler::impSIMDIntrinsic(OPCODE opcode, } break; - case SIMDIntrinsicMin: - case SIMDIntrinsicMax: - { - // op1 is the first operand; if instance method, op1 is "this" arg - // op2 is the second operand - op2 = impSIMDPopStack(simdType); - op1 = impSIMDPopStack(simdType, instMethod); - - retVal = impSIMDMinMax(simdIntrinsicID, clsHnd, baseType, size, op1, op2); - } - break; - case SIMDIntrinsicGetItem: { // op1 is a SIMD variable that is "this" arg diff --git a/src/coreclr/src/jit/simdcodegenxarch.cpp b/src/coreclr/src/jit/simdcodegenxarch.cpp index 608a769adf986..4ab67113a99f6 100644 --- a/src/coreclr/src/jit/simdcodegenxarch.cpp +++ b/src/coreclr/src/jit/simdcodegenxarch.cpp @@ -233,90 +233,6 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type } break; - case SIMDIntrinsicMin: - if (baseType == TYP_FLOAT) - { - result = INS_minps; - } - else if (baseType == TYP_DOUBLE) - { - result = INS_minpd; - } - else if (baseType == TYP_UBYTE) - { - result = INS_pminub; - } - else if (baseType == TYP_SHORT) - { - result = INS_pminsw; - } - else if (compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported) - { - if (baseType == TYP_BYTE) - { - result = INS_pminsb; - } - else if (baseType == TYP_USHORT) - { - result = INS_pminuw; - } - else if (baseType == TYP_INT) - { - result = INS_pminsd; - } - else if (baseType == TYP_UINT) - { - result = INS_pminud; - } - } - else - { - unreached(); - } - break; - - case SIMDIntrinsicMax: - if (baseType == TYP_FLOAT) - { - result = INS_maxps; - } - else if (baseType == TYP_DOUBLE) - { - result = INS_maxpd; - } - else if (baseType == TYP_UBYTE) - { - result = INS_pmaxub; - } - else if (baseType == TYP_SHORT) - { - result = INS_pmaxsw; - } - else if (compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported) - { - if (baseType == TYP_BYTE) - { - result = INS_pmaxsb; - } - else if (baseType == TYP_USHORT) - { - result = INS_pmaxuw; - } - else if (baseType == TYP_INT) - { - 
result = INS_pmaxsd; - } - else if (baseType == TYP_UINT) - { - result = INS_pmaxud; - } - } - else - { - unreached(); - } - break; - case SIMDIntrinsicEqual: if (baseType == TYP_FLOAT) { @@ -1780,8 +1696,7 @@ void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode) simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseAnd || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseAndNot || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseOr || - simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseXor || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMin || - simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMax); + simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseXor); GenTree* op1 = simdNode->gtGetOp1(); GenTree* op2 = simdNode->gtGetOp2(); @@ -3237,8 +3152,6 @@ void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode) case SIMDIntrinsicBitwiseAndNot: case SIMDIntrinsicBitwiseOr: case SIMDIntrinsicBitwiseXor: - case SIMDIntrinsicMin: - case SIMDIntrinsicMax: genSIMDIntrinsicBinOp(simdNode); break; diff --git a/src/coreclr/src/jit/simdintrinsiclist.h b/src/coreclr/src/jit/simdintrinsiclist.h index c048b87cf3734..e2896a35b45e8 100644 --- a/src/coreclr/src/jit/simdintrinsiclist.h +++ b/src/coreclr/src/jit/simdintrinsiclist.h @@ -102,9 +102,6 @@ SIMD_INTRINSIC("SquareRoot", false, Sqrt, SIMD_INTRINSIC("Ceiling", false, Ceil, "ceil", TYP_STRUCT, 1, {TYP_STRUCT, TYP_UNDEF, TYP_UNDEF}, {TYP_FLOAT, TYP_DOUBLE, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}) SIMD_INTRINSIC("Floor", false, Floor, "floor", TYP_STRUCT, 1, {TYP_STRUCT, TYP_UNDEF, TYP_UNDEF}, {TYP_FLOAT, TYP_DOUBLE, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}) -SIMD_INTRINSIC("Min", false, Min, "min", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) -SIMD_INTRINSIC("Max", false, Max, "max", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, 
TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) - // Vector Relational operators SIMD_INTRINSIC("Equals", false, Equal, "eq", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) SIMD_INTRINSIC("LessThan", false, LessThan, "lt", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) From d8b128647921f99f46bd148f9e754e34d67437f3 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sat, 16 May 2020 12:52:00 -0700 Subject: [PATCH 10/24] Removing SIMDIntrinsicCeil and SIMDIntrinsicFloor --- src/coreclr/src/jit/codegen.h | 1 - src/coreclr/src/jit/codegenarm64.cpp | 11 +---- src/coreclr/src/jit/lsraarm64.cpp | 2 - src/coreclr/src/jit/lsraxarch.cpp | 5 --- src/coreclr/src/jit/simd.cpp | 13 ------ src/coreclr/src/jit/simdcodegenxarch.cpp | 51 ------------------------ src/coreclr/src/jit/simdintrinsiclist.h | 3 -- 7 files changed, 1 insertion(+), 85 deletions(-) diff --git a/src/coreclr/src/jit/codegen.h b/src/coreclr/src/jit/codegen.h index 93227bc7f6c1f..8dd1e7848e38b 100644 --- a/src/coreclr/src/jit/codegen.h +++ b/src/coreclr/src/jit/codegen.h @@ -978,7 +978,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genSIMDIntrinsicInit(GenTreeSIMD* simdNode); void genSIMDIntrinsicInitN(GenTreeSIMD* simdNode); void genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode); - void genSIMDIntrinsicUnOpWithImm(GenTreeSIMD* simdNode); void genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode); void genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode); void genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode); diff --git a/src/coreclr/src/jit/codegenarm64.cpp b/src/coreclr/src/jit/codegenarm64.cpp index cb494428b3b71..aba59eeb85af3 100644 --- a/src/coreclr/src/jit/codegenarm64.cpp +++ 
b/src/coreclr/src/jit/codegenarm64.cpp @@ -3827,8 +3827,6 @@ void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode) case SIMDIntrinsicConvertToInt32: case SIMDIntrinsicConvertToDouble: case SIMDIntrinsicConvertToInt64: - case SIMDIntrinsicCeil: - case SIMDIntrinsicFloor: genSIMDIntrinsicUnOp(simdNode); break; @@ -4009,12 +4007,6 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type case SIMDIntrinsicWidenHi: result = INS_fcvtl2; break; - case SIMDIntrinsicCeil: - result = INS_frintp; - break; - case SIMDIntrinsicFloor: - result = INS_frintm; - break; default: assert(!"Unsupported SIMD intrinsic"); unreached(); @@ -4238,8 +4230,7 @@ void CodeGen::genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode) simdNode->gtSIMDIntrinsicID == SIMDIntrinsicConvertToSingle || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicConvertToInt32 || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicConvertToDouble || - simdNode->gtSIMDIntrinsicID == SIMDIntrinsicConvertToInt64 || - simdNode->gtSIMDIntrinsicID == SIMDIntrinsicCeil || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicFloor); + simdNode->gtSIMDIntrinsicID == SIMDIntrinsicConvertToInt64); GenTree* op1 = simdNode->gtGetOp1(); var_types baseType = simdNode->gtSIMDBaseType; diff --git a/src/coreclr/src/jit/lsraarm64.cpp b/src/coreclr/src/jit/lsraarm64.cpp index 863f09ee2e63b..72fdd8679a7ff 100644 --- a/src/coreclr/src/jit/lsraarm64.cpp +++ b/src/coreclr/src/jit/lsraarm64.cpp @@ -809,8 +809,6 @@ int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) case SIMDIntrinsicConvertToInt64: case SIMDIntrinsicWidenLo: case SIMDIntrinsicWidenHi: - case SIMDIntrinsicCeil: - case SIMDIntrinsicFloor: // No special handling required. 
break; diff --git a/src/coreclr/src/jit/lsraxarch.cpp b/src/coreclr/src/jit/lsraxarch.cpp index 544f3ef2884df..f120221394b29 100644 --- a/src/coreclr/src/jit/lsraxarch.cpp +++ b/src/coreclr/src/jit/lsraxarch.cpp @@ -1965,11 +1965,6 @@ int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) noway_assert(varTypeIsFloating(simdTree->gtSIMDBaseType)); break; - case SIMDIntrinsicCeil: - case SIMDIntrinsicFloor: - assert(compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported); - break; - case SIMDIntrinsicAdd: case SIMDIntrinsicSub: case SIMDIntrinsicMul: diff --git a/src/coreclr/src/jit/simd.cpp b/src/coreclr/src/jit/simd.cpp index 09212bfa0dbcd..1f82702e07acc 100644 --- a/src/coreclr/src/jit/simd.cpp +++ b/src/coreclr/src/jit/simd.cpp @@ -2777,19 +2777,6 @@ GenTree* Compiler::impSIMDIntrinsic(OPCODE opcode, } break; - case SIMDIntrinsicCeil: - case SIMDIntrinsicFloor: -#if defined(TARGET_XARCH) - // Rounding instructions are only available from SSE4.1. - if (getSIMDSupportLevel() < SIMD_SSE4_Supported) - { - return nullptr; - } -#endif // defined(TARGET_XARCH) - op1 = impSIMDPopStack(simdType); - retVal = gtNewSIMDNode(genActualType(callType), op1, simdIntrinsicID, baseType, size); - break; - case SIMDIntrinsicGetW: retVal = impSIMDGetFixed(simdType, baseType, size, 3); break; diff --git a/src/coreclr/src/jit/simdcodegenxarch.cpp b/src/coreclr/src/jit/simdcodegenxarch.cpp index 4ab67113a99f6..34d931782bc50 100644 --- a/src/coreclr/src/jit/simdcodegenxarch.cpp +++ b/src/coreclr/src/jit/simdcodegenxarch.cpp @@ -543,26 +543,6 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type result = INS_insertps; break; - case SIMDIntrinsicCeil: - case SIMDIntrinsicFloor: - if (compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported) - { - if (baseType == TYP_FLOAT) - { - result = INS_roundps; - } - else - { - assert(baseType == TYP_DOUBLE); - result = INS_roundpd; - } - - assert(ival != nullptr); - *ival = (intrinsicId == SIMDIntrinsicCeil) ? 
ROUNDPS_TOWARD_POSITIVE_INFINITY_IMM - : ROUNDPS_TOWARD_NEGATIVE_INFINITY_IMM; - } - break; - default: assert(!"Unsupported SIMD intrinsic"); unreached(); @@ -977,32 +957,6 @@ void CodeGen::genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode) genProduceReg(simdNode); } -//---------------------------------------------------------------------------------- -// genSIMDIntrinsicUnOpWithImm: Generate code for SIMD Intrinsic unary operations with an imm8, such as Ceil. -// -// Arguments: -// simdNode - The GT_SIMD node -// -// Return Value: -// None. -// -void CodeGen::genSIMDIntrinsicUnOpWithImm(GenTreeSIMD* simdNode) -{ - assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicCeil || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicFloor); - - GenTree* op1 = simdNode->gtGetOp1(); - var_types baseType = simdNode->gtSIMDBaseType; - regNumber targetReg = simdNode->GetRegNum(); - assert(targetReg != REG_NA); - var_types targetType = simdNode->TypeGet(); - - regNumber op1Reg = genConsumeReg(op1); - unsigned ival; - instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType, &ival); - assert((ival >= 0) && (ival <= 255)); - GetEmitter()->emitIns_R_R_I(ins, emitActualTypeSize(targetType), targetReg, op1Reg, (int8_t)ival); -} - //---------------------------------------------------------------------------------- // genSIMDIntrinsic32BitConvert: Generate code for 32-bit SIMD Convert (int/uint <-> float) // @@ -3191,11 +3145,6 @@ void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode) genSIMDIntrinsicUpperRestore(simdNode); break; - case SIMDIntrinsicCeil: - case SIMDIntrinsicFloor: - genSIMDIntrinsicUnOpWithImm(simdNode); - break; - default: noway_assert(!"Unimplemented SIMD intrinsic."); unreached(); diff --git a/src/coreclr/src/jit/simdintrinsiclist.h b/src/coreclr/src/jit/simdintrinsiclist.h index e2896a35b45e8..fa1df4d5690b7 100644 --- a/src/coreclr/src/jit/simdintrinsiclist.h +++ b/src/coreclr/src/jit/simdintrinsiclist.h @@ -99,9 +99,6 @@ SIMD_INTRINSIC("op_Division", false, 
Div, // SquareRoot is recognized as an intrinsic only for float or double vectors SIMD_INTRINSIC("SquareRoot", false, Sqrt, "sqrt", TYP_STRUCT, 1, {TYP_STRUCT, TYP_UNDEF, TYP_UNDEF}, {TYP_FLOAT, TYP_DOUBLE, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}) -SIMD_INTRINSIC("Ceiling", false, Ceil, "ceil", TYP_STRUCT, 1, {TYP_STRUCT, TYP_UNDEF, TYP_UNDEF}, {TYP_FLOAT, TYP_DOUBLE, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}) -SIMD_INTRINSIC("Floor", false, Floor, "floor", TYP_STRUCT, 1, {TYP_STRUCT, TYP_UNDEF, TYP_UNDEF}, {TYP_FLOAT, TYP_DOUBLE, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}) - // Vector Relational operators SIMD_INTRINSIC("Equals", false, Equal, "eq", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) SIMD_INTRINSIC("LessThan", false, LessThan, "lt", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) From e61b0c4a39e0d3631aff9ffdcff0e5b38c80eb86 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sat, 16 May 2020 12:53:51 -0700 Subject: [PATCH 11/24] Removing SIMDIntrinsicSqrt --- src/coreclr/src/jit/codegenarm64.cpp | 6 +----- src/coreclr/src/jit/lsraarm64.cpp | 1 - src/coreclr/src/jit/lsraxarch.cpp | 5 ----- src/coreclr/src/jit/simd.cpp | 20 -------------------- src/coreclr/src/jit/simdcodegenxarch.cpp | 18 +----------------- src/coreclr/src/jit/simdintrinsiclist.h | 3 --- 6 files changed, 2 insertions(+), 51 deletions(-) diff --git a/src/coreclr/src/jit/codegenarm64.cpp b/src/coreclr/src/jit/codegenarm64.cpp index aba59eeb85af3..2af00f21b4053 100644 --- a/src/coreclr/src/jit/codegenarm64.cpp +++ b/src/coreclr/src/jit/codegenarm64.cpp @@ -3821,7 +3821,6 @@ void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode) 
genSIMDIntrinsicInitN(simdNode); break; - case SIMDIntrinsicSqrt: case SIMDIntrinsicCast: case SIMDIntrinsicConvertToSingle: case SIMDIntrinsicConvertToInt32: @@ -3995,9 +3994,6 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type case SIMDIntrinsicSelect: result = INS_bsl; break; - case SIMDIntrinsicSqrt: - result = INS_fsqrt; - break; case SIMDIntrinsicSub: result = INS_fsub; break; @@ -4226,7 +4222,7 @@ void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode) // void CodeGen::genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode) { - assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicSqrt || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicCast || + assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicCast || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicConvertToSingle || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicConvertToInt32 || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicConvertToDouble || diff --git a/src/coreclr/src/jit/lsraarm64.cpp b/src/coreclr/src/jit/lsraarm64.cpp index 72fdd8679a7ff..6154765dc5653 100644 --- a/src/coreclr/src/jit/lsraarm64.cpp +++ b/src/coreclr/src/jit/lsraarm64.cpp @@ -802,7 +802,6 @@ int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) { case SIMDIntrinsicInit: case SIMDIntrinsicCast: - case SIMDIntrinsicSqrt: case SIMDIntrinsicConvertToSingle: case SIMDIntrinsicConvertToInt32: case SIMDIntrinsicConvertToDouble: diff --git a/src/coreclr/src/jit/lsraxarch.cpp b/src/coreclr/src/jit/lsraxarch.cpp index f120221394b29..1900d56e6635d 100644 --- a/src/coreclr/src/jit/lsraxarch.cpp +++ b/src/coreclr/src/jit/lsraxarch.cpp @@ -1960,11 +1960,6 @@ int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) noway_assert(varTypeIsFloating(simdTree->gtSIMDBaseType)); break; - case SIMDIntrinsicSqrt: - // SSE2 has no instruction support for sqrt on integer vectors. 
- noway_assert(varTypeIsFloating(simdTree->gtSIMDBaseType)); - break; - case SIMDIntrinsicAdd: case SIMDIntrinsicSub: case SIMDIntrinsicMul: diff --git a/src/coreclr/src/jit/simd.cpp b/src/coreclr/src/jit/simd.cpp index 1f82702e07acc..6333ace3242fd 100644 --- a/src/coreclr/src/jit/simd.cpp +++ b/src/coreclr/src/jit/simd.cpp @@ -1075,7 +1075,6 @@ const SIMDIntrinsicInfo* Compiler::getSIMDIntrinsicInfo(CORINFO_CLASS_HANDLE* in case SIMDIntrinsicSub: case SIMDIntrinsicMul: case SIMDIntrinsicDiv: - case SIMDIntrinsicSqrt: case SIMDIntrinsicEqual: case SIMDIntrinsicLessThan: case SIMDIntrinsicLessThanOrEqual: @@ -2758,25 +2757,6 @@ GenTree* Compiler::impSIMDIntrinsic(OPCODE opcode, } break; - case SIMDIntrinsicSqrt: - { -#if (defined(TARGET_XARCH) || defined(TARGET_ARM64)) && defined(DEBUG) - // SSE/AVX/ARM64 doesn't support sqrt on integer type vectors and hence - // should never be seen as an intrinsic here. See SIMDIntrinsicList.h - // for supported base types for this intrinsic. - if (!varTypeIsFloating(baseType)) - { - assert(!"Sqrt not supported on integer vectors\n"); - return nullptr; - } -#endif // (defined(TARGET_XARCH) || defined(TARGET_ARM64)) && defined(DEBUG) - - op1 = impSIMDPopStack(simdType); - - retVal = gtNewSIMDNode(genActualType(callType), op1, nullptr, simdIntrinsicID, baseType, size); - } - break; - case SIMDIntrinsicGetW: retVal = impSIMDGetFixed(simdType, baseType, size, 3); break; diff --git a/src/coreclr/src/jit/simdcodegenxarch.cpp b/src/coreclr/src/jit/simdcodegenxarch.cpp index 34d931782bc50..5285d7f61aa6b 100644 --- a/src/coreclr/src/jit/simdcodegenxarch.cpp +++ b/src/coreclr/src/jit/simdcodegenxarch.cpp @@ -130,21 +130,6 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type } break; - case SIMDIntrinsicSqrt: - if (baseType == TYP_FLOAT) - { - result = INS_sqrtps; - } - else if (baseType == TYP_DOUBLE) - { - result = INS_sqrtpd; - } - else - { - unreached(); - } - break; - case SIMDIntrinsicAdd: if (baseType 
== TYP_FLOAT) { @@ -940,7 +925,7 @@ void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode) // void CodeGen::genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode) { - assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicSqrt || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicCast); + assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicCast); GenTree* op1 = simdNode->gtGetOp1(); var_types baseType = simdNode->gtSIMDBaseType; @@ -3074,7 +3059,6 @@ void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode) genSIMDIntrinsicInitN(simdNode); break; - case SIMDIntrinsicSqrt: case SIMDIntrinsicCast: genSIMDIntrinsicUnOp(simdNode); break; diff --git a/src/coreclr/src/jit/simdintrinsiclist.h b/src/coreclr/src/jit/simdintrinsiclist.h index fa1df4d5690b7..12dcb58bca708 100644 --- a/src/coreclr/src/jit/simdintrinsiclist.h +++ b/src/coreclr/src/jit/simdintrinsiclist.h @@ -96,9 +96,6 @@ SIMD_INTRINSIC("op_Multiply", false, Mul, SIMD_INTRINSIC("op_Division", false, Div, "/", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_FLOAT, TYP_DOUBLE, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}) -// SquareRoot is recognized as an intrinsic only for float or double vectors -SIMD_INTRINSIC("SquareRoot", false, Sqrt, "sqrt", TYP_STRUCT, 1, {TYP_STRUCT, TYP_UNDEF, TYP_UNDEF}, {TYP_FLOAT, TYP_DOUBLE, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}) - // Vector Relational operators SIMD_INTRINSIC("Equals", false, Equal, "eq", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) SIMD_INTRINSIC("LessThan", false, LessThan, "lt", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) From a8f7cf5ea4f6e4777602b69686f34fd8b59d1f86 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sat, 16 May 2020 12:55:55 -0700 
Subject: [PATCH 12/24] Removing SIMDIntrinsicSelect --- src/coreclr/src/jit/codegenarm64.cpp | 10 ---- src/coreclr/src/jit/compiler.h | 8 --- src/coreclr/src/jit/lsraarm64.cpp | 8 --- src/coreclr/src/jit/simd.cpp | 73 ------------------------- src/coreclr/src/jit/simdintrinsiclist.h | 3 - 5 files changed, 102 deletions(-) diff --git a/src/coreclr/src/jit/codegenarm64.cpp b/src/coreclr/src/jit/codegenarm64.cpp index 2af00f21b4053..4478b00852f4a 100644 --- a/src/coreclr/src/jit/codegenarm64.cpp +++ b/src/coreclr/src/jit/codegenarm64.cpp @@ -3882,10 +3882,6 @@ void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode) genSIMDIntrinsicUpperRestore(simdNode); break; - case SIMDIntrinsicSelect: - NYI("SIMDIntrinsicSelect lowered during import to (a & sel) | (b & ~sel)"); - break; - default: noway_assert(!"Unimplemented SIMD intrinsic."); unreached(); @@ -3991,9 +3987,6 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type // Return lower bytes instruction here result = INS_fcvtn; break; - case SIMDIntrinsicSelect: - result = INS_bsl; - break; case SIMDIntrinsicSub: result = INS_fsub; break; @@ -4061,9 +4054,6 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type // Return lower bytes instruction here result = INS_xtn; break; - case SIMDIntrinsicSelect: - result = INS_bsl; - break; case SIMDIntrinsicSub: result = INS_sub; break; diff --git a/src/coreclr/src/jit/compiler.h b/src/coreclr/src/jit/compiler.h index 81b1da528a6d3..cde026600c70a 100644 --- a/src/coreclr/src/jit/compiler.h +++ b/src/coreclr/src/jit/compiler.h @@ -8084,14 +8084,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX // Create a GT_SIMD tree for a Get property of SIMD vector with a fixed index. 
GenTreeSIMD* impSIMDGetFixed(var_types simdType, var_types baseType, unsigned simdSize, int index); - // Creates a GT_SIMD tree for Select operation - GenTree* impSIMDSelect(CORINFO_CLASS_HANDLE typeHnd, - var_types baseType, - unsigned simdVectorSize, - GenTree* op1, - GenTree* op2, - GenTree* op3); - // Transforms operands and returns the SIMD intrinsic to be applied on // transformed operands to obtain given relop result. SIMDIntrinsicID impSIMDRelOp(SIMDIntrinsicID relOpIntrinsicId, diff --git a/src/coreclr/src/jit/lsraarm64.cpp b/src/coreclr/src/jit/lsraarm64.cpp index 6154765dc5653..f294262673cf6 100644 --- a/src/coreclr/src/jit/lsraarm64.cpp +++ b/src/coreclr/src/jit/lsraarm64.cpp @@ -919,14 +919,6 @@ int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) buildInternalFloatRegisterDefForNode(simdTree); break; - case SIMDIntrinsicSelect: - // TODO-ARM64-CQ Allow lowering to see SIMDIntrinsicSelect so we can generate BSL VC, VA, VB - // bsl target register must be VC. Reserve a temp in case we need to shuffle things. - // This will require a different approach, as GenTreeSIMD has only two operands. 
- assert(!"SIMDIntrinsicSelect not yet supported"); - buildInternalFloatRegisterDefForNode(simdTree); - break; - case SIMDIntrinsicInitArrayX: case SIMDIntrinsicInitFixed: case SIMDIntrinsicCopyToArray: diff --git a/src/coreclr/src/jit/simd.cpp b/src/coreclr/src/jit/simd.cpp index 6333ace3242fd..e964ea879f9f5 100644 --- a/src/coreclr/src/jit/simd.cpp +++ b/src/coreclr/src/jit/simd.cpp @@ -1662,66 +1662,6 @@ SIMDIntrinsicID Compiler::impSIMDRelOp(SIMDIntrinsicID relOpIntrinsicId, return intrinsicID; } -// Creates a GT_SIMD tree for Select operation -// -// Arguments: -// typeHnd - type handle of SIMD vector -// baseType - base type of SIMD vector -// size - SIMD vector size -// op1 - first operand = Condition vector vc -// op2 - second operand = va -// op3 - third operand = vb -// -// Return Value: -// Returns GT_SIMD tree that computes Select(vc, va, vb) -// -GenTree* Compiler::impSIMDSelect( - CORINFO_CLASS_HANDLE typeHnd, var_types baseType, unsigned size, GenTree* op1, GenTree* op2, GenTree* op3) -{ - assert(varTypeIsSIMD(op1)); - var_types simdType = op1->TypeGet(); - assert(op2->TypeGet() == simdType); - assert(op3->TypeGet() == simdType); - - // TODO-ARM64-CQ Support generating select instruction for SIMD - - // Select(BitVector vc, va, vb) = (va & vc) | (vb & !vc) - // Select(op1, op2, op3) = (op2 & op1) | (op3 & !op1) - // = SIMDIntrinsicBitwiseOr(SIMDIntrinsicBitwiseAnd(op2, op1), - // SIMDIntrinsicBitwiseAndNot(op3, op1)) - // - // If Op1 has side effect, create an assignment to a temp - GenTree* tmp = op1; - GenTree* asg = nullptr; - if ((op1->gtFlags & GTF_SIDE_EFFECT) != 0) - { - unsigned lclNum = lvaGrabTemp(true DEBUGARG("SIMD Select")); - lvaSetStruct(lclNum, typeHnd, false); - tmp = gtNewLclvNode(lclNum, op1->TypeGet()); - asg = gtNewTempAssign(lclNum, op1); - } - - GenTree* andExpr = gtNewSIMDNode(simdType, op2, tmp, SIMDIntrinsicBitwiseAnd, baseType, size); - GenTree* dupOp1 = gtCloneExpr(tmp); - assert(dupOp1 != nullptr); -#ifdef TARGET_ARM64 - 
// ARM64 implements SIMDIntrinsicBitwiseAndNot as Left & ~Right - GenTree* andNotExpr = gtNewSIMDNode(simdType, op3, dupOp1, SIMDIntrinsicBitwiseAndNot, baseType, size); -#else - // XARCH implements SIMDIntrinsicBitwiseAndNot as ~Left & Right - GenTree* andNotExpr = gtNewSIMDNode(simdType, dupOp1, op3, SIMDIntrinsicBitwiseAndNot, baseType, size); -#endif - GenTree* simdTree = gtNewSIMDNode(simdType, andExpr, andNotExpr, SIMDIntrinsicBitwiseOr, baseType, size); - - // If asg not null, create a GT_COMMA tree. - if (asg != nullptr) - { - simdTree = gtNewOperNode(GT_COMMA, simdTree->TypeGet(), asg, simdTree); - } - - return simdTree; -} - //------------------------------------------------------------------------ // getOp1ForConstructor: Get the op1 for a constructor call. // @@ -2678,19 +2618,6 @@ GenTree* Compiler::impSIMDIntrinsic(OPCODE opcode, } break; - case SIMDIntrinsicSelect: - { - // op3 is a SIMD variable that is the second source - // op2 is a SIMD variable that is the first source - // op1 is a SIMD variable which is the bit mask. 
- op3 = impSIMDPopStack(simdType); - op2 = impSIMDPopStack(simdType); - op1 = impSIMDPopStack(simdType); - - retVal = impSIMDSelect(clsHnd, baseType, size, op1, op2, op3); - } - break; - case SIMDIntrinsicGetItem: { // op1 is a SIMD variable that is "this" arg diff --git a/src/coreclr/src/jit/simdintrinsiclist.h b/src/coreclr/src/jit/simdintrinsiclist.h index 12dcb58bca708..d81ddfd413d12 100644 --- a/src/coreclr/src/jit/simdintrinsiclist.h +++ b/src/coreclr/src/jit/simdintrinsiclist.h @@ -118,9 +118,6 @@ SIMD_INTRINSIC("Dot", false, DotProduct, SIMD_INTRINSIC("Dot", false, DotProduct, "Dot", TYP_UNKNOWN, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_UNDEF, TYP_UNDEF}) #endif -// Select -SIMD_INTRINSIC("ConditionalSelect", false, Select, "Select", TYP_STRUCT, 3, {TYP_STRUCT, TYP_STRUCT, TYP_STRUCT}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) - // Cast SIMD_INTRINSIC("op_Explicit", false, Cast, "Cast", TYP_STRUCT, 1, {TYP_STRUCT, TYP_UNDEF, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) From 9a624c7c5846aa7d4f3d5307b9f0f0d44ebb5580 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sat, 16 May 2020 13:01:47 -0700 Subject: [PATCH 13/24] Removing SIMDIntrinsicBitwiseAndNot and SIMDIntrinsicBitwiseXor --- src/coreclr/src/jit/codegenarm64.cpp | 17 +-------- src/coreclr/src/jit/gentree.cpp | 1 - src/coreclr/src/jit/lsraarm64.cpp | 2 - src/coreclr/src/jit/lsraxarch.cpp | 2 - src/coreclr/src/jit/simd.cpp | 16 -------- src/coreclr/src/jit/simdcodegenxarch.cpp | 48 +++--------------------- src/coreclr/src/jit/simdintrinsiclist.h | 2 - 7 files changed, 6 insertions(+), 82 deletions(-) diff --git a/src/coreclr/src/jit/codegenarm64.cpp b/src/coreclr/src/jit/codegenarm64.cpp index 4478b00852f4a..54e60aa4718bf 100644 --- a/src/coreclr/src/jit/codegenarm64.cpp 
+++ b/src/coreclr/src/jit/codegenarm64.cpp @@ -3843,9 +3843,7 @@ void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode) case SIMDIntrinsicMul: case SIMDIntrinsicDiv: case SIMDIntrinsicBitwiseAnd: - case SIMDIntrinsicBitwiseAndNot: case SIMDIntrinsicBitwiseOr: - case SIMDIntrinsicBitwiseXor: case SIMDIntrinsicEqual: case SIMDIntrinsicLessThan: case SIMDIntrinsicGreaterThan: @@ -3945,15 +3943,9 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type case SIMDIntrinsicBitwiseAnd: result = INS_and; break; - case SIMDIntrinsicBitwiseAndNot: - result = INS_bic; - break; case SIMDIntrinsicBitwiseOr: result = INS_orr; break; - case SIMDIntrinsicBitwiseXor: - result = INS_eor; - break; case SIMDIntrinsicCast: result = INS_mov; break; @@ -4013,15 +4005,9 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type case SIMDIntrinsicBitwiseAnd: result = INS_and; break; - case SIMDIntrinsicBitwiseAndNot: - result = INS_bic; - break; case SIMDIntrinsicBitwiseOr: result = INS_orr; break; - case SIMDIntrinsicBitwiseXor: - result = INS_eor; - break; case SIMDIntrinsicCast: result = INS_mov; break; @@ -4372,9 +4358,8 @@ void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode) assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicAdd || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicSub || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMul || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicDiv || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseAnd || - simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseAndNot || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseOr || - simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseXor || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicEqual || + simdNode->gtSIMDIntrinsicID == SIMDIntrinsicEqual || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicLessThan || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicGreaterThan || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicLessThanOrEqual || diff --git 
a/src/coreclr/src/jit/gentree.cpp b/src/coreclr/src/jit/gentree.cpp index 194d44436b0c0..3a0f498311f1e 100644 --- a/src/coreclr/src/jit/gentree.cpp +++ b/src/coreclr/src/jit/gentree.cpp @@ -18442,7 +18442,6 @@ bool GenTree::isCommutativeSIMDIntrinsic() case SIMDIntrinsicAdd: case SIMDIntrinsicBitwiseAnd: case SIMDIntrinsicBitwiseOr: - case SIMDIntrinsicBitwiseXor: case SIMDIntrinsicEqual: case SIMDIntrinsicMul: case SIMDIntrinsicOpEquality: diff --git a/src/coreclr/src/jit/lsraarm64.cpp b/src/coreclr/src/jit/lsraarm64.cpp index f294262673cf6..a50c08f000957 100644 --- a/src/coreclr/src/jit/lsraarm64.cpp +++ b/src/coreclr/src/jit/lsraarm64.cpp @@ -854,9 +854,7 @@ int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) case SIMDIntrinsicMul: case SIMDIntrinsicDiv: case SIMDIntrinsicBitwiseAnd: - case SIMDIntrinsicBitwiseAndNot: case SIMDIntrinsicBitwiseOr: - case SIMDIntrinsicBitwiseXor: case SIMDIntrinsicEqual: case SIMDIntrinsicLessThan: case SIMDIntrinsicGreaterThan: diff --git a/src/coreclr/src/jit/lsraxarch.cpp b/src/coreclr/src/jit/lsraxarch.cpp index 1900d56e6635d..f82f4eb58f65f 100644 --- a/src/coreclr/src/jit/lsraxarch.cpp +++ b/src/coreclr/src/jit/lsraxarch.cpp @@ -1964,9 +1964,7 @@ int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) case SIMDIntrinsicSub: case SIMDIntrinsicMul: case SIMDIntrinsicBitwiseAnd: - case SIMDIntrinsicBitwiseAndNot: case SIMDIntrinsicBitwiseOr: - case SIMDIntrinsicBitwiseXor: // SSE2 32-bit integer multiplication requires two temp regs if (simdTree->gtSIMDIntrinsicID == SIMDIntrinsicMul && simdTree->gtSIMDBaseType == TYP_INT && compiler->getSIMDSupportLevel() == SIMD_SSE2_Supported) diff --git a/src/coreclr/src/jit/simd.cpp b/src/coreclr/src/jit/simd.cpp index e964ea879f9f5..b0c5f2d6a4aa8 100644 --- a/src/coreclr/src/jit/simd.cpp +++ b/src/coreclr/src/jit/simd.cpp @@ -1081,9 +1081,7 @@ const SIMDIntrinsicInfo* Compiler::getSIMDIntrinsicInfo(CORINFO_CLASS_HANDLE* in case SIMDIntrinsicGreaterThan: case SIMDIntrinsicGreaterThanOrEqual: case 
SIMDIntrinsicBitwiseAnd: - case SIMDIntrinsicBitwiseAndNot: case SIMDIntrinsicBitwiseOr: - case SIMDIntrinsicBitwiseXor: case SIMDIntrinsicDotProduct: case SIMDIntrinsicCast: case SIMDIntrinsicConvertToSingle: @@ -2550,9 +2548,7 @@ GenTree* Compiler::impSIMDIntrinsic(OPCODE opcode, case SIMDIntrinsicMul: case SIMDIntrinsicDiv: case SIMDIntrinsicBitwiseAnd: - case SIMDIntrinsicBitwiseAndNot: case SIMDIntrinsicBitwiseOr: - case SIMDIntrinsicBitwiseXor: { #if defined(DEBUG) // check for the cases where we don't support intrinsics. @@ -2601,18 +2597,6 @@ GenTree* Compiler::impSIMDIntrinsic(OPCODE opcode, op2 = impSIMDPopStack(simdType); op1 = impSIMDPopStack(simdType, instMethod); -#ifdef TARGET_XARCH - if (simdIntrinsicID == SIMDIntrinsicBitwiseAndNot) - { - // XARCH implements SIMDIntrinsicBitwiseAndNot as ~op1 & op2, while the - // software implementation does op1 & ~op2, so we need to swap the operands - - GenTree* tmp = op2; - op2 = op1; - op1 = tmp; - } -#endif // TARGET_XARCH - simdTree = gtNewSIMDNode(simdType, op1, op2, simdIntrinsicID, baseType, size); retVal = simdTree; } diff --git a/src/coreclr/src/jit/simdcodegenxarch.cpp b/src/coreclr/src/jit/simdcodegenxarch.cpp index 5285d7f61aa6b..e861134abdad9 100644 --- a/src/coreclr/src/jit/simdcodegenxarch.cpp +++ b/src/coreclr/src/jit/simdcodegenxarch.cpp @@ -324,25 +324,6 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type } break; - case SIMDIntrinsicBitwiseAndNot: - if (baseType == TYP_FLOAT) - { - result = INS_andnps; - } - else if (baseType == TYP_DOUBLE) - { - result = INS_andnpd; - } - else if (baseType == TYP_INT) - { - result = INS_pandn; - } - else if (varTypeIsIntegral(baseType)) - { - result = INS_pandn; - } - break; - case SIMDIntrinsicBitwiseOr: if (baseType == TYP_FLOAT) { @@ -358,21 +339,6 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type } break; - case SIMDIntrinsicBitwiseXor: - if (baseType == TYP_FLOAT) - { - result = INS_xorps; 
- } - else if (baseType == TYP_DOUBLE) - { - result = INS_xorpd; - } - else if (varTypeIsIntegral(baseType)) - { - result = INS_pxor; - } - break; - case SIMDIntrinsicCast: result = INS_movaps; break; @@ -623,10 +589,10 @@ void CodeGen::genSIMDScalarMove( void CodeGen::genSIMDZero(var_types targetType, var_types baseType, regNumber targetReg) { - // We just use `INS_xorps` instead of `getOpForSIMDIntrinsic(SIMDIntrinsicBitwiseXor, baseType)` - // since `genSIMDZero` is used for both `System.Numerics.Vectors` and HardwareIntrinsics. Modern - // CPUs handle this specially in the renamer and it never hits the execution pipeline, additionally - // `INS_xorps` is always available (when using either the legacy or VEX encoding). + // We just use `INS_xorps` since `genSIMDZero` is used for both `System.Numerics.Vectors` and + // HardwareIntrinsics. Modern CPUs handle this specially in the renamer and it never hits the + // execution pipeline, additionally `INS_xorps` is always available (when using either the + // legacy or VEX encoding). 
inst_RV_RV(INS_xorps, targetReg, targetReg, targetType, emitActualTypeSize(targetType)); } @@ -1633,9 +1599,7 @@ void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode) assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicAdd || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicSub || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMul || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicDiv || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseAnd || - simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseAndNot || - simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseOr || - simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseXor); + simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseOr); GenTree* op1 = simdNode->gtGetOp1(); GenTree* op2 = simdNode->gtGetOp2(); @@ -3087,9 +3051,7 @@ void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode) case SIMDIntrinsicMul: case SIMDIntrinsicDiv: case SIMDIntrinsicBitwiseAnd: - case SIMDIntrinsicBitwiseAndNot: case SIMDIntrinsicBitwiseOr: - case SIMDIntrinsicBitwiseXor: genSIMDIntrinsicBinOp(simdNode); break; diff --git a/src/coreclr/src/jit/simdintrinsiclist.h b/src/coreclr/src/jit/simdintrinsiclist.h index d81ddfd413d12..1d38420c1a4f0 100644 --- a/src/coreclr/src/jit/simdintrinsiclist.h +++ b/src/coreclr/src/jit/simdintrinsiclist.h @@ -105,9 +105,7 @@ SIMD_INTRINSIC("GreaterThanOrEqual", false, GreaterThanOrEqual, // Bitwise operations SIMD_INTRINSIC("op_BitwiseAnd", false, BitwiseAnd, "&", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) -SIMD_INTRINSIC("AndNot", false, BitwiseAndNot, "&~", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) SIMD_INTRINSIC("op_BitwiseOr", false, BitwiseOr, "|", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, 
TYP_SHORT, TYP_UINT, TYP_ULONG}) -SIMD_INTRINSIC("op_ExclusiveOr", false, BitwiseXor, "^", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) // Dot Product #if defined(TARGET_XARCH) From 4cf149bbade338a1deca1d655ae917cac2cc79d8 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sat, 16 May 2020 13:06:52 -0700 Subject: [PATCH 14/24] Removing SIMDIntrinsicGreaterThanOrEqual and SIMDIntrinsicLessThanOrEqual --- src/coreclr/src/jit/codegenarm64.cpp | 19 +-- src/coreclr/src/jit/compiler.h | 16 +-- src/coreclr/src/jit/lsraarm64.cpp | 2 - src/coreclr/src/jit/lsraxarch.cpp | 1 - src/coreclr/src/jit/simd.cpp | 148 ++--------------------- src/coreclr/src/jit/simdcodegenxarch.cpp | 21 ---- src/coreclr/src/jit/simdintrinsiclist.h | 2 - 7 files changed, 9 insertions(+), 200 deletions(-) diff --git a/src/coreclr/src/jit/codegenarm64.cpp b/src/coreclr/src/jit/codegenarm64.cpp index 54e60aa4718bf..51dbec2bd4ac2 100644 --- a/src/coreclr/src/jit/codegenarm64.cpp +++ b/src/coreclr/src/jit/codegenarm64.cpp @@ -3847,8 +3847,6 @@ void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode) case SIMDIntrinsicEqual: case SIMDIntrinsicLessThan: case SIMDIntrinsicGreaterThan: - case SIMDIntrinsicLessThanOrEqual: - case SIMDIntrinsicGreaterThanOrEqual: genSIMDIntrinsicBinOp(simdNode); break; @@ -3962,15 +3960,9 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type case SIMDIntrinsicGreaterThan: result = INS_fcmgt; break; - case SIMDIntrinsicGreaterThanOrEqual: - result = INS_fcmge; - break; case SIMDIntrinsicLessThan: result = INS_fcmlt; break; - case SIMDIntrinsicLessThanOrEqual: - result = INS_fcmle; - break; case SIMDIntrinsicMul: result = INS_fmul; break; @@ -4021,17 +4013,10 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type case SIMDIntrinsicGreaterThan: result = isUnsigned ? 
INS_cmhi : INS_cmgt; break; - case SIMDIntrinsicGreaterThanOrEqual: - result = isUnsigned ? INS_cmhs : INS_cmge; - break; case SIMDIntrinsicLessThan: assert(!isUnsigned); result = INS_cmlt; break; - case SIMDIntrinsicLessThanOrEqual: - assert(!isUnsigned); - result = INS_cmle; - break; case SIMDIntrinsicMul: result = INS_mul; break; @@ -4361,9 +4346,7 @@ void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode) simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseOr || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicEqual || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicLessThan || - simdNode->gtSIMDIntrinsicID == SIMDIntrinsicGreaterThan || - simdNode->gtSIMDIntrinsicID == SIMDIntrinsicLessThanOrEqual || - simdNode->gtSIMDIntrinsicID == SIMDIntrinsicGreaterThanOrEqual); + simdNode->gtSIMDIntrinsicID == SIMDIntrinsicGreaterThan); GenTree* op1 = simdNode->gtGetOp1(); GenTree* op2 = simdNode->gtGetOp2(); diff --git a/src/coreclr/src/jit/compiler.h b/src/coreclr/src/jit/compiler.h index cde026600c70a..ad92b02fbe70b 100644 --- a/src/coreclr/src/jit/compiler.h +++ b/src/coreclr/src/jit/compiler.h @@ -7979,8 +7979,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX static bool isRelOpSIMDIntrinsic(SIMDIntrinsicID intrinsicId) { return (intrinsicId == SIMDIntrinsicEqual || intrinsicId == SIMDIntrinsicLessThan || - intrinsicId == SIMDIntrinsicLessThanOrEqual || intrinsicId == SIMDIntrinsicGreaterThan || - intrinsicId == SIMDIntrinsicGreaterThanOrEqual); + intrinsicId == SIMDIntrinsicGreaterThan); } // Returns base type of a TYP_SIMD local. @@ -8109,19 +8108,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX GenTree** op1, GenTree** op2); - // Transforms operands and returns the SIMD intrinsic to be applied on - // transformed operands to obtain >= comparison result. 
- SIMDIntrinsicID impSIMDLongRelOpGreaterThanOrEqual(CORINFO_CLASS_HANDLE typeHnd, - unsigned simdVectorSize, - GenTree** op1, - GenTree** op2); - - // Transforms operands and returns the SIMD intrinsic to be applied on - // transformed operands to obtain >= comparison result in case of int32 - // and small int base type vectors. - SIMDIntrinsicID impSIMDIntegralRelOpGreaterThanOrEqual( - CORINFO_CLASS_HANDLE typeHnd, unsigned simdVectorSize, var_types baseType, GenTree** op1, GenTree** op2); - #endif // defined(TARGET_XARCH) void setLclRelatedToSIMDIntrinsic(GenTree* tree); diff --git a/src/coreclr/src/jit/lsraarm64.cpp b/src/coreclr/src/jit/lsraarm64.cpp index a50c08f000957..2bd730f56596d 100644 --- a/src/coreclr/src/jit/lsraarm64.cpp +++ b/src/coreclr/src/jit/lsraarm64.cpp @@ -858,8 +858,6 @@ int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) case SIMDIntrinsicEqual: case SIMDIntrinsicLessThan: case SIMDIntrinsicGreaterThan: - case SIMDIntrinsicLessThanOrEqual: - case SIMDIntrinsicGreaterThanOrEqual: // No special handling required. break; diff --git a/src/coreclr/src/jit/lsraxarch.cpp b/src/coreclr/src/jit/lsraxarch.cpp index f82f4eb58f65f..693b11c9b62c2 100644 --- a/src/coreclr/src/jit/lsraxarch.cpp +++ b/src/coreclr/src/jit/lsraxarch.cpp @@ -1980,7 +1980,6 @@ int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) // SSE2 doesn't support < and <= directly on int vectors. // Instead we need to use > and >= with swapped operands. 
case SIMDIntrinsicLessThan: - case SIMDIntrinsicLessThanOrEqual: noway_assert(!varTypeIsIntegral(simdTree->gtSIMDBaseType)); break; diff --git a/src/coreclr/src/jit/simd.cpp b/src/coreclr/src/jit/simd.cpp index b0c5f2d6a4aa8..0c7614302a400 100644 --- a/src/coreclr/src/jit/simd.cpp +++ b/src/coreclr/src/jit/simd.cpp @@ -1077,9 +1077,7 @@ const SIMDIntrinsicInfo* Compiler::getSIMDIntrinsicInfo(CORINFO_CLASS_HANDLE* in case SIMDIntrinsicDiv: case SIMDIntrinsicEqual: case SIMDIntrinsicLessThan: - case SIMDIntrinsicLessThanOrEqual: case SIMDIntrinsicGreaterThan: - case SIMDIntrinsicGreaterThanOrEqual: case SIMDIntrinsicBitwiseAnd: case SIMDIntrinsicBitwiseOr: case SIMDIntrinsicDotProduct: @@ -1358,122 +1356,6 @@ SIMDIntrinsicID Compiler::impSIMDLongRelOpGreaterThan(CORINFO_CLASS_HANDLE typeH *pOp2 = w; return SIMDIntrinsicBitwiseOr; } - -// impSIMDLongRelOpGreaterThanOrEqual: transforms operands and returns the SIMD intrinsic to be applied on -// transformed operands to obtain >= comparison result. 
-// -// Arguments: -// typeHnd - type handle of SIMD vector -// size - SIMD vector size -// pOp1 - in-out parameter; first operand -// pOp2 - in-out parameter; second operand -// -// Return Value: -// Modifies in-out params pOp1, pOp2 and returns intrinsic ID to be applied to modified operands -// -SIMDIntrinsicID Compiler::impSIMDLongRelOpGreaterThanOrEqual(CORINFO_CLASS_HANDLE typeHnd, - unsigned size, - GenTree** pOp1, - GenTree** pOp2) -{ - var_types simdType = (*pOp1)->TypeGet(); - assert(varTypeIsSIMD(simdType) && ((*pOp2)->TypeGet() == simdType)); - - // expand this to (a == b) | (a > b) - GenTree* dupOp1 = nullptr; - GenTree* dupOp2 = nullptr; - - if (((*pOp1)->gtFlags & GTF_SIDE_EFFECT) != 0) - { - dupOp1 = fgInsertCommaFormTemp(pOp1, typeHnd); - } - else - { - dupOp1 = gtCloneExpr(*pOp1); - } - - if (((*pOp2)->gtFlags & GTF_SIDE_EFFECT) != 0) - { - dupOp2 = fgInsertCommaFormTemp(pOp2, typeHnd); - } - else - { - dupOp2 = gtCloneExpr(*pOp2); - } - - assert(dupOp1 != nullptr && dupOp2 != nullptr); - assert(*pOp1 != nullptr && *pOp2 != nullptr); - - // (a==b) - SIMDIntrinsicID id = impSIMDLongRelOpEqual(typeHnd, size, pOp1, pOp2); - *pOp1 = gtNewSIMDNode(simdType, *pOp1, *pOp2, id, TYP_LONG, size); - - // (a > b) - id = impSIMDLongRelOpGreaterThan(typeHnd, size, &dupOp1, &dupOp2); - *pOp2 = gtNewSIMDNode(simdType, dupOp1, dupOp2, id, TYP_LONG, size); - - return SIMDIntrinsicBitwiseOr; -} - -// impSIMDInt32OrSmallIntRelOpGreaterThanOrEqual: transforms operands and returns the SIMD intrinsic to be applied on -// transformed operands to obtain >= comparison result in case of integer base type vectors -// -// Arguments: -// typeHnd - type handle of SIMD vector -// size - SIMD vector size -// baseType - base type of SIMD vector -// pOp1 - in-out parameter; first operand -// pOp2 - in-out parameter; second operand -// -// Return Value: -// Modifies in-out params pOp1, pOp2 and returns intrinsic ID to be applied to modified operands -// -SIMDIntrinsicID 
Compiler::impSIMDIntegralRelOpGreaterThanOrEqual( - CORINFO_CLASS_HANDLE typeHnd, unsigned size, var_types baseType, GenTree** pOp1, GenTree** pOp2) -{ - var_types simdType = (*pOp1)->TypeGet(); - assert(varTypeIsSIMD(simdType) && ((*pOp2)->TypeGet() == simdType)); - - // This routine should be used only for integer base type vectors - assert(varTypeIsIntegral(baseType)); - if ((getSIMDSupportLevel() == SIMD_SSE2_Supported) && ((baseType == TYP_LONG) || baseType == TYP_UBYTE)) - { - return impSIMDLongRelOpGreaterThanOrEqual(typeHnd, size, pOp1, pOp2); - } - - // expand this to (a == b) | (a > b) - GenTree* dupOp1 = nullptr; - GenTree* dupOp2 = nullptr; - - if (((*pOp1)->gtFlags & GTF_SIDE_EFFECT) != 0) - { - dupOp1 = fgInsertCommaFormTemp(pOp1, typeHnd); - } - else - { - dupOp1 = gtCloneExpr(*pOp1); - } - - if (((*pOp2)->gtFlags & GTF_SIDE_EFFECT) != 0) - { - dupOp2 = fgInsertCommaFormTemp(pOp2, typeHnd); - } - else - { - dupOp2 = gtCloneExpr(*pOp2); - } - - assert(dupOp1 != nullptr && dupOp2 != nullptr); - assert(*pOp1 != nullptr && *pOp2 != nullptr); - - // (a==b) - *pOp1 = gtNewSIMDNode(simdType, *pOp1, *pOp2, SIMDIntrinsicEqual, baseType, size); - - // (a > b) - *pOp2 = gtNewSIMDNode(simdType, dupOp1, dupOp2, SIMDIntrinsicGreaterThan, baseType, size); - - return SIMDIntrinsicBitwiseOr; -} #endif // TARGET_XARCH // Transforms operands and returns the SIMD intrinsic to be applied on @@ -1510,28 +1392,26 @@ SIMDIntrinsicID Compiler::impSIMDRelOp(SIMDIntrinsicID relOpIntrinsicId, { // SSE2/AVX doesn't support > and >= on vector float/double. // Therefore, we need to use < and <= with swapped operands - if (relOpIntrinsicId == SIMDIntrinsicGreaterThan || relOpIntrinsicId == SIMDIntrinsicGreaterThanOrEqual) + if (relOpIntrinsicId == SIMDIntrinsicGreaterThan) { GenTree* tmp = *pOp1; *pOp1 = *pOp2; *pOp2 = tmp; - intrinsicID = - (relOpIntrinsicId == SIMDIntrinsicGreaterThan) ? 
SIMDIntrinsicLessThan : SIMDIntrinsicLessThanOrEqual; + intrinsicID = SIMDIntrinsicLessThan; } } else if (varTypeIsIntegral(baseType)) { // SSE/AVX doesn't support < and <= on integer base type vectors. // Therefore, we need to use > and >= with swapped operands. - if (intrinsicID == SIMDIntrinsicLessThan || intrinsicID == SIMDIntrinsicLessThanOrEqual) + if (intrinsicID == SIMDIntrinsicLessThan) { GenTree* tmp = *pOp1; *pOp1 = *pOp2; *pOp2 = tmp; - intrinsicID = (relOpIntrinsicId == SIMDIntrinsicLessThan) ? SIMDIntrinsicGreaterThan - : SIMDIntrinsicGreaterThanOrEqual; + intrinsicID = SIMDIntrinsicGreaterThan; } if ((getSIMDSupportLevel() == SIMD_SSE2_Supported) && baseType == TYP_LONG) @@ -1546,24 +1426,13 @@ SIMDIntrinsicID Compiler::impSIMDRelOp(SIMDIntrinsicID relOpIntrinsicId, { intrinsicID = impSIMDLongRelOpGreaterThan(typeHnd, size, pOp1, pOp2); } - else if (intrinsicID == SIMDIntrinsicGreaterThanOrEqual) - { - intrinsicID = impSIMDLongRelOpGreaterThanOrEqual(typeHnd, size, pOp1, pOp2); - } else { unreached(); } } // SSE2 and AVX direct support for signed comparison of int32, int16 and int8 types - else if (!varTypeIsUnsigned(baseType)) - { - if (intrinsicID == SIMDIntrinsicGreaterThanOrEqual) - { - intrinsicID = impSIMDIntegralRelOpGreaterThanOrEqual(typeHnd, size, baseType, pOp1, pOp2); - } - } - else // unsigned + else if (varTypeIsUnsigned(baseType)) { // Vector, Vector, Vector and Vector: // SSE2 supports > for signed comparison. Therefore, to use it for @@ -1643,14 +1512,13 @@ SIMDIntrinsicID Compiler::impSIMDRelOp(SIMDIntrinsicID relOpIntrinsicId, // TARGET_ARM64 doesn't support < and <= on register register comparisons // Therefore, we need to use > and >= with swapped operands. - if (intrinsicID == SIMDIntrinsicLessThan || intrinsicID == SIMDIntrinsicLessThanOrEqual) + if (intrinsicID == SIMDIntrinsicLessThan) { GenTree* tmp = *pOp1; *pOp1 = *pOp2; *pOp2 = tmp; - intrinsicID = - (intrinsicID == SIMDIntrinsicLessThan) ? 
SIMDIntrinsicGreaterThan : SIMDIntrinsicGreaterThanOrEqual; + intrinsicID = SIMDIntrinsicGreaterThan; } #else // !TARGET_XARCH assert(!"impSIMDRelOp() unimplemented on target arch"); @@ -2530,9 +2398,7 @@ GenTree* Compiler::impSIMDIntrinsic(OPCODE opcode, case SIMDIntrinsicEqual: case SIMDIntrinsicLessThan: - case SIMDIntrinsicLessThanOrEqual: case SIMDIntrinsicGreaterThan: - case SIMDIntrinsicGreaterThanOrEqual: { op2 = impSIMDPopStack(simdType); op1 = impSIMDPopStack(simdType, instMethod); diff --git a/src/coreclr/src/jit/simdcodegenxarch.cpp b/src/coreclr/src/jit/simdcodegenxarch.cpp index e861134abdad9..ef441009b39ab 100644 --- a/src/coreclr/src/jit/simdcodegenxarch.cpp +++ b/src/coreclr/src/jit/simdcodegenxarch.cpp @@ -268,24 +268,6 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type } break; - case SIMDIntrinsicLessThanOrEqual: - // Packed integers use (a==b) || ( b > a) in place of a <= b. - assert(baseType != TYP_INT); - - if (baseType == TYP_FLOAT) - { - result = INS_cmpps; - assert(ival != nullptr); - *ival = 2; - } - else if (baseType == TYP_DOUBLE) - { - result = INS_cmppd; - assert(ival != nullptr); - *ival = 2; - } - break; - case SIMDIntrinsicGreaterThan: // Packed float/double use < with swapped operands assert(!varTypeIsFloating(baseType)); @@ -1847,7 +1829,6 @@ void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) break; case SIMDIntrinsicLessThan: - case SIMDIntrinsicLessThanOrEqual: { assert(targetReg != REG_NA); @@ -3060,8 +3041,6 @@ void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode) case SIMDIntrinsicEqual: case SIMDIntrinsicLessThan: case SIMDIntrinsicGreaterThan: - case SIMDIntrinsicLessThanOrEqual: - case SIMDIntrinsicGreaterThanOrEqual: genSIMDIntrinsicRelOp(simdNode); break; diff --git a/src/coreclr/src/jit/simdintrinsiclist.h b/src/coreclr/src/jit/simdintrinsiclist.h index 1d38420c1a4f0..77709a6b7a499 100644 --- a/src/coreclr/src/jit/simdintrinsiclist.h +++ b/src/coreclr/src/jit/simdintrinsiclist.h 
@@ -99,9 +99,7 @@ SIMD_INTRINSIC("op_Division", false, Div, // Vector Relational operators SIMD_INTRINSIC("Equals", false, Equal, "eq", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) SIMD_INTRINSIC("LessThan", false, LessThan, "lt", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) -SIMD_INTRINSIC("LessThanOrEqual", false, LessThanOrEqual, "le", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) SIMD_INTRINSIC("GreaterThan", false, GreaterThan, "gt", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) -SIMD_INTRINSIC("GreaterThanOrEqual", false, GreaterThanOrEqual, "ge", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) // Bitwise operations SIMD_INTRINSIC("op_BitwiseAnd", false, BitwiseAnd, "&", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) From b73628d2d13b6b21b91cf7bc5c80da820180286a Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sat, 16 May 2020 13:14:15 -0700 Subject: [PATCH 15/24] Removing SIMDIntrinsicGreaterThan and SIMDIntrinsicLessThan --- src/coreclr/src/jit/codegenarm64.cpp | 19 +-- src/coreclr/src/jit/compiler.h | 10 +- src/coreclr/src/jit/lsraarm64.cpp | 2 - src/coreclr/src/jit/lsraxarch.cpp | 13 --- src/coreclr/src/jit/simd.cpp | 140 +---------------------- src/coreclr/src/jit/simdcodegenxarch.cpp | 94 ++++----------- src/coreclr/src/jit/simdintrinsiclist.h | 2 - 7 files changed, 23 
insertions(+), 257 deletions(-) diff --git a/src/coreclr/src/jit/codegenarm64.cpp b/src/coreclr/src/jit/codegenarm64.cpp index 51dbec2bd4ac2..86f7b017e7d6f 100644 --- a/src/coreclr/src/jit/codegenarm64.cpp +++ b/src/coreclr/src/jit/codegenarm64.cpp @@ -3845,8 +3845,6 @@ void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode) case SIMDIntrinsicBitwiseAnd: case SIMDIntrinsicBitwiseOr: case SIMDIntrinsicEqual: - case SIMDIntrinsicLessThan: - case SIMDIntrinsicGreaterThan: genSIMDIntrinsicBinOp(simdNode); break; @@ -3957,12 +3955,6 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type case SIMDIntrinsicEqual: result = INS_fcmeq; break; - case SIMDIntrinsicGreaterThan: - result = INS_fcmgt; - break; - case SIMDIntrinsicLessThan: - result = INS_fcmlt; - break; case SIMDIntrinsicMul: result = INS_fmul; break; @@ -4010,13 +4002,6 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type case SIMDIntrinsicEqual: result = INS_cmeq; break; - case SIMDIntrinsicGreaterThan: - result = isUnsigned ? 
INS_cmhi : INS_cmgt; - break; - case SIMDIntrinsicLessThan: - assert(!isUnsigned); - result = INS_cmlt; - break; case SIMDIntrinsicMul: result = INS_mul; break; @@ -4344,9 +4329,7 @@ void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode) simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMul || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicDiv || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseAnd || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseOr || - simdNode->gtSIMDIntrinsicID == SIMDIntrinsicEqual || - simdNode->gtSIMDIntrinsicID == SIMDIntrinsicLessThan || - simdNode->gtSIMDIntrinsicID == SIMDIntrinsicGreaterThan); + simdNode->gtSIMDIntrinsicID == SIMDIntrinsicEqual); GenTree* op1 = simdNode->gtGetOp1(); GenTree* op2 = simdNode->gtGetOp2(); diff --git a/src/coreclr/src/jit/compiler.h b/src/coreclr/src/jit/compiler.h index ad92b02fbe70b..68eb6abfdd4ba 100644 --- a/src/coreclr/src/jit/compiler.h +++ b/src/coreclr/src/jit/compiler.h @@ -7978,8 +7978,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX static bool isRelOpSIMDIntrinsic(SIMDIntrinsicID intrinsicId) { - return (intrinsicId == SIMDIntrinsicEqual || intrinsicId == SIMDIntrinsicLessThan || - intrinsicId == SIMDIntrinsicGreaterThan); + return (intrinsicId == SIMDIntrinsicEqual); } // Returns base type of a TYP_SIMD local. @@ -8101,13 +8100,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX GenTree** op1, GenTree** op2); - // Transforms operands and returns the SIMD intrinsic to be applied on - // transformed operands to obtain > comparison result. 
- SIMDIntrinsicID impSIMDLongRelOpGreaterThan(CORINFO_CLASS_HANDLE typeHnd, - unsigned simdVectorSize, - GenTree** op1, - GenTree** op2); - #endif // defined(TARGET_XARCH) void setLclRelatedToSIMDIntrinsic(GenTree* tree); diff --git a/src/coreclr/src/jit/lsraarm64.cpp b/src/coreclr/src/jit/lsraarm64.cpp index 2bd730f56596d..d08d188ceef3e 100644 --- a/src/coreclr/src/jit/lsraarm64.cpp +++ b/src/coreclr/src/jit/lsraarm64.cpp @@ -856,8 +856,6 @@ int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) case SIMDIntrinsicBitwiseAnd: case SIMDIntrinsicBitwiseOr: case SIMDIntrinsicEqual: - case SIMDIntrinsicLessThan: - case SIMDIntrinsicGreaterThan: // No special handling required. break; diff --git a/src/coreclr/src/jit/lsraxarch.cpp b/src/coreclr/src/jit/lsraxarch.cpp index 693b11c9b62c2..70bb5aae64f74 100644 --- a/src/coreclr/src/jit/lsraxarch.cpp +++ b/src/coreclr/src/jit/lsraxarch.cpp @@ -1977,19 +1977,6 @@ int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) case SIMDIntrinsicEqual: break; - // SSE2 doesn't support < and <= directly on int vectors. - // Instead we need to use > and >= with swapped operands. - case SIMDIntrinsicLessThan: - noway_assert(!varTypeIsIntegral(simdTree->gtSIMDBaseType)); - break; - - // SIMDIntrinsicEqual is supported only on non-floating point base type vectors. - // SSE2 cmpps/pd doesn't support > and >= directly on float/double vectors. - // Instead we need to use < and <= with swapped operands. 
- case SIMDIntrinsicGreaterThan: - noway_assert(!varTypeIsFloating(simdTree->gtSIMDBaseType)); - break; - case SIMDIntrinsicOpEquality: case SIMDIntrinsicOpInEquality: if (simdTree->gtGetOp2()->isContained()) diff --git a/src/coreclr/src/jit/simd.cpp b/src/coreclr/src/jit/simd.cpp index 0c7614302a400..779aa72289ec3 100644 --- a/src/coreclr/src/jit/simd.cpp +++ b/src/coreclr/src/jit/simd.cpp @@ -1076,8 +1076,6 @@ const SIMDIntrinsicInfo* Compiler::getSIMDIntrinsicInfo(CORINFO_CLASS_HANDLE* in case SIMDIntrinsicMul: case SIMDIntrinsicDiv: case SIMDIntrinsicEqual: - case SIMDIntrinsicLessThan: - case SIMDIntrinsicGreaterThan: case SIMDIntrinsicBitwiseAnd: case SIMDIntrinsicBitwiseOr: case SIMDIntrinsicDotProduct: @@ -1260,102 +1258,6 @@ SIMDIntrinsicID Compiler::impSIMDLongRelOpEqual(CORINFO_CLASS_HANDLE typeHnd, SIMDIntrinsicShuffleSSE2, TYP_INT, size); return SIMDIntrinsicBitwiseAnd; } - -// impSIMDLongRelOpGreaterThan: transforms operands and returns the SIMD intrinsic to be applied on -// transformed operands to obtain > comparison result. -// -// Arguments: -// typeHnd - type handle of SIMD vector -// size - SIMD vector size -// pOp1 - in-out parameter; first operand -// pOp2 - in-out parameter; second operand -// -// Return Value: -// Modifies in-out params pOp1, pOp2 and returns intrinsic ID to be applied to modified operands -// -SIMDIntrinsicID Compiler::impSIMDLongRelOpGreaterThan(CORINFO_CLASS_HANDLE typeHnd, - unsigned size, - GenTree** pOp1, - GenTree** pOp2) -{ - var_types simdType = (*pOp1)->TypeGet(); - assert(varTypeIsSIMD(simdType) && ((*pOp2)->TypeGet() == simdType)); - - // GreaterThan(v1, v2) where v1 and v2 are vector long. - // Let us consider the case of single long element comparison. - // say L1 = (x1, y1) and L2 = (x2, y2) where x1, y1, x2, and y2 are 32-bit integers that comprise the longs L1 and - // L2. 
- // - // GreaterThan(L1, L2) can be expressed in terms of > relationship between 32-bit integers that comprise L1 and L2 - // as - // = (x1, y1) > (x2, y2) - // = (x1 > x2) || [(x1 == x2) && (y1 > y2)] - eq (1) - // - // t = (v1 > v2) 32-bit signed comparison - // u = (v1 == v2) 32-bit sized element equality - // v = (v1 > v2) 32-bit unsigned comparison - // - // z = shuffle(t, (3, 3, 1, 1)) - This corresponds to (x1 > x2) in eq(1) above - // t1 = Shuffle(v, (2, 2, 0, 0)) - This corresponds to (y1 > y2) in eq(1) above - // u1 = Shuffle(u, (3, 3, 1, 1)) - This corresponds to (x1 == x2) in eq(1) above - // w = And(t1, u1) - This corresponds to [(x1 == x2) && (y1 > y2)] in eq(1) above - // Result = BitwiseOr(z, w) - - // Since op1 and op2 gets used multiple times, make sure side effects are computed. - GenTree* dupOp1 = nullptr; - GenTree* dupOp2 = nullptr; - GenTree* dupDupOp1 = nullptr; - GenTree* dupDupOp2 = nullptr; - - if (((*pOp1)->gtFlags & GTF_SIDE_EFFECT) != 0) - { - dupOp1 = fgInsertCommaFormTemp(pOp1, typeHnd); - dupDupOp1 = gtNewLclvNode(dupOp1->AsLclVarCommon()->GetLclNum(), simdType); - } - else - { - dupOp1 = gtCloneExpr(*pOp1); - dupDupOp1 = gtCloneExpr(*pOp1); - } - - if (((*pOp2)->gtFlags & GTF_SIDE_EFFECT) != 0) - { - dupOp2 = fgInsertCommaFormTemp(pOp2, typeHnd); - dupDupOp2 = gtNewLclvNode(dupOp2->AsLclVarCommon()->GetLclNum(), simdType); - } - else - { - dupOp2 = gtCloneExpr(*pOp2); - dupDupOp2 = gtCloneExpr(*pOp2); - } - - assert(dupDupOp1 != nullptr && dupDupOp2 != nullptr); - assert(dupOp1 != nullptr && dupOp2 != nullptr); - assert(*pOp1 != nullptr && *pOp2 != nullptr); - - // v1GreaterThanv2Signed - signed 32-bit comparison - GenTree* v1GreaterThanv2Signed = gtNewSIMDNode(simdType, *pOp1, *pOp2, SIMDIntrinsicGreaterThan, TYP_INT, size); - - // v1Equalsv2 - 32-bit equality - GenTree* v1Equalsv2 = gtNewSIMDNode(simdType, dupOp1, dupOp2, SIMDIntrinsicEqual, TYP_INT, size); - - // v1GreaterThanv2Unsigned - unsigned 32-bit comparison - var_types 
tempBaseType = TYP_UINT; - SIMDIntrinsicID sid = impSIMDRelOp(SIMDIntrinsicGreaterThan, typeHnd, size, &tempBaseType, &dupDupOp1, &dupDupOp2); - GenTree* v1GreaterThanv2Unsigned = gtNewSIMDNode(simdType, dupDupOp1, dupDupOp2, sid, tempBaseType, size); - - GenTree* z = gtNewSIMDNode(simdType, v1GreaterThanv2Signed, gtNewIconNode(SHUFFLE_WWYY, TYP_INT), - SIMDIntrinsicShuffleSSE2, TYP_FLOAT, size); - GenTree* t1 = gtNewSIMDNode(simdType, v1GreaterThanv2Unsigned, gtNewIconNode(SHUFFLE_ZZXX, TYP_INT), - SIMDIntrinsicShuffleSSE2, TYP_FLOAT, size); - GenTree* u1 = gtNewSIMDNode(simdType, v1Equalsv2, gtNewIconNode(SHUFFLE_WWYY, TYP_INT), SIMDIntrinsicShuffleSSE2, - TYP_FLOAT, size); - GenTree* w = gtNewSIMDNode(simdType, u1, t1, SIMDIntrinsicBitwiseAnd, TYP_INT, size); - - *pOp1 = z; - *pOp2 = w; - return SIMDIntrinsicBitwiseOr; -} #endif // TARGET_XARCH // Transforms operands and returns the SIMD intrinsic to be applied on @@ -1390,30 +1292,9 @@ SIMDIntrinsicID Compiler::impSIMDRelOp(SIMDIntrinsicID relOpIntrinsicId, if (varTypeIsFloating(baseType)) { - // SSE2/AVX doesn't support > and >= on vector float/double. - // Therefore, we need to use < and <= with swapped operands - if (relOpIntrinsicId == SIMDIntrinsicGreaterThan) - { - GenTree* tmp = *pOp1; - *pOp1 = *pOp2; - *pOp2 = tmp; - - intrinsicID = SIMDIntrinsicLessThan; - } } else if (varTypeIsIntegral(baseType)) { - // SSE/AVX doesn't support < and <= on integer base type vectors. - // Therefore, we need to use > and >= with swapped operands. - if (intrinsicID == SIMDIntrinsicLessThan) - { - GenTree* tmp = *pOp1; - *pOp1 = *pOp2; - *pOp2 = tmp; - - intrinsicID = SIMDIntrinsicGreaterThan; - } - if ((getSIMDSupportLevel() == SIMD_SSE2_Supported) && baseType == TYP_LONG) { // There is no direct SSE2 support for comparing TYP_LONG vectors. 
@@ -1422,10 +1303,6 @@ SIMDIntrinsicID Compiler::impSIMDRelOp(SIMDIntrinsicID relOpIntrinsicId, { intrinsicID = impSIMDLongRelOpEqual(typeHnd, size, pOp1, pOp2); } - else if (intrinsicID == SIMDIntrinsicGreaterThan) - { - intrinsicID = impSIMDLongRelOpGreaterThan(typeHnd, size, pOp1, pOp2); - } else { unreached(); @@ -1507,20 +1384,7 @@ SIMDIntrinsicID Compiler::impSIMDRelOp(SIMDIntrinsicID relOpIntrinsicId, return impSIMDRelOp(intrinsicID, typeHnd, size, inOutBaseType, pOp1, pOp2); } } -#elif defined(TARGET_ARM64) - // TODO-ARM64-CQ handle comparisons against zero - - // TARGET_ARM64 doesn't support < and <= on register register comparisons - // Therefore, we need to use > and >= with swapped operands. - if (intrinsicID == SIMDIntrinsicLessThan) - { - GenTree* tmp = *pOp1; - *pOp1 = *pOp2; - *pOp2 = tmp; - - intrinsicID = SIMDIntrinsicGreaterThan; - } -#else // !TARGET_XARCH +#elif !defined(TARGET_ARM64) assert(!"impSIMDRelOp() unimplemented on target arch"); unreached(); #endif // !TARGET_XARCH @@ -2397,8 +2261,6 @@ GenTree* Compiler::impSIMDIntrinsic(OPCODE opcode, break; case SIMDIntrinsicEqual: - case SIMDIntrinsicLessThan: - case SIMDIntrinsicGreaterThan: { op2 = impSIMDPopStack(simdType); op1 = impSIMDPopStack(simdType, instMethod); diff --git a/src/coreclr/src/jit/simdcodegenxarch.cpp b/src/coreclr/src/jit/simdcodegenxarch.cpp index ef441009b39ab..cbfbfb25d69ed 100644 --- a/src/coreclr/src/jit/simdcodegenxarch.cpp +++ b/src/coreclr/src/jit/simdcodegenxarch.cpp @@ -250,47 +250,6 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type } break; - case SIMDIntrinsicLessThan: - // Packed integers use > with swapped operands - assert(baseType != TYP_INT); - - if (baseType == TYP_FLOAT) - { - result = INS_cmpps; - assert(ival != nullptr); - *ival = 1; - } - else if (baseType == TYP_DOUBLE) - { - result = INS_cmppd; - assert(ival != nullptr); - *ival = 1; - } - break; - - case SIMDIntrinsicGreaterThan: - // Packed float/double use < with 
swapped operands - assert(!varTypeIsFloating(baseType)); - - // SSE2 supports only signed > - if (baseType == TYP_INT) - { - result = INS_pcmpgtd; - } - else if (baseType == TYP_SHORT) - { - result = INS_pcmpgtw; - } - else if (baseType == TYP_BYTE) - { - result = INS_pcmpgtb; - } - else if ((baseType == TYP_LONG) && (compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported)) - { - result = INS_pcmpgtq; - } - break; - case SIMDIntrinsicBitwiseAnd: if (baseType == TYP_FLOAT) { @@ -1411,7 +1370,26 @@ void CodeGen::genSIMDIntrinsicWiden(GenTreeSIMD* simdNode) genSIMDZero(simdType, baseType, tmpReg); if (!varTypeIsUnsigned(baseType)) { - instruction compareIns = getOpForSIMDIntrinsic(SIMDIntrinsicGreaterThan, baseType); + instruction compareIns = INS_invalid; + + if (baseType == TYP_INT) + { + compareIns = INS_pcmpgtd; + } + else if (baseType == TYP_SHORT) + { + compareIns = INS_pcmpgtw; + } + else if (baseType == TYP_BYTE) + { + compareIns = INS_pcmpgtb; + } + else if ((baseType == TYP_LONG) && (compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported)) + { + compareIns = INS_pcmpgtq; + } + + assert(compareIns != INS_invalid); inst_RV_RV(compareIns, tmpReg, targetReg, simdType, emitSize); } inst_RV_RV(widenIns, targetReg, tmpReg, simdType); @@ -1778,7 +1756,6 @@ void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) switch (simdNode->gtSIMDIntrinsicID) { case SIMDIntrinsicEqual: - case SIMDIntrinsicGreaterThan: { assert(targetReg != REG_NA); @@ -1791,12 +1768,6 @@ void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) } #endif - // Greater-than: Floating point vectors use "<" with swapped operands - if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicGreaterThan) - { - assert(!varTypeIsFloating(baseType)); - } - unsigned ival = 0; instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType, &ival); @@ -1828,29 +1799,6 @@ void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) } break; - case SIMDIntrinsicLessThan: - { - assert(targetReg != 
REG_NA); - - // Int vectors use ">" and ">=" with swapped operands - assert(varTypeIsFloating(baseType)); - - // Get the instruction opcode for compare operation - unsigned ival; - instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType, &ival); - - // targetReg = op1reg RelOp op2reg - // Thefore, we can optimize if op1Reg == targetReg - if (op1Reg != targetReg) - { - inst_RV_RV(ins_Copy(targetType), targetReg, op1Reg, targetType, emitActualTypeSize(targetType)); - } - - assert((ival >= 0) && (ival <= 255)); - GetEmitter()->emitIns_R_R_I(ins, emitActualTypeSize(targetType), targetReg, op2Reg, (int8_t)ival); - } - break; - // (In)Equality that produces bool result instead of a bit vector case SIMDIntrinsicOpEquality: case SIMDIntrinsicOpInEquality: @@ -3039,8 +2987,6 @@ void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode) case SIMDIntrinsicOpEquality: case SIMDIntrinsicOpInEquality: case SIMDIntrinsicEqual: - case SIMDIntrinsicLessThan: - case SIMDIntrinsicGreaterThan: genSIMDIntrinsicRelOp(simdNode); break; diff --git a/src/coreclr/src/jit/simdintrinsiclist.h b/src/coreclr/src/jit/simdintrinsiclist.h index 77709a6b7a499..99d878f2c2ec4 100644 --- a/src/coreclr/src/jit/simdintrinsiclist.h +++ b/src/coreclr/src/jit/simdintrinsiclist.h @@ -98,8 +98,6 @@ SIMD_INTRINSIC("op_Division", false, Div, // Vector Relational operators SIMD_INTRINSIC("Equals", false, Equal, "eq", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) -SIMD_INTRINSIC("LessThan", false, LessThan, "lt", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) -SIMD_INTRINSIC("GreaterThan", false, GreaterThan, "gt", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, 
TYP_ULONG}) // Bitwise operations SIMD_INTRINSIC("op_BitwiseAnd", false, BitwiseAnd, "&", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) From ce8433aebac994689ce6f913cc4aba611c67965e Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sat, 16 May 2020 13:18:43 -0700 Subject: [PATCH 16/24] Removing SIMDIntrinsicInstEquals --- src/coreclr/src/jit/lsraarm64.cpp | 1 - src/coreclr/src/jit/simd.cpp | 1 - src/coreclr/src/jit/simdintrinsiclist.h | 3 --- 3 files changed, 5 deletions(-) diff --git a/src/coreclr/src/jit/lsraarm64.cpp b/src/coreclr/src/jit/lsraarm64.cpp index d08d188ceef3e..2217284195871 100644 --- a/src/coreclr/src/jit/lsraarm64.cpp +++ b/src/coreclr/src/jit/lsraarm64.cpp @@ -926,7 +926,6 @@ int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) case SIMDIntrinsicGetY: case SIMDIntrinsicGetZ: case SIMDIntrinsicGetW: - case SIMDIntrinsicInstEquals: case SIMDIntrinsicHWAccel: case SIMDIntrinsicWiden: case SIMDIntrinsicInvalid: diff --git a/src/coreclr/src/jit/simd.cpp b/src/coreclr/src/jit/simd.cpp index 779aa72289ec3..784a473a23c3c 100644 --- a/src/coreclr/src/jit/simd.cpp +++ b/src/coreclr/src/jit/simd.cpp @@ -2228,7 +2228,6 @@ GenTree* Compiler::impSIMDIntrinsic(OPCODE opcode, break; case SIMDIntrinsicOpEquality: - case SIMDIntrinsicInstEquals: { op2 = impSIMDPopStack(simdType); op1 = impSIMDPopStack(simdType, instMethod); diff --git a/src/coreclr/src/jit/simdintrinsiclist.h b/src/coreclr/src/jit/simdintrinsiclist.h index 99d878f2c2ec4..e5faf6cd11c3c 100644 --- a/src/coreclr/src/jit/simdintrinsiclist.h +++ b/src/coreclr/src/jit/simdintrinsiclist.h @@ -76,9 +76,6 @@ SIMD_INTRINSIC("set_Y", true, SetY, SIMD_INTRINSIC("set_Z", true, SetZ, "setZ", TYP_VOID, 2, {TYP_BYREF, TYP_UNKNOWN, TYP_UNDEF}, {TYP_FLOAT, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}) SIMD_INTRINSIC("set_W", true, SetW, "setW", 
TYP_VOID, 2, {TYP_BYREF, TYP_UNKNOWN, TYP_UNDEF}, {TYP_FLOAT, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}) -// Object.Equals() -SIMD_INTRINSIC("Equals", true, InstEquals, "equals", TYP_BOOL, 2, {TYP_BYREF, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) - // Operator == and != SIMD_INTRINSIC("op_Equality", false, OpEquality, "==", TYP_BOOL, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) SIMD_INTRINSIC("op_Inequality", false, OpInEquality, "!=", TYP_BOOL, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) From e8c20094688f816d1c6ce97c6ed94c9b80d27ca0 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sat, 16 May 2020 13:43:48 -0700 Subject: [PATCH 17/24] Removing SIMDIntrinsicOpEquality and SIMDIntrinsicOpInEquality --- src/coreclr/src/jit/codegenarm64.cpp | 64 ---------------- src/coreclr/src/jit/gentree.cpp | 10 +-- src/coreclr/src/jit/gentree.h | 15 ---- src/coreclr/src/jit/lowerarmarch.cpp | 5 -- src/coreclr/src/jit/lowerxarch.cpp | 21 ------ src/coreclr/src/jit/lsraarm64.cpp | 5 -- src/coreclr/src/jit/lsrabuild.cpp | 2 +- src/coreclr/src/jit/lsraxarch.cpp | 41 ++-------- src/coreclr/src/jit/simd.cpp | 32 -------- src/coreclr/src/jit/simdcodegenxarch.cpp | 96 ------------------------ src/coreclr/src/jit/simdintrinsiclist.h | 4 - 11 files changed, 7 insertions(+), 288 deletions(-) diff --git a/src/coreclr/src/jit/codegenarm64.cpp b/src/coreclr/src/jit/codegenarm64.cpp index 86f7b017e7d6f..04e24dfce4bb2 100644 --- a/src/coreclr/src/jit/codegenarm64.cpp +++ b/src/coreclr/src/jit/codegenarm64.cpp @@ -3848,11 +3848,6 @@ void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode) genSIMDIntrinsicBinOp(simdNode); break; - case 
SIMDIntrinsicOpEquality: - case SIMDIntrinsicOpInEquality: - genSIMDIntrinsicRelOp(simdNode); - break; - case SIMDIntrinsicDotProduct: genSIMDIntrinsicDotProduct(simdNode); break; @@ -4357,65 +4352,6 @@ void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode) genProduceReg(simdNode); } -//-------------------------------------------------------------------------------- -// genSIMDIntrinsicRelOp: Generate code for a SIMD Intrinsic relational operater -// == and != -// -// Arguments: -// simdNode - The GT_SIMD node -// -// Return Value: -// None. -// -void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) -{ - assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpEquality || - simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpInEquality); - - GenTree* op1 = simdNode->gtGetOp1(); - GenTree* op2 = simdNode->gtGetOp2(); - var_types baseType = simdNode->gtSIMDBaseType; - regNumber targetReg = simdNode->GetRegNum(); - var_types targetType = simdNode->TypeGet(); - - genConsumeOperands(simdNode); - regNumber op1Reg = op1->GetRegNum(); - regNumber op2Reg = op2->GetRegNum(); - regNumber otherReg = op2Reg; - - instruction ins = getOpForSIMDIntrinsic(SIMDIntrinsicEqual, baseType); - emitAttr attr = (simdNode->gtSIMDSize > 8) ? EA_16BYTE : EA_8BYTE; - insOpts opt = genGetSimdInsOpt(attr, baseType); - - // TODO-ARM64-CQ Contain integer constants where possible - - regNumber tmpFloatReg = simdNode->GetSingleTempReg(RBM_ALLFLOAT); - - GetEmitter()->emitIns_R_R_R(ins, attr, tmpFloatReg, op1Reg, op2Reg, opt); - - if ((simdNode->gtFlags & GTF_SIMD12_OP) != 0) - { - // For 12Byte vectors we must set upper bits to get correct comparison - // We do not assume upper bits are zero. - instGen_Set_Reg_To_Imm(EA_4BYTE, targetReg, -1); - GetEmitter()->emitIns_R_R_I(INS_ins, EA_4BYTE, tmpFloatReg, targetReg, 3); - } - - GetEmitter()->emitIns_R_R(INS_uminv, attr, tmpFloatReg, tmpFloatReg, - (simdNode->gtSIMDSize > 8) ? 
INS_OPTS_16B : INS_OPTS_8B); - - GetEmitter()->emitIns_R_R_I(INS_mov, EA_1BYTE, targetReg, tmpFloatReg, 0); - - if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpInEquality) - { - GetEmitter()->emitIns_R_R_I(INS_eor, EA_4BYTE, targetReg, targetReg, 0x1); - } - - GetEmitter()->emitIns_R_R_I(INS_and, EA_4BYTE, targetReg, targetReg, 0x1); - - genProduceReg(simdNode); -} - //-------------------------------------------------------------------------------- // genSIMDIntrinsicDotProduct: Generate code for SIMD Intrinsic Dot Product. // diff --git a/src/coreclr/src/jit/gentree.cpp b/src/coreclr/src/jit/gentree.cpp index 3a0f498311f1e..ddac5d4386735 100644 --- a/src/coreclr/src/jit/gentree.cpp +++ b/src/coreclr/src/jit/gentree.cpp @@ -16511,13 +16511,7 @@ bool GenTree::isContained() const { // We have to cast away const-ness since AsOp() method is non-const. const GenTree* childNode = AsOp()->gtGetOp1(); - assert((isMarkedContained == false) || childNode->IsSIMDEqualityOrInequality()); - } - - // these either produce a result in register or set flags reg. - else if (IsSIMDEqualityOrInequality()) - { - assert(!isMarkedContained); + assert(isMarkedContained == false); } // if it's contained it can't be unused. 
@@ -18444,8 +18438,6 @@ bool GenTree::isCommutativeSIMDIntrinsic() case SIMDIntrinsicBitwiseOr: case SIMDIntrinsicEqual: case SIMDIntrinsicMul: - case SIMDIntrinsicOpEquality: - case SIMDIntrinsicOpInEquality: return true; default: return false; diff --git a/src/coreclr/src/jit/gentree.h b/src/coreclr/src/jit/gentree.h index 7ba139dfa8893..aa9bba663914a 100644 --- a/src/coreclr/src/jit/gentree.h +++ b/src/coreclr/src/jit/gentree.h @@ -1650,8 +1650,6 @@ struct GenTree inline bool IsBoxedValue(); - inline bool IsSIMDEqualityOrInequality() const; - static bool OperIsList(genTreeOps gtOper) { return gtOper == GT_LIST; @@ -6867,19 +6865,6 @@ inline bool GenTree::IsBoxedValue() return (gtOper == GT_BOX) && (gtFlags & GTF_BOX_VALUE); } -inline bool GenTree::IsSIMDEqualityOrInequality() const -{ -#ifdef FEATURE_SIMD - if (gtOper == GT_SIMD) - { - SIMDIntrinsicID id = AsSIMD()->gtSIMDIntrinsicID; - return (id == SIMDIntrinsicOpEquality) || (id == SIMDIntrinsicOpInEquality); - } -#endif - - return false; -} - inline GenTree* GenTree::MoveNext() { assert(OperIsAnyList()); diff --git a/src/coreclr/src/jit/lowerarmarch.cpp b/src/coreclr/src/jit/lowerarmarch.cpp index 1f20b1a5cd227..c192ee2fc1400 100644 --- a/src/coreclr/src/jit/lowerarmarch.cpp +++ b/src/coreclr/src/jit/lowerarmarch.cpp @@ -1219,11 +1219,6 @@ void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode) CheckImmedAndMakeContained(simdNode, simdNode->gtGetOp2()); break; - case SIMDIntrinsicOpEquality: - case SIMDIntrinsicOpInEquality: - // TODO-ARM64-CQ Support containing 0 - break; - case SIMDIntrinsicGetItem: { // This implements get_Item method. The sources are: diff --git a/src/coreclr/src/jit/lowerxarch.cpp b/src/coreclr/src/jit/lowerxarch.cpp index 4d0428d5b9e45..5b67c184e10cf 100644 --- a/src/coreclr/src/jit/lowerxarch.cpp +++ b/src/coreclr/src/jit/lowerxarch.cpp @@ -743,14 +743,6 @@ void Lowering::LowerSIMD(GenTreeSIMD* simdNode) // the addr of SIMD vector with the given index. 
simdNode->gtOp1->gtFlags |= GTF_IND_REQ_ADDR_IN_REG; } - else if (simdNode->IsSIMDEqualityOrInequality()) - { - LowerNodeCC(simdNode, - simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpEquality ? GenCondition::EQ : GenCondition::NE); - - simdNode->gtType = TYP_VOID; - simdNode->ClearUnusedValue(); - } #endif ContainCheckSIMD(simdNode); } @@ -3878,19 +3870,6 @@ void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode) CheckImmedAndMakeContained(simdNode, simdNode->gtGetOp2()); break; - case SIMDIntrinsicOpEquality: - case SIMDIntrinsicOpInEquality: - // On SSE4/AVX, we can generate optimal code for (in)equality - // against zero using ptest. We can safely do this optimization - // for integral vectors but not for floating-point for the reason - // that we have +0.0 and -0.0 and +0.0 == -0.0 - op2 = simdNode->gtGetOp2(); - if ((comp->getSIMDSupportLevel() >= SIMD_SSE4_Supported) && op2->IsIntegralConstVector(0)) - { - MakeSrcContained(simdNode, op2); - } - break; - case SIMDIntrinsicGetItem: { // This implements get_Item method. The sources are: diff --git a/src/coreclr/src/jit/lsraarm64.cpp b/src/coreclr/src/jit/lsraarm64.cpp index 2217284195871..10263cfc90e64 100644 --- a/src/coreclr/src/jit/lsraarm64.cpp +++ b/src/coreclr/src/jit/lsraarm64.cpp @@ -904,11 +904,6 @@ int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) // We have an array and an index, which may be contained. 
break; - case SIMDIntrinsicOpEquality: - case SIMDIntrinsicOpInEquality: - buildInternalFloatRegisterDefForNode(simdTree); - break; - case SIMDIntrinsicDotProduct: buildInternalFloatRegisterDefForNode(simdTree); break; diff --git a/src/coreclr/src/jit/lsrabuild.cpp b/src/coreclr/src/jit/lsrabuild.cpp index 44462d7a91ada..22fc482ac1f30 100644 --- a/src/coreclr/src/jit/lsrabuild.cpp +++ b/src/coreclr/src/jit/lsrabuild.cpp @@ -1507,7 +1507,7 @@ int LinearScan::ComputeOperandDstCount(GenTree* operand) // Stores and void-typed operands may be encountered when processing call nodes, which contain // pointers to argument setup stores. assert(operand->OperIsStore() || operand->OperIsBlkOp() || operand->OperIsPutArgStk() || - operand->OperIsCompare() || operand->OperIs(GT_CMP) || operand->IsSIMDEqualityOrInequality() || + operand->OperIsCompare() || operand->OperIs(GT_CMP) || operand->TypeGet() == TYP_VOID); return 0; } diff --git a/src/coreclr/src/jit/lsraxarch.cpp b/src/coreclr/src/jit/lsraxarch.cpp index 70bb5aae64f74..46070aec0d2c9 100644 --- a/src/coreclr/src/jit/lsraxarch.cpp +++ b/src/coreclr/src/jit/lsraxarch.cpp @@ -1866,21 +1866,17 @@ int LinearScan::BuildIntrinsic(GenTree* tree) // int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) { - // Only SIMDIntrinsicInit can be contained. Other than that, - // only SIMDIntrinsicOpEquality and SIMDIntrinsicOpInEquality can have 0 dstCount. - int dstCount = simdTree->IsValue() ? 
1 : 0; + // All intrinsics have a dstCount of 1 + assert(simdTree->IsValue()); + bool buildUses = true; regMaskTP dstCandidates = RBM_NONE; if (simdTree->isContained()) { + // Only SIMDIntrinsicInit can be contained assert(simdTree->gtSIMDIntrinsicID == SIMDIntrinsicInit); } - else if (dstCount != 1) - { - assert((simdTree->gtSIMDIntrinsicID == SIMDIntrinsicOpEquality) || - (simdTree->gtSIMDIntrinsicID == SIMDIntrinsicOpInEquality)); - } SetContainsAVXFlags(simdTree->gtSIMDSize); GenTree* op1 = simdTree->gtGetOp1(); GenTree* op2 = simdTree->gtGetOp2(); @@ -1977,26 +1973,6 @@ int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) case SIMDIntrinsicEqual: break; - case SIMDIntrinsicOpEquality: - case SIMDIntrinsicOpInEquality: - if (simdTree->gtGetOp2()->isContained()) - { - // If the second operand is contained then ContainCheckSIMD has determined - // that PTEST can be used. We only need a single source register and no - // internal registers. - } - else - { - // Can't use PTEST so we need 2 source registers, 1 internal SIMD register - // (to hold the result of PCMPEQD or other similar SIMD compare instruction) - // and one internal INT register (to hold the result of PMOVMSKB). - buildInternalIntRegisterDefForNode(simdTree); - buildInternalFloatRegisterDefForNode(simdTree); - } - // These SIMD nodes only set the condition flags. 
- dstCount = 0; - break; - case SIMDIntrinsicDotProduct: // Float/Double vectors: // For SSE, or AVX with 32-byte vectors, we also need an internal register @@ -2224,14 +2200,7 @@ int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) srcCount = BuildRMWUses(simdTree); } buildInternalRegisterUses(); - if (dstCount == 1) - { - BuildDef(simdTree, dstCandidates); - } - else - { - assert(dstCount == 0); - } + BuildDef(simdTree, dstCandidates); return srcCount; } #endif // FEATURE_SIMD diff --git a/src/coreclr/src/jit/simd.cpp b/src/coreclr/src/jit/simd.cpp index 784a473a23c3c..3bd38ef537199 100644 --- a/src/coreclr/src/jit/simd.cpp +++ b/src/coreclr/src/jit/simd.cpp @@ -2227,38 +2227,6 @@ GenTree* Compiler::impSIMDIntrinsic(OPCODE opcode, } break; - case SIMDIntrinsicOpEquality: - { - op2 = impSIMDPopStack(simdType); - op1 = impSIMDPopStack(simdType, instMethod); - - assert(op1->TypeGet() == simdType); - assert(op2->TypeGet() == simdType); - - simdTree = gtNewSIMDNode(genActualType(callType), op1, op2, SIMDIntrinsicOpEquality, baseType, size); - if (simdType == TYP_SIMD12) - { - simdTree->gtFlags |= GTF_SIMD12_OP; - } - retVal = simdTree; - } - break; - - case SIMDIntrinsicOpInEquality: - { - // op1 is the first operand - // op2 is the second operand - op2 = impSIMDPopStack(simdType); - op1 = impSIMDPopStack(simdType, instMethod); - simdTree = gtNewSIMDNode(genActualType(callType), op1, op2, SIMDIntrinsicOpInEquality, baseType, size); - if (simdType == TYP_SIMD12) - { - simdTree->gtFlags |= GTF_SIMD12_OP; - } - retVal = simdTree; - } - break; - case SIMDIntrinsicEqual: { op2 = impSIMDPopStack(simdType); diff --git a/src/coreclr/src/jit/simdcodegenxarch.cpp b/src/coreclr/src/jit/simdcodegenxarch.cpp index cbfbfb25d69ed..e6aa4db08fe54 100644 --- a/src/coreclr/src/jit/simdcodegenxarch.cpp +++ b/src/coreclr/src/jit/simdcodegenxarch.cpp @@ -1799,100 +1799,6 @@ void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) } break; - // (In)Equality that produces bool result instead 
of a bit vector - case SIMDIntrinsicOpEquality: - case SIMDIntrinsicOpInEquality: - { - // We're only setting condition flags, if a 0/1 value is desired then Lowering should have inserted a SETCC. - assert(targetReg == REG_NA); - - var_types simdType = op1->TypeGet(); - // TODO-1stClassStructs: Temporary to minimize asmDiffs - if (simdType == TYP_DOUBLE) - { - simdType = TYP_SIMD8; - } - - // Here we should consider TYP_SIMD12 operands as if they were TYP_SIMD16 - // since both the operands will be in XMM registers. - if (simdType == TYP_SIMD12) - { - simdType = TYP_SIMD16; - } - - // On SSE4/AVX, we can generate optimal code for (in)equality against zero using ptest. - if (op2->isContained()) - { - assert((compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported) && op2->IsIntegralConstVector(0)); - inst_RV_RV(INS_ptest, op1->GetRegNum(), op1->GetRegNum(), simdType, emitActualTypeSize(simdType)); - } - else - { - // We need one additional SIMD register to store the result of the SIMD compare. - regNumber tmpReg1 = simdNode->GetSingleTempReg(RBM_ALLFLOAT); - - // tmpReg1 = (op1Reg == op2Reg) - // Call this value of tmpReg1 as 'compResult' for further reference below. - regNumber otherReg = op2Reg; - if (tmpReg1 != op2Reg) - { - if (tmpReg1 != op1Reg) - { - inst_RV_RV(ins_Copy(simdType), tmpReg1, op1Reg, simdType, emitActualTypeSize(simdType)); - } - } - else - { - otherReg = op1Reg; - } - - // For all integer types we can use TYP_INT comparison. - unsigned ival = 0; - instruction ins = - getOpForSIMDIntrinsic(SIMDIntrinsicEqual, varTypeIsFloating(baseType) ? 
baseType : TYP_INT, &ival); - - if (varTypeIsFloating(baseType)) - { - assert((ival >= 0) && (ival <= 255)); - GetEmitter()->emitIns_R_R_I(ins, emitActualTypeSize(simdType), tmpReg1, otherReg, (int8_t)ival); - } - else - { - inst_RV_RV(ins, tmpReg1, otherReg, simdType, emitActualTypeSize(simdType)); - } - - regNumber intReg = simdNode->GetSingleTempReg(RBM_ALLINT); - inst_RV_RV(INS_pmovmskb, intReg, tmpReg1, simdType, emitActualTypeSize(simdType)); - // There's no pmovmskw/pmovmskd/pmovmskq but they're not needed anyway. Vector compare - // instructions produce "all ones"/"all zeroes" components and pmovmskb extracts a - // subset of each component's ones/zeroes. In the end we need to know if the result is - // "all ones" where the number of ones is given by the vector byte size, not by the - // vector component count. So, for AVX registers we need to compare to 0xFFFFFFFF and - // for SSE registers we need to compare to 0x0000FFFF. - // The SIMD12 case is handled specially, because we can't rely on the upper bytes being - // zero, so we must compare only the lower 3 floats (hence the byte mask of 0xFFF). - // Note that -1 is used instead of 0xFFFFFFFF, on x64 emit doesn't correctly recognize - // that 0xFFFFFFFF can be encoded in a single byte and emits the longer 3DFFFFFFFF - // encoding instead of 83F8FF. 
- ssize_t mask; - if ((simdNode->gtFlags & GTF_SIMD12_OP) != 0) - { - mask = 0x00000FFF; - GetEmitter()->emitIns_R_I(INS_and, EA_4BYTE, intReg, mask); - } - else if (emitActualTypeSize(simdType) == 32) - { - mask = -1; - } - else - { - mask = 0x0000FFFF; - } - GetEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, intReg, mask); - } - } - break; - default: noway_assert(!"Unimplemented SIMD relational operation."); unreached(); @@ -2984,8 +2890,6 @@ void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode) genSIMDIntrinsicBinOp(simdNode); break; - case SIMDIntrinsicOpEquality: - case SIMDIntrinsicOpInEquality: case SIMDIntrinsicEqual: genSIMDIntrinsicRelOp(simdNode); break; diff --git a/src/coreclr/src/jit/simdintrinsiclist.h b/src/coreclr/src/jit/simdintrinsiclist.h index e5faf6cd11c3c..813a937fd056b 100644 --- a/src/coreclr/src/jit/simdintrinsiclist.h +++ b/src/coreclr/src/jit/simdintrinsiclist.h @@ -76,10 +76,6 @@ SIMD_INTRINSIC("set_Y", true, SetY, SIMD_INTRINSIC("set_Z", true, SetZ, "setZ", TYP_VOID, 2, {TYP_BYREF, TYP_UNKNOWN, TYP_UNDEF}, {TYP_FLOAT, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}) SIMD_INTRINSIC("set_W", true, SetW, "setW", TYP_VOID, 2, {TYP_BYREF, TYP_UNKNOWN, TYP_UNDEF}, {TYP_FLOAT, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}) -// Operator == and != -SIMD_INTRINSIC("op_Equality", false, OpEquality, "==", TYP_BOOL, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) -SIMD_INTRINSIC("op_Inequality", false, OpInEquality, "!=", TYP_BOOL, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) - // Arithmetic Operations SIMD_INTRINSIC("op_Addition", false, Add, "+", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, 
TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) SIMD_INTRINSIC("op_Subtraction", false, Sub, "-", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) From d98776d3729e9af837f3e52b8da1ce380efd80bf Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Mon, 18 May 2020 09:14:42 -0700 Subject: [PATCH 18/24] Porting this.Equals to use SimdAsHWIntrinsic --- src/coreclr/src/jit/hwintrinsic.cpp | 11 +- src/coreclr/src/jit/lowerarmarch.cpp | 2 +- src/coreclr/src/jit/lowerxarch.cpp | 18 +- src/coreclr/src/jit/namedintrinsiclist.h | 4 +- src/coreclr/src/jit/simdashwintrinsic.cpp | 90 +++++--- .../src/jit/simdashwintrinsiclistarm64.h | 153 +++++++------ .../src/jit/simdashwintrinsiclistxarch.h | 210 ++++++++++-------- .../src/System/Numerics/Vector.tt | 1 + .../src/System/Numerics/Vector2_Intrinsics.cs | 1 + .../src/System/Numerics/Vector3_Intrinsics.cs | 1 + .../src/System/Numerics/Vector4_Intrinsics.cs | 1 + 11 files changed, 296 insertions(+), 196 deletions(-) diff --git a/src/coreclr/src/jit/hwintrinsic.cpp b/src/coreclr/src/jit/hwintrinsic.cpp index d0b2c1b53f338..fe470025888f7 100644 --- a/src/coreclr/src/jit/hwintrinsic.cpp +++ b/src/coreclr/src/jit/hwintrinsic.cpp @@ -482,11 +482,14 @@ bool HWIntrinsicInfo::isImmOp(NamedIntrinsic id, const GenTree* op) GenTree* Compiler::getArgForHWIntrinsic(var_types argType, CORINFO_CLASS_HANDLE argClass, bool expectAddr) { GenTree* arg = nullptr; - if (argType == TYP_STRUCT) + if (varTypeIsStruct(argType)) { - unsigned int argSizeBytes; - var_types base = getBaseTypeAndSizeOfSIMDType(argClass, &argSizeBytes); - argType = getSIMDTypeForSize(argSizeBytes); + if (!varTypeIsSIMD(argType)) + { + unsigned int argSizeBytes; + var_types base = getBaseTypeAndSizeOfSIMDType(argClass, &argSizeBytes); + argType = getSIMDTypeForSize(argSizeBytes); + } assert(varTypeIsSIMD(argType)); arg = impSIMDPopStack(argType, 
expectAddr); assert(varTypeIsSIMD(arg->TypeGet())); diff --git a/src/coreclr/src/jit/lowerarmarch.cpp b/src/coreclr/src/jit/lowerarmarch.cpp index c192ee2fc1400..bf0a7f3507891 100644 --- a/src/coreclr/src/jit/lowerarmarch.cpp +++ b/src/coreclr/src/jit/lowerarmarch.cpp @@ -665,7 +665,7 @@ void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp) GenTree* zroCns = comp->gtNewIconNode(0, TYP_INT); BlockRange().InsertAfter(msk, zroCns); - GenTree* val = comp->gtNewSimdAsHWIntrinsicNode(simdType, msk, zroCns, NI_AdvSimd_Extract, TYP_UBYTE, simdSize); + GenTree* val = comp->gtNewSimdAsHWIntrinsicNode(TYP_UBYTE, msk, zroCns, NI_AdvSimd_Extract, TYP_UBYTE, simdSize); BlockRange().InsertAfter(zroCns, val); zroCns = comp->gtNewIconNode(0, TYP_INT); diff --git a/src/coreclr/src/jit/lowerxarch.cpp b/src/coreclr/src/jit/lowerxarch.cpp index 5b67c184e10cf..475e08226065c 100644 --- a/src/coreclr/src/jit/lowerxarch.cpp +++ b/src/coreclr/src/jit/lowerxarch.cpp @@ -1139,13 +1139,24 @@ void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp) GenTree* op1 = node->gtGetOp1(); GenTree* op2 = node->gtGetOp2(); + GenCondition cmpCnd = (cmpOp == GT_EQ) ? GenCondition::EQ : GenCondition::NE; + if (op2->IsIntegralConstVector(0) && comp->compOpportunisticallyDependsOn(InstructionSet_SSE41)) { // On SSE4.1 or higher we can optimize comparisons against zero to // just use PTEST. We can't support it for floating-point, however, // as it has both +0.0 and -0.0 where +0.0 == -0.0 - GenCondition cmpCnd = (cmpOp == GT_EQ) ? 
GenCondition::EQ : GenCondition::NE; + node->gtOp1 = op1; + BlockRange().Remove(op2); + + LIR::Use op1Use(BlockRange(), &node->gtOp1, node); + ReplaceWithLclVar(op1Use); + op1 = node->gtOp1; + + op2 = comp->gtClone(op1); + BlockRange().InsertAfter(op1, op2); + node->gtOp2 = op2; if (simdSize == 32) { @@ -1321,6 +1332,11 @@ void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp) node->gtOp1 = msk; node->gtOp2 = mskCns; + GenTree* cc = LowerNodeCC(node, cmpCnd); + + node->gtType = TYP_VOID; + node->ClearUnusedValue(); + LowerNode(node); } diff --git a/src/coreclr/src/jit/namedintrinsiclist.h b/src/coreclr/src/jit/namedintrinsiclist.h index 457d434898c9b..dbef8c0c0068e 100644 --- a/src/coreclr/src/jit/namedintrinsiclist.h +++ b/src/coreclr/src/jit/namedintrinsiclist.h @@ -44,10 +44,10 @@ enum NamedIntrinsic : unsigned short NI_SIMD_AS_HWINTRINSIC_START, #if defined(TARGET_XARCH) -#define SIMD_AS_HWINTRINSIC(classId, name, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) NI_##classId##_##name, +#define SIMD_AS_HWINTRINSIC(classId, id, name, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) NI_##classId##_##id, #include "simdashwintrinsiclistxarch.h" #elif defined(TARGET_ARM64) -#define SIMD_AS_HWINTRINSIC(classId, name, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) NI_##classId##_##name, +#define SIMD_AS_HWINTRINSIC(classId, id, name, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) NI_##classId##_##id, #include "simdashwintrinsiclistarm64.h" #endif // !defined(TARGET_XARCH) && !defined(TARGET_ARM64) NI_SIMD_AS_HWINTRINSIC_END, diff --git a/src/coreclr/src/jit/simdashwintrinsic.cpp b/src/coreclr/src/jit/simdashwintrinsic.cpp index 2b7113e354b0a..3e6ff3d76eb0f 100644 --- a/src/coreclr/src/jit/simdashwintrinsic.cpp +++ b/src/coreclr/src/jit/simdashwintrinsic.cpp @@ -10,12 +10,12 @@ static const SimdAsHWIntrinsicInfo simdAsHWIntrinsicInfoArray[] = { // clang-format off #if defined(TARGET_XARCH) -#define 
SIMD_AS_HWINTRINSIC(classId, name, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) \ - {NI_##classId##_##name, #name, SimdAsHWIntrinsicClassId::classId, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, static_cast<SimdAsHWIntrinsicFlag>(flag)}, +#define SIMD_AS_HWINTRINSIC(classId, id, name, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) \ + {NI_##classId##_##id, name, SimdAsHWIntrinsicClassId::classId, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, static_cast<SimdAsHWIntrinsicFlag>(flag)}, #include "simdashwintrinsiclistxarch.h" #elif defined(TARGET_ARM64) -#define SIMD_AS_HWINTRINSIC(classId, name, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) \ - {NI_##classId##_##name, #name, SimdAsHWIntrinsicClassId::classId, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, static_cast<SimdAsHWIntrinsicFlag>(flag)}, +#define SIMD_AS_HWINTRINSIC(classId, id, name, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) \ + {NI_##classId##_##id, name, SimdAsHWIntrinsicClassId::classId, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, static_cast<SimdAsHWIntrinsicFlag>(flag)}, #include "simdashwintrinsiclistarm64.h" #else #error Unsupported platform @@ -65,6 +65,15 @@ NamedIntrinsic SimdAsHWIntrinsicInfo::lookupId(CORINFO_SIG_INFO* sig, return NI_Illegal; } + unsigned numArgs = sig->numArgs; + bool isInstanceMethod = false; + + if (sig->hasThis()) + { + numArgs++; + isInstanceMethod = true; + } + for (int i = 0; i < (NI_SIMD_AS_HWINTRINSIC_END - NI_SIMD_AS_HWINTRINSIC_START - 1); i++) { const SimdAsHWIntrinsicInfo& intrinsicInfo = simdAsHWIntrinsicInfoArray[i]; @@ -74,12 +83,12 @@ NamedIntrinsic SimdAsHWIntrinsicInfo::lookupId(CORINFO_SIG_INFO* sig, continue; } - if (sig->numArgs != static_cast<unsigned>(intrinsicInfo.numArgs)) + if (numArgs != static_cast<unsigned>(intrinsicInfo.numArgs)) { continue; } - if (sig->hasThis() != SimdAsHWIntrinsicInfo::IsInstanceMethod(intrinsicInfo.id)) + if (isInstanceMethod != SimdAsHWIntrinsicInfo::IsInstanceMethod(intrinsicInfo.id)) { continue; } @@ -164,7 +173,7 @@ GenTree* Compiler::impSimdAsHWIntrinsic(NamedIntrinsic 
intrinsic, { assert(!mustExpand); - CORINFO_CLASS_HANDLE argClass; + CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE; if (!featureSIMD) { @@ -193,8 +202,22 @@ GenTree* Compiler::impSimdAsHWIntrinsic(NamedIntrinsic intrinsic, baseType = getBaseTypeAndSizeOfSIMDType(argClass, &simdSize); } - if ((clsHnd == m_simdHandleCache->SIMDVectorHandle) && (sig->numArgs != 0)) + unsigned numArgs = sig->numArgs; + bool isInstanceMethod = false; + + if (sig->hasThis()) + { + assert(SimdAsHWIntrinsicInfo::IsInstanceMethod(intrinsic)); + numArgs++; + + isInstanceMethod = true; + argClass = clsHnd; + } + + if ((clsHnd == m_simdHandleCache->SIMDVectorHandle) && (numArgs != 0)) { + assert(!isInstanceMethod); + // We need to fixup the clsHnd in the case we are an intrinsic on Vector // The first argument will be the appropriate Vector handle to use clsHnd = info.compCompHnd->getArgClass(sig, sig->args); @@ -250,9 +273,7 @@ GenTree* Compiler::impSimdAsHWIntrinsic(NamedIntrinsic intrinsic, GenTree* op1 = nullptr; GenTree* op2 = nullptr; - bool isInstanceMethod = SimdAsHWIntrinsicInfo::IsInstanceMethod(intrinsic); - - switch (sig->numArgs) + switch (numArgs) { case 0: { @@ -262,7 +283,8 @@ GenTree* Compiler::impSimdAsHWIntrinsic(NamedIntrinsic intrinsic, case 1: { - argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass))); + argType = isInstanceMethod ? simdType + : JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass))); op1 = getArgForHWIntrinsic(argType, argClass, isInstanceMethod); assert(!SimdAsHWIntrinsicInfo::NeedsOperandsSwapped(intrinsic)); @@ -271,11 +293,12 @@ GenTree* Compiler::impSimdAsHWIntrinsic(NamedIntrinsic intrinsic, case 2: { - CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(argList); + CORINFO_ARG_LIST_HANDLE arg2 = isInstanceMethod ? 
argList : info.compCompHnd->getArgNext(argList); argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); op2 = getArgForHWIntrinsic(argType, argClass); - argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass))); + argType = isInstanceMethod ? simdType + : JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass))); op1 = getArgForHWIntrinsic(argType, argClass, isInstanceMethod); if (SimdAsHWIntrinsicInfo::NeedsOperandsSwapped(intrinsic)) @@ -317,19 +340,31 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, assert(retType != TYP_UNKNOWN); assert(varTypeIsArithmetic(baseType)); assert(simdSize != 0); - assert(varTypeIsSIMD(getSIMDTypeForSize(simdSize))); assert(SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, baseType) == intrinsic); - CORINFO_ARG_LIST_HANDLE argList = sig->args; - var_types argType = TYP_UNKNOWN; - CORINFO_CLASS_HANDLE argClass; + var_types simdType = getSIMDTypeForSize(simdSize); + assert(varTypeIsSIMD(simdType)); + + CORINFO_ARG_LIST_HANDLE argList = sig->args; + var_types argType = TYP_UNKNOWN; + CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE; GenTree* op1 = nullptr; GenTree* op2 = nullptr; GenTree* op3 = nullptr; SimdAsHWIntrinsicClassId classId = SimdAsHWIntrinsicInfo::lookupClassId(intrinsic); - bool isInstanceMethod = SimdAsHWIntrinsicInfo::IsInstanceMethod(intrinsic); + unsigned numArgs = sig->numArgs; + bool isInstanceMethod = false; + + if (sig->hasThis()) + { + assert(SimdAsHWIntrinsicInfo::IsInstanceMethod(intrinsic)); + numArgs++; + + isInstanceMethod = true; + argClass = clsHnd; + } #if defined(TARGET_XARCH) bool isVectorT256 = (SimdAsHWIntrinsicInfo::lookupClassId(intrinsic) == SimdAsHWIntrinsicClassId::VectorT256); @@ -349,7 +384,7 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, assert(!isVectorT256 || compIsaSupportedDebugOnly(InstructionSet_AVX2)); #endif - switch (sig->numArgs) + switch (numArgs) 
{ case 1: { @@ -372,7 +407,8 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, return op1; } - argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass))); + argType = isInstanceMethod ? simdType + : JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass))); op1 = getArgForHWIntrinsic(argType, argClass, isInstanceMethod); assert(!SimdAsHWIntrinsicInfo::NeedsOperandsSwapped(intrinsic)); @@ -473,11 +509,12 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, case 2: { - CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(argList); + CORINFO_ARG_LIST_HANDLE arg2 = isInstanceMethod ? argList : info.compCompHnd->getArgNext(argList); argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); op2 = getArgForHWIntrinsic(argType, argClass); - argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass))); + argType = isInstanceMethod ? simdType + : JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass))); op1 = getArgForHWIntrinsic(argType, argClass, isInstanceMethod); assert(!SimdAsHWIntrinsicInfo::NeedsOperandsSwapped(intrinsic)); @@ -721,8 +758,8 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, case 3: { - CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(argList); - CORINFO_ARG_LIST_HANDLE arg3 = info.compCompHnd->getArgNext(argList); + CORINFO_ARG_LIST_HANDLE arg2 = isInstanceMethod ? 
argList : info.compCompHnd->getArgNext(argList); + CORINFO_ARG_LIST_HANDLE arg3 = info.compCompHnd->getArgNext(arg2); argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg3, &argClass))); op3 = getArgForHWIntrinsic(argType, argClass); @@ -730,7 +767,8 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); op2 = getArgForHWIntrinsic(argType, argClass); - argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass))); + argType = isInstanceMethod ? simdType + : JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass))); op1 = getArgForHWIntrinsic(argType, argClass, isInstanceMethod); assert(!SimdAsHWIntrinsicInfo::NeedsOperandsSwapped(intrinsic)); diff --git a/src/coreclr/src/jit/simdashwintrinsiclistarm64.h b/src/coreclr/src/jit/simdashwintrinsiclistarm64.h index 9de5ce65c8d17..f7290b84b9ae1 100644 --- a/src/coreclr/src/jit/simdashwintrinsiclistarm64.h +++ b/src/coreclr/src/jit/simdashwintrinsiclistarm64.h @@ -6,12 +6,24 @@ #ifndef SIMD_AS_HWINTRINSIC #error Define SIMD_AS_HWINTRINSIC before including this file #endif + +#if defined(SIMD_AS_HWINTRINSIC_ID) || defined(SIMD_AS_HWINTRINSIC_NM) +#error SIMD_AS_HWINTRINSIC_ID and SIMD_AS_HWINTRINSIC_NM should not be defined before including this file +#endif /*****************************************************************************/ // clang-format off #ifdef FEATURE_HW_INTRINSICS +// Defines a SimdAsHWIntrinsic where the name is implicitly taken from the id +#define SIMD_AS_HWINTRINSIC_ID(classId, id, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) \ + SIMD_AS_HWINTRINSIC(classId, id, #id, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) + +// Defines a SimdAsHWIntrinsic where the name is explicit +#define SIMD_AS_HWINTRINSIC_NM(classId, id, name, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) \ + SIMD_AS_HWINTRINSIC(classId, id, 
name, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) + /* Note * Each intrinsic has a unique Intrinsic ID with type of `enum NamedIntrinsic` * Each intrinsic has a `NumArg` for number of parameters @@ -22,87 +34,94 @@ */ // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* -// ISA Function name NumArg Instructions Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA ID Name NumArg Instructions Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector2 Intrinsics -SIMD_AS_HWINTRINSIC(Vector2, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector2, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, 
NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector64_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector2, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector2, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector2, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector2, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Divide, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector2, op_Equality, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector64_op_Equality, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector2, op_Inequality, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector64_op_Inequality, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector2, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector2, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector2, SquareRoot, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Sqrt, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector2, 
Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_NM(Vector2, EqualsInstance, "Equals", 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector64_op_Equality, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) +SIMD_AS_HWINTRINSIC_ID(Vector2, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector64_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector2, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector2, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector2, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector2, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Divide, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector2, op_Equality, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector64_op_Equality, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector2, op_Inequality, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector64_op_Inequality, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector2, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, 
NI_AdvSimd_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector2, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector2, SquareRoot, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Sqrt, NI_Illegal}, SimdAsHWIntrinsicFlag::None) // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* -// ISA Function name NumArg Instructions Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA ID Name NumArg Instructions Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector3 Intrinsics 
-SIMD_AS_HWINTRINSIC(Vector3, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector3, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector3, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector3, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector3, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector3, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Divide, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector3, op_Equality, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_op_Equality, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector3, op_Inequality, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_op_Inequality, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector3, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector3, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Subtract, 
NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector3, SquareRoot, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Sqrt, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector3, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_NM(Vector3, EqualsInstance, "Equals", 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_op_Equality, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) +SIMD_AS_HWINTRINSIC_ID(Vector3, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector3, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector3, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector3, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector3, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Divide, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector3, op_Equality, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_op_Equality, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector3, op_Inequality, 2, {NI_Illegal, NI_Illegal, 
NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_op_Inequality, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector3, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector3, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector3, SquareRoot, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Sqrt, NI_Illegal}, SimdAsHWIntrinsicFlag::None) // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* -// ISA Function name NumArg Instructions Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA ID Name NumArg Instructions Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // 
************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector4 Intrinsics -SIMD_AS_HWINTRINSIC(Vector4, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector4, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector4, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector4, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector4, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector4, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Divide, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector4, op_Equality, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_op_Equality, NI_Illegal}, 
SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector4, op_Inequality, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_op_Inequality, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector4, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector4, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector4, SquareRoot, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Sqrt, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector4, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_NM(Vector4, EqualsInstance, "Equals", 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_op_Equality, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) +SIMD_AS_HWINTRINSIC_ID(Vector4, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector4, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector4, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector4, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, 
NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector4, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Divide, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector4, op_Equality, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_op_Equality, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector4, op_Inequality, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_op_Inequality, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector4, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector4, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector4, SquareRoot, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Sqrt, NI_Illegal}, SimdAsHWIntrinsicFlag::None) // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* -// ISA Function name NumArg 
Instructions Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA ID Name NumArg Instructions Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector Intrinsics -SIMD_AS_HWINTRINSIC(VectorT128, Abs, 1, {NI_AdvSimd_Abs, NI_VectorT128_Abs, NI_AdvSimd_Abs, NI_VectorT128_Abs, NI_AdvSimd_Abs, NI_VectorT128_Abs, NI_AdvSimd_Arm64_Abs, NI_VectorT128_Abs, NI_AdvSimd_Abs, NI_AdvSimd_Arm64_Abs}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, AndNot, 2, {NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, Ceiling, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Ceiling, NI_AdvSimd_Ceiling}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, ConditionalSelect, 3, {NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, 
NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, Equals, 2, {NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_Arm64_CompareEqual, NI_AdvSimd_Arm64_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_Arm64_CompareEqual}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, Floor, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Floor, NI_AdvSimd_Floor}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, get_AllBitsSet, 0, {NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, get_Count, 0, {NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, get_Zero, 0, {NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, GreaterThan, 2, {NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_Arm64_CompareGreaterThan, NI_AdvSimd_Arm64_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, 
NI_AdvSimd_Arm64_CompareGreaterThan}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, GreaterThanOrEqual, 2, {NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_Arm64_CompareGreaterThanOrEqual, NI_AdvSimd_Arm64_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_Arm64_CompareGreaterThanOrEqual}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, LessThan, 2, {NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_Arm64_CompareLessThan, NI_AdvSimd_Arm64_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_Arm64_CompareLessThan}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, LessThanOrEqual, 2, {NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_Arm64_CompareLessThanOrEqual, NI_AdvSimd_Arm64_CompareLessThanOrEqual, NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_Arm64_CompareLessThanOrEqual}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, Max, 2, {NI_AdvSimd_Max, NI_AdvSimd_Max, NI_AdvSimd_Max, NI_AdvSimd_Max, NI_AdvSimd_Max, NI_AdvSimd_Max, NI_VectorT128_Max, NI_VectorT128_Max, NI_AdvSimd_Max, NI_AdvSimd_Arm64_Max}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, Min, 2, {NI_AdvSimd_Min, NI_AdvSimd_Min, NI_AdvSimd_Min, NI_AdvSimd_Min, NI_AdvSimd_Min, NI_AdvSimd_Min, NI_VectorT128_Min, NI_VectorT128_Min, NI_AdvSimd_Min, NI_AdvSimd_Arm64_Min}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, op_Addition, 2, {NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Add, 
NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Arm64_Add}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, op_BitwiseAnd, 2, {NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, op_BitwiseOr, 2, {NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Divide, NI_AdvSimd_Arm64_Divide}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, op_Equality, 2, {NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, op_ExclusiveOr, 2, {NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, op_Explicit, 1, {NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, op_Inequality, 2, {NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, 
NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, op_Multiply, 2, {NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_Illegal, NI_Illegal, NI_AdvSimd_Multiply, NI_AdvSimd_Arm64_Multiply}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, op_Subtraction, 2, {NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Arm64_Subtract}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, SquareRoot, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Sqrt, NI_AdvSimd_Arm64_Sqrt}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, Abs, 1, {NI_AdvSimd_Abs, NI_VectorT128_Abs, NI_AdvSimd_Abs, NI_VectorT128_Abs, NI_AdvSimd_Abs, NI_VectorT128_Abs, NI_AdvSimd_Arm64_Abs, NI_VectorT128_Abs, NI_AdvSimd_Abs, NI_AdvSimd_Arm64_Abs}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, AndNot, 2, {NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, Ceiling, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Ceiling, NI_AdvSimd_Ceiling}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, ConditionalSelect, 3, {NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, 
NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, Equals, 2, {NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_Arm64_CompareEqual, NI_AdvSimd_Arm64_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_Arm64_CompareEqual}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_NM(VectorT128, EqualsInstance, "Equals", 2, {NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality}, SimdAsHWIntrinsicFlag::InstanceMethod) +SIMD_AS_HWINTRINSIC_ID(VectorT128, Floor, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Floor, NI_AdvSimd_Floor}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, get_AllBitsSet, 0, {NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, get_Count, 0, {NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, get_Zero, 0, {NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, 
NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, GreaterThan, 2, {NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_Arm64_CompareGreaterThan, NI_AdvSimd_Arm64_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_Arm64_CompareGreaterThan}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, GreaterThanOrEqual, 2, {NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_Arm64_CompareGreaterThanOrEqual, NI_AdvSimd_Arm64_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_Arm64_CompareGreaterThanOrEqual}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, LessThan, 2, {NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_Arm64_CompareLessThan, NI_AdvSimd_Arm64_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_Arm64_CompareLessThan}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, LessThanOrEqual, 2, {NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_Arm64_CompareLessThanOrEqual, NI_AdvSimd_Arm64_CompareLessThanOrEqual, NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_Arm64_CompareLessThanOrEqual}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, Max, 2, {NI_AdvSimd_Max, NI_AdvSimd_Max, NI_AdvSimd_Max, NI_AdvSimd_Max, NI_AdvSimd_Max, NI_AdvSimd_Max, NI_VectorT128_Max, 
NI_VectorT128_Max, NI_AdvSimd_Max, NI_AdvSimd_Arm64_Max}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, Min, 2, {NI_AdvSimd_Min, NI_AdvSimd_Min, NI_AdvSimd_Min, NI_AdvSimd_Min, NI_AdvSimd_Min, NI_AdvSimd_Min, NI_VectorT128_Min, NI_VectorT128_Min, NI_AdvSimd_Min, NI_AdvSimd_Arm64_Min}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, op_Addition, 2, {NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Arm64_Add}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, op_BitwiseAnd, 2, {NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, op_BitwiseOr, 2, {NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Divide, NI_AdvSimd_Arm64_Divide}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, op_Equality, 2, {NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, op_ExclusiveOr, 2, {NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, op_Explicit, 1, {NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, 
NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, op_Inequality, 2, {NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, op_Multiply, 2, {NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_Illegal, NI_Illegal, NI_AdvSimd_Multiply, NI_AdvSimd_Arm64_Multiply}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, op_Subtraction, 2, {NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Arm64_Subtract}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, SquareRoot, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Sqrt, NI_AdvSimd_Arm64_Sqrt}, SimdAsHWIntrinsicFlag::None) + +#undef SIMD_AS_HWINTRINSIC_NM +#undef SIMD_AS_HWINTRINSIC_ID #endif // FEATURE_HW_INTRINSICS diff --git a/src/coreclr/src/jit/simdashwintrinsiclistxarch.h b/src/coreclr/src/jit/simdashwintrinsiclistxarch.h index fef502163ee34..0a3131ecc9c8f 100644 --- a/src/coreclr/src/jit/simdashwintrinsiclistxarch.h +++ b/src/coreclr/src/jit/simdashwintrinsiclistxarch.h @@ -6,12 +6,24 @@ #ifndef SIMD_AS_HWINTRINSIC #error Define SIMD_AS_HWINTRINSIC before including this file #endif + +#if defined(SIMD_AS_HWINTRINSIC_ID) || defined(SIMD_AS_HWINTRINSIC_NM) +#error SIMD_AS_HWINTRINSIC_ID 
and SIMD_AS_HWINTRINSIC_NM should not be defined before including this file +#endif /*****************************************************************************/ // clang-format off #ifdef FEATURE_HW_INTRINSICS +// Defines a SimdAsHWIntrinsic where the name is implicitly taken from the id +#define SIMD_AS_HWINTRINSIC_ID(classId, id, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) \ + SIMD_AS_HWINTRINSIC(classId, id, #id, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) + +// Defines a SimdAsHWIntrinsic where the name is explicit +#define SIMD_AS_HWINTRINSIC_NM(classId, id, name, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) \ + SIMD_AS_HWINTRINSIC(classId, id, name, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) + /* Note * Each intrinsic has a unique Intrinsic ID with type of `enum NamedIntrinsic` * Each intrinsic has a `NumArg` for number of parameters @@ -22,119 +34,127 @@ */ // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* -// ISA Function name NumArg Instructions Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA ID Name NumArg Instructions Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // 
************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector2 Intrinsics -SIMD_AS_HWINTRINSIC(Vector2, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector2, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector2, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector2, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector2, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector2, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_op_Division, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector2, op_Equality, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_op_Equality, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector2, 
op_Inequality, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_op_Inequality, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector2, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector2, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector2, SquareRoot, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Sqrt, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector2, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_NM(Vector2, EqualsInstance, "Equals", 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_op_Equality, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) +SIMD_AS_HWINTRINSIC_ID(Vector2, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector2, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector2, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector2, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Add, NI_Illegal}, 
SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector2, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_op_Division, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector2, op_Equality, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_op_Equality, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector2, op_Inequality, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_op_Inequality, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector2, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector2, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector2, SquareRoot, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Sqrt, NI_Illegal}, SimdAsHWIntrinsicFlag::None) // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* -// ISA Function name NumArg Instructions Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, 
TYP_DOUBLE} +// ISA ID Name NumArg Instructions Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector3 Intrinsics -SIMD_AS_HWINTRINSIC(Vector3, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector3, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector3, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector3, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector3, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector3, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_op_Division, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector3, op_Equality, 2, {NI_Illegal, NI_Illegal, 
NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_op_Equality, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector3, op_Inequality, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_op_Inequality, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector3, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector3, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector3, SquareRoot, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Sqrt, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector3, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_NM(Vector3, EqualsInstance, "Equals", 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_op_Equality, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) +SIMD_AS_HWINTRINSIC_ID(Vector3, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector3, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector3, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) 
+SIMD_AS_HWINTRINSIC_ID(Vector3, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector3, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_op_Division, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector3, op_Equality, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_op_Equality, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector3, op_Inequality, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_op_Inequality, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector3, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector3, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector3, SquareRoot, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Sqrt, NI_Illegal}, SimdAsHWIntrinsicFlag::None) // 
************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* -// ISA Function name NumArg Instructions Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA ID Name NumArg Instructions Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector4 Intrinsics -SIMD_AS_HWINTRINSIC(Vector4, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector4, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector4, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Max, 
NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector4, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector4, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector4, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Divide, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector4, op_Equality, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_op_Equality, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector4, op_Inequality, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_op_Inequality, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector4, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector4, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector4, SquareRoot, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Sqrt, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector4, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_NM(Vector4, EqualsInstance, "Equals", 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, 
NI_Illegal, NI_Illegal, NI_Vector128_op_Equality, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) +SIMD_AS_HWINTRINSIC_ID(Vector4, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector4, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector4, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector4, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector4, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Divide, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector4, op_Equality, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_op_Equality, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector4, op_Inequality, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_op_Inequality, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector4, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector4, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector4, SquareRoot, 1, 
{NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Sqrt, NI_Illegal}, SimdAsHWIntrinsicFlag::None) // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* -// ISA Function name NumArg Instructions Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA ID Name NumArg Instructions Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector Intrinsics -SIMD_AS_HWINTRINSIC(VectorT128, Abs, 1, {NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, AndNot, 2, {NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, 
NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE_AndNot, NI_SSE2_AndNot}, SimdAsHWIntrinsicFlag::NeedsOperandsSwapped) -SIMD_AS_HWINTRINSIC(VectorT128, Ceiling, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE41_Ceiling, NI_SSE41_Ceiling}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, ConditionalSelect, 3, {NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, Equals, 2, {NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_VectorT128_Equals, NI_VectorT128_Equals, NI_SSE_CompareEqual, NI_SSE2_CompareEqual}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, Floor, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE41_Floor, NI_SSE41_Floor}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, get_AllBitsSet, 0, {NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, get_Count, 0, {NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, get_Zero, 0, 
{NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, GreaterThan, 2, {NI_SSE2_CompareGreaterThan, NI_VectorT128_GreaterThan, NI_SSE2_CompareGreaterThan, NI_VectorT128_GreaterThan, NI_SSE2_CompareGreaterThan, NI_VectorT128_GreaterThan, NI_VectorT128_GreaterThan, NI_VectorT128_GreaterThan, NI_SSE_CompareGreaterThan, NI_SSE2_CompareGreaterThan}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, GreaterThanOrEqual, 2, {NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_SSE_CompareGreaterThanOrEqual, NI_SSE2_CompareGreaterThanOrEqual}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, LessThan, 2, {NI_SSE2_CompareLessThan, NI_VectorT128_LessThan, NI_SSE2_CompareLessThan, NI_VectorT128_LessThan, NI_SSE2_CompareLessThan, NI_VectorT128_LessThan, NI_VectorT128_LessThan, NI_VectorT128_LessThan, NI_SSE_CompareLessThan, NI_SSE2_CompareLessThan}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, LessThanOrEqual, 2, {NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, NI_SSE_CompareLessThanOrEqual, NI_SSE2_CompareLessThanOrEqual}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, Max, 2, {NI_VectorT128_Max, NI_SSE2_Max, NI_SSE2_Max, NI_VectorT128_Max, NI_VectorT128_Max, NI_VectorT128_Max, NI_VectorT128_Max, NI_VectorT128_Max, NI_SSE_Max, NI_SSE2_Max}, SimdAsHWIntrinsicFlag::None) 
-SIMD_AS_HWINTRINSIC(VectorT128, Min, 2, {NI_VectorT128_Min, NI_SSE2_Min, NI_SSE2_Min, NI_VectorT128_Min, NI_VectorT128_Min, NI_VectorT128_Min, NI_VectorT128_Min, NI_VectorT128_Min, NI_SSE_Min, NI_SSE2_Min}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, op_Addition, 2, {NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE_Add, NI_SSE2_Add}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, op_BitwiseAnd, 2, {NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE_And, NI_SSE2_And}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, op_BitwiseOr, 2, {NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE_Or, NI_SSE2_Or}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Divide, NI_SSE2_Divide}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, op_Equality, 2, {NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, op_ExclusiveOr, 2, {NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE_Xor, NI_SSE2_Xor}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, op_Explicit, 1, {NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, 
op_Inequality, 2, {NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_SSE2_MultiplyLow, NI_Illegal, NI_VectorT128_op_Multiply, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Multiply, NI_SSE2_Multiply}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, op_Subtraction, 2, {NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE_Subtract, NI_SSE2_Subtract}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, SquareRoot, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Sqrt, NI_SSE2_Sqrt}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, Abs, 1, {NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, AndNot, 2, {NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE_AndNot, NI_SSE2_AndNot}, SimdAsHWIntrinsicFlag::NeedsOperandsSwapped) +SIMD_AS_HWINTRINSIC_ID(VectorT128, Ceiling, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE41_Ceiling, NI_SSE41_Ceiling}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, ConditionalSelect, 3, {NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, 
NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, Equals, 2, {NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_VectorT128_Equals, NI_VectorT128_Equals, NI_SSE_CompareEqual, NI_SSE2_CompareEqual}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_NM(VectorT128, EqualsInstance, "Equals", 2, {NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality}, SimdAsHWIntrinsicFlag::InstanceMethod) +SIMD_AS_HWINTRINSIC_ID(VectorT128, Floor, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE41_Floor, NI_SSE41_Floor}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, get_AllBitsSet, 0, {NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, get_Count, 0, {NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, get_Zero, 0, {NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, 
NI_Vector128_get_Zero, NI_Vector128_get_Zero}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, GreaterThan, 2, {NI_SSE2_CompareGreaterThan, NI_VectorT128_GreaterThan, NI_SSE2_CompareGreaterThan, NI_VectorT128_GreaterThan, NI_SSE2_CompareGreaterThan, NI_VectorT128_GreaterThan, NI_VectorT128_GreaterThan, NI_VectorT128_GreaterThan, NI_SSE_CompareGreaterThan, NI_SSE2_CompareGreaterThan}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, GreaterThanOrEqual, 2, {NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_SSE_CompareGreaterThanOrEqual, NI_SSE2_CompareGreaterThanOrEqual}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, LessThan, 2, {NI_SSE2_CompareLessThan, NI_VectorT128_LessThan, NI_SSE2_CompareLessThan, NI_VectorT128_LessThan, NI_SSE2_CompareLessThan, NI_VectorT128_LessThan, NI_VectorT128_LessThan, NI_VectorT128_LessThan, NI_SSE_CompareLessThan, NI_SSE2_CompareLessThan}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, LessThanOrEqual, 2, {NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, NI_SSE_CompareLessThanOrEqual, NI_SSE2_CompareLessThanOrEqual}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, Max, 2, {NI_VectorT128_Max, NI_SSE2_Max, NI_SSE2_Max, NI_VectorT128_Max, NI_VectorT128_Max, NI_VectorT128_Max, NI_VectorT128_Max, NI_VectorT128_Max, NI_SSE_Max, NI_SSE2_Max}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, Min, 2, {NI_VectorT128_Min, NI_SSE2_Min, NI_SSE2_Min, NI_VectorT128_Min, NI_VectorT128_Min, NI_VectorT128_Min, NI_VectorT128_Min, 
NI_VectorT128_Min, NI_SSE_Min, NI_SSE2_Min}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, op_Addition, 2, {NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE_Add, NI_SSE2_Add}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, op_BitwiseAnd, 2, {NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE_And, NI_SSE2_And}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, op_BitwiseOr, 2, {NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE_Or, NI_SSE2_Or}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Divide, NI_SSE2_Divide}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, op_Equality, 2, {NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, op_ExclusiveOr, 2, {NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE_Xor, NI_SSE2_Xor}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, op_Explicit, 1, {NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, op_Inequality, 2, {NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, 
NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_SSE2_MultiplyLow, NI_Illegal, NI_VectorT128_op_Multiply, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Multiply, NI_SSE2_Multiply}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, op_Subtraction, 2, {NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE_Subtract, NI_SSE2_Subtract}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, SquareRoot, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Sqrt, NI_SSE2_Sqrt}, SimdAsHWIntrinsicFlag::None) // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* -// ISA Function name NumArg Instructions Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA ID Name NumArg Instructions Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // 
************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector Intrinsics -SIMD_AS_HWINTRINSIC(VectorT256, Abs, 1, {NI_AVX2_Abs, NI_VectorT256_Abs, NI_AVX2_Abs, NI_VectorT256_Abs, NI_AVX2_Abs, NI_VectorT256_Abs, NI_VectorT256_Abs, NI_VectorT256_Abs, NI_VectorT256_Abs, NI_VectorT256_Abs}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT256, AndNot, 2, {NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX_AndNot, NI_AVX_AndNot}, SimdAsHWIntrinsicFlag::NeedsOperandsSwapped) -SIMD_AS_HWINTRINSIC(VectorT256, Ceiling, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AVX_Ceiling, NI_AVX_Ceiling}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT256, ConditionalSelect, 3, {NI_VectorT256_ConditionalSelect, NI_VectorT256_ConditionalSelect, NI_VectorT256_ConditionalSelect, NI_VectorT256_ConditionalSelect, NI_VectorT256_ConditionalSelect, NI_VectorT256_ConditionalSelect, NI_VectorT256_ConditionalSelect, NI_VectorT256_ConditionalSelect, NI_VectorT256_ConditionalSelect, NI_VectorT256_ConditionalSelect}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT256, Equals, 2, {NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX_CompareEqual, NI_AVX_CompareEqual}, SimdAsHWIntrinsicFlag::None) 
-SIMD_AS_HWINTRINSIC(VectorT256, Floor, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AVX_Floor, NI_AVX_Floor}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT256, get_AllBitsSet, 0, {NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT256, get_Count, 0, {NI_Vector256_get_Count, NI_Vector256_get_Count, NI_Vector256_get_Count, NI_Vector256_get_Count, NI_Vector256_get_Count, NI_Vector256_get_Count, NI_Vector256_get_Count, NI_Vector256_get_Count, NI_Vector256_get_Count, NI_Vector256_get_Count}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT256, get_Zero, 0, {NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT256, GreaterThan, 2, {NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX_CompareGreaterThan, NI_AVX_CompareGreaterThan}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT256, GreaterThanOrEqual, 2, {NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_AVX_CompareGreaterThanOrEqual, NI_AVX_CompareGreaterThanOrEqual}, SimdAsHWIntrinsicFlag::None) 
-SIMD_AS_HWINTRINSIC(VectorT256, LessThan, 2, {NI_AVX2_CompareLessThan, NI_VectorT256_LessThan, NI_AVX2_CompareLessThan, NI_VectorT256_LessThan, NI_AVX2_CompareLessThan, NI_VectorT256_LessThan, NI_AVX2_CompareLessThan, NI_VectorT256_LessThan, NI_AVX_CompareLessThan, NI_AVX_CompareLessThan}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT256, LessThanOrEqual, 2, {NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_AVX_CompareLessThanOrEqual, NI_AVX_CompareLessThanOrEqual}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT256, Max, 2, {NI_AVX2_Max, NI_AVX2_Max, NI_AVX2_Max, NI_AVX2_Max, NI_AVX2_Max, NI_AVX2_Max, NI_VectorT256_Max, NI_VectorT256_Max, NI_AVX_Max, NI_AVX_Max}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT256, Min, 2, {NI_AVX2_Min, NI_AVX2_Min, NI_AVX2_Min, NI_AVX2_Min, NI_AVX2_Min, NI_AVX2_Min, NI_VectorT256_Min, NI_VectorT256_Min, NI_AVX_Min, NI_AVX_Min}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT256, op_Addition, 2, {NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX_Add, NI_AVX_Add}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT256, op_BitwiseAnd, 2, {NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX_And, NI_AVX_And}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT256, op_BitwiseOr, 2, {NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX_Or, NI_AVX_Or}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT256, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AVX_Divide, NI_AVX_Divide}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT256, 
op_Equality, 2, {NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT256, op_ExclusiveOr, 2, {NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX_Xor, NI_AVX_Xor}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT256, op_Explicit, 1, {NI_VectorT256_op_Explicit, NI_VectorT256_op_Explicit, NI_VectorT256_op_Explicit, NI_VectorT256_op_Explicit, NI_VectorT256_op_Explicit, NI_VectorT256_op_Explicit, NI_VectorT256_op_Explicit, NI_VectorT256_op_Explicit, NI_VectorT256_op_Explicit, NI_VectorT256_op_Explicit}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT256, op_Inequality, 2, {NI_Vector256_op_Inequality, NI_Vector256_op_Inequality, NI_Vector256_op_Inequality, NI_Vector256_op_Inequality, NI_Vector256_op_Inequality, NI_Vector256_op_Inequality, NI_Vector256_op_Inequality, NI_Vector256_op_Inequality, NI_Vector256_op_Inequality, NI_Vector256_op_Inequality}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT256, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_AVX2_MultiplyLow, NI_Illegal, NI_AVX2_MultiplyLow, NI_Illegal, NI_Illegal, NI_Illegal, NI_AVX_Multiply, NI_AVX_Multiply}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT256, op_Subtraction, 2, {NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX_Subtract, NI_AVX_Subtract}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT256, SquareRoot, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AVX_Sqrt, NI_AVX_Sqrt}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, Abs, 1, {NI_AVX2_Abs, NI_VectorT256_Abs, 
NI_AVX2_Abs, NI_VectorT256_Abs, NI_AVX2_Abs, NI_VectorT256_Abs, NI_VectorT256_Abs, NI_VectorT256_Abs, NI_VectorT256_Abs, NI_VectorT256_Abs}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, AndNot, 2, {NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX_AndNot, NI_AVX_AndNot}, SimdAsHWIntrinsicFlag::NeedsOperandsSwapped) +SIMD_AS_HWINTRINSIC_ID(VectorT256, Ceiling, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AVX_Ceiling, NI_AVX_Ceiling}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, ConditionalSelect, 3, {NI_VectorT256_ConditionalSelect, NI_VectorT256_ConditionalSelect, NI_VectorT256_ConditionalSelect, NI_VectorT256_ConditionalSelect, NI_VectorT256_ConditionalSelect, NI_VectorT256_ConditionalSelect, NI_VectorT256_ConditionalSelect, NI_VectorT256_ConditionalSelect, NI_VectorT256_ConditionalSelect, NI_VectorT256_ConditionalSelect}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, Equals, 2, {NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX_CompareEqual, NI_AVX_CompareEqual}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_NM(VectorT256, EqualsInstance, "Equals", 2, {NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality}, SimdAsHWIntrinsicFlag::InstanceMethod) +SIMD_AS_HWINTRINSIC_ID(VectorT256, Floor, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AVX_Floor, NI_AVX_Floor}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, get_AllBitsSet, 0, {NI_Vector256_get_AllBitsSet, 
NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, get_Count, 0, {NI_Vector256_get_Count, NI_Vector256_get_Count, NI_Vector256_get_Count, NI_Vector256_get_Count, NI_Vector256_get_Count, NI_Vector256_get_Count, NI_Vector256_get_Count, NI_Vector256_get_Count, NI_Vector256_get_Count, NI_Vector256_get_Count}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, get_Zero, 0, {NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, GreaterThan, 2, {NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX_CompareGreaterThan, NI_AVX_CompareGreaterThan}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, GreaterThanOrEqual, 2, {NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_AVX_CompareGreaterThanOrEqual, NI_AVX_CompareGreaterThanOrEqual}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, LessThan, 2, {NI_AVX2_CompareLessThan, NI_VectorT256_LessThan, NI_AVX2_CompareLessThan, NI_VectorT256_LessThan, NI_AVX2_CompareLessThan, NI_VectorT256_LessThan, NI_AVX2_CompareLessThan, NI_VectorT256_LessThan, NI_AVX_CompareLessThan, 
NI_AVX_CompareLessThan}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, LessThanOrEqual, 2, {NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_AVX_CompareLessThanOrEqual, NI_AVX_CompareLessThanOrEqual}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, Max, 2, {NI_AVX2_Max, NI_AVX2_Max, NI_AVX2_Max, NI_AVX2_Max, NI_AVX2_Max, NI_AVX2_Max, NI_VectorT256_Max, NI_VectorT256_Max, NI_AVX_Max, NI_AVX_Max}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, Min, 2, {NI_AVX2_Min, NI_AVX2_Min, NI_AVX2_Min, NI_AVX2_Min, NI_AVX2_Min, NI_AVX2_Min, NI_VectorT256_Min, NI_VectorT256_Min, NI_AVX_Min, NI_AVX_Min}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, op_Addition, 2, {NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX_Add, NI_AVX_Add}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, op_BitwiseAnd, 2, {NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX_And, NI_AVX_And}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, op_BitwiseOr, 2, {NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX_Or, NI_AVX_Or}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AVX_Divide, NI_AVX_Divide}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, op_Equality, 2, {NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, 
NI_Vector256_op_Equality, NI_Vector256_op_Equality}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, op_ExclusiveOr, 2, {NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX_Xor, NI_AVX_Xor}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, op_Explicit, 1, {NI_VectorT256_op_Explicit, NI_VectorT256_op_Explicit, NI_VectorT256_op_Explicit, NI_VectorT256_op_Explicit, NI_VectorT256_op_Explicit, NI_VectorT256_op_Explicit, NI_VectorT256_op_Explicit, NI_VectorT256_op_Explicit, NI_VectorT256_op_Explicit, NI_VectorT256_op_Explicit}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, op_Inequality, 2, {NI_Vector256_op_Inequality, NI_Vector256_op_Inequality, NI_Vector256_op_Inequality, NI_Vector256_op_Inequality, NI_Vector256_op_Inequality, NI_Vector256_op_Inequality, NI_Vector256_op_Inequality, NI_Vector256_op_Inequality, NI_Vector256_op_Inequality, NI_Vector256_op_Inequality}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_AVX2_MultiplyLow, NI_Illegal, NI_AVX2_MultiplyLow, NI_Illegal, NI_Illegal, NI_Illegal, NI_AVX_Multiply, NI_AVX_Multiply}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, op_Subtraction, 2, {NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX_Subtract, NI_AVX_Subtract}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, SquareRoot, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AVX_Sqrt, NI_AVX_Sqrt}, SimdAsHWIntrinsicFlag::None) + +#undef SIMD_AS_HWINTRINSIC_NM +#undef SIMD_AS_HWINTRINSIC_ID #endif // FEATURE_HW_INTRINSICS diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.tt b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.tt index d0d379b727ae0..b556656e918c4 100644 
--- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.tt +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.tt @@ -330,6 +330,7 @@ namespace System.Numerics /// /// The vector to compare this instance to. /// True if the other vector is equal to this instance; False otherwise. + [Intrinsic] public readonly bool Equals(Vector other) { return this == other; diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2_Intrinsics.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2_Intrinsics.cs index f2bfc88180731..b776c5d0a3cad 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2_Intrinsics.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2_Intrinsics.cs @@ -88,6 +88,7 @@ public readonly void CopyTo(float[] array, int index) /// /// The Vector2 to compare this instance to. /// True if the other Vector2 is equal to this instance; False otherwise. + [Intrinsic] public readonly bool Equals(Vector2 other) { return this == other; diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3_Intrinsics.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3_Intrinsics.cs index 52877c54b16a8..3ff9a8aa6525a 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3_Intrinsics.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3_Intrinsics.cs @@ -103,6 +103,7 @@ public readonly void CopyTo(float[] array, int index) /// /// The Vector3 to compare this instance to. /// True if the other Vector3 is equal to this instance; False otherwise. 
+ [Intrinsic] public readonly bool Equals(Vector3 other) { return this == other; diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4_Intrinsics.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4_Intrinsics.cs index ee2e54842df38..4e95120c244c9 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4_Intrinsics.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4_Intrinsics.cs @@ -133,6 +133,7 @@ public readonly void CopyTo(float[] array, int index) /// /// The Vector4 to compare this instance to. /// True if the other Vector4 is equal to this instance; False otherwise. + [Intrinsic] public readonly bool Equals(Vector4 other) { return this == other; From 47498ce4983e13585e65a0282ac43f9d5c8a46b3 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Mon, 18 May 2020 11:53:34 -0700 Subject: [PATCH 19/24] Don't handle IEquatable`1.Equals via SimdAsHWIntrinsic --- src/coreclr/src/jit/simdashwintrinsic.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/coreclr/src/jit/simdashwintrinsic.cpp b/src/coreclr/src/jit/simdashwintrinsic.cpp index 3e6ff3d76eb0f..01829f5beff96 100644 --- a/src/coreclr/src/jit/simdashwintrinsic.cpp +++ b/src/coreclr/src/jit/simdashwintrinsic.cpp @@ -189,7 +189,13 @@ GenTree* Compiler::impSimdAsHWIntrinsic(NamedIntrinsic intrinsic, // We want to resolve and populate the handle cache for this type even // if it isn't the basis for anything carried on the node. 
baseType = getBaseTypeAndSizeOfSIMDType(clsHnd, &simdSize); - assert(simdSize != 0); + + if (simdSize == 0) + { + // We get here for a devirtualization of IEquatable`1.Equals + assert(!isSIMDClass(clsHnd)); + return nullptr; + } if (retType == TYP_STRUCT) { From 67cbbb8cac2811941f384ed4940fc69bc34bc17c Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Mon, 18 May 2020 15:41:03 -0700 Subject: [PATCH 20/24] Applying formatting patch --- src/coreclr/src/jit/codegenarm64.cpp | 3 +-- src/coreclr/src/jit/compiler.h | 6 ++++-- src/coreclr/src/jit/lowerarmarch.cpp | 9 ++++----- src/coreclr/src/jit/lowerxarch.cpp | 6 ++---- src/coreclr/src/jit/lsrabuild.cpp | 3 +-- src/coreclr/src/jit/namedintrinsiclist.h | 6 ++++-- src/coreclr/src/jit/simdashwintrinsic.cpp | 10 +++++----- 7 files changed, 21 insertions(+), 22 deletions(-) diff --git a/src/coreclr/src/jit/codegenarm64.cpp b/src/coreclr/src/jit/codegenarm64.cpp index 04e24dfce4bb2..6b413e1db609b 100644 --- a/src/coreclr/src/jit/codegenarm64.cpp +++ b/src/coreclr/src/jit/codegenarm64.cpp @@ -4323,8 +4323,7 @@ void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode) assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicAdd || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicSub || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMul || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicDiv || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseAnd || - simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseOr || - simdNode->gtSIMDIntrinsicID == SIMDIntrinsicEqual); + simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseOr || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicEqual); GenTree* op1 = simdNode->gtGetOp1(); GenTree* op2 = simdNode->gtGetOp2(); diff --git a/src/coreclr/src/jit/compiler.h b/src/coreclr/src/jit/compiler.h index 68eb6abfdd4ba..1fd8d8d547568 100644 --- a/src/coreclr/src/jit/compiler.h +++ b/src/coreclr/src/jit/compiler.h @@ -2621,8 +2621,10 @@ class Compiler var_types baseType, unsigned size); - GenTreeHWIntrinsic* 
gtNewSimdAsHWIntrinsicNode( - var_types type, NamedIntrinsic hwIntrinsicID, var_types baseType, unsigned size) + GenTreeHWIntrinsic* gtNewSimdAsHWIntrinsicNode(var_types type, + NamedIntrinsic hwIntrinsicID, + var_types baseType, + unsigned size) { GenTreeHWIntrinsic* node = gtNewSimdHWIntrinsicNode(type, hwIntrinsicID, baseType, size); node->gtFlags |= GTF_SIMDASHW_OP; diff --git a/src/coreclr/src/jit/lowerarmarch.cpp b/src/coreclr/src/jit/lowerarmarch.cpp index bf0a7f3507891..b26d7e9285507 100644 --- a/src/coreclr/src/jit/lowerarmarch.cpp +++ b/src/coreclr/src/jit/lowerarmarch.cpp @@ -588,10 +588,8 @@ void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp) unsigned simdSize = node->gtSIMDSize; var_types simdType = Compiler::getSIMDTypeForSize(simdSize); - assert((intrinsicId == NI_Vector64_op_Equality) || - (intrinsicId == NI_Vector64_op_Inequality) || - (intrinsicId == NI_Vector128_op_Equality) || - (intrinsicId == NI_Vector128_op_Inequality)); + assert((intrinsicId == NI_Vector64_op_Equality) || (intrinsicId == NI_Vector64_op_Inequality) || + (intrinsicId == NI_Vector128_op_Equality) || (intrinsicId == NI_Vector128_op_Inequality)); assert(varTypeIsSIMD(simdType)); assert(varTypeIsArithmetic(baseType)); @@ -651,7 +649,8 @@ void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp) GenTree* insCns = comp->gtNewIconNode(cmpOp == GT_EQ ? 
-1 : 0, TYP_INT); BlockRange().InsertAfter(idxCns, insCns); - GenTree* tmp = comp->gtNewSimdAsHWIntrinsicNode(simdType, cmp, idxCns, insCns, NI_AdvSimd_Insert, TYP_INT, simdSize); + GenTree* tmp = + comp->gtNewSimdAsHWIntrinsicNode(simdType, cmp, idxCns, insCns, NI_AdvSimd_Insert, TYP_INT, simdSize); BlockRange().InsertAfter(insCns, tmp); LowerNode(tmp); diff --git a/src/coreclr/src/jit/lowerxarch.cpp b/src/coreclr/src/jit/lowerxarch.cpp index 475e08226065c..e72d1748a5a94 100644 --- a/src/coreclr/src/jit/lowerxarch.cpp +++ b/src/coreclr/src/jit/lowerxarch.cpp @@ -1120,10 +1120,8 @@ void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp) unsigned simdSize = node->gtSIMDSize; var_types simdType = Compiler::getSIMDTypeForSize(simdSize); - assert((intrinsicId == NI_Vector128_op_Equality) || - (intrinsicId == NI_Vector128_op_Inequality) || - (intrinsicId == NI_Vector256_op_Equality) || - (intrinsicId == NI_Vector256_op_Inequality)); + assert((intrinsicId == NI_Vector128_op_Equality) || (intrinsicId == NI_Vector128_op_Inequality) || + (intrinsicId == NI_Vector256_op_Equality) || (intrinsicId == NI_Vector256_op_Inequality)); assert(varTypeIsSIMD(simdType)); assert(varTypeIsArithmetic(baseType)); diff --git a/src/coreclr/src/jit/lsrabuild.cpp b/src/coreclr/src/jit/lsrabuild.cpp index 22fc482ac1f30..c1a5e0b586b40 100644 --- a/src/coreclr/src/jit/lsrabuild.cpp +++ b/src/coreclr/src/jit/lsrabuild.cpp @@ -1507,8 +1507,7 @@ int LinearScan::ComputeOperandDstCount(GenTree* operand) // Stores and void-typed operands may be encountered when processing call nodes, which contain // pointers to argument setup stores. 
assert(operand->OperIsStore() || operand->OperIsBlkOp() || operand->OperIsPutArgStk() || - operand->OperIsCompare() || operand->OperIs(GT_CMP) || - operand->TypeGet() == TYP_VOID); + operand->OperIsCompare() || operand->OperIs(GT_CMP) || operand->TypeGet() == TYP_VOID); return 0; } } diff --git a/src/coreclr/src/jit/namedintrinsiclist.h b/src/coreclr/src/jit/namedintrinsiclist.h index dbef8c0c0068e..c9a87d782a62c 100644 --- a/src/coreclr/src/jit/namedintrinsiclist.h +++ b/src/coreclr/src/jit/namedintrinsiclist.h @@ -44,10 +44,12 @@ enum NamedIntrinsic : unsigned short NI_SIMD_AS_HWINTRINSIC_START, #if defined(TARGET_XARCH) -#define SIMD_AS_HWINTRINSIC(classId, id, name, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) NI_##classId##_##id, +#define SIMD_AS_HWINTRINSIC(classId, id, name, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) \ + NI_##classId##_##id, #include "simdashwintrinsiclistxarch.h" #elif defined(TARGET_ARM64) -#define SIMD_AS_HWINTRINSIC(classId, id, name, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) NI_##classId##_##id, +#define SIMD_AS_HWINTRINSIC(classId, id, name, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) \ + NI_##classId##_##id, #include "simdashwintrinsiclistarm64.h" #endif // !defined(TARGET_XARCH) && !defined(TARGET_ARM64) NI_SIMD_AS_HWINTRINSIC_END, diff --git a/src/coreclr/src/jit/simdashwintrinsic.cpp b/src/coreclr/src/jit/simdashwintrinsic.cpp index 01829f5beff96..a39b67699ccb8 100644 --- a/src/coreclr/src/jit/simdashwintrinsic.cpp +++ b/src/coreclr/src/jit/simdashwintrinsic.cpp @@ -291,7 +291,7 @@ GenTree* Compiler::impSimdAsHWIntrinsic(NamedIntrinsic intrinsic, { argType = isInstanceMethod ? 
simdType : JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass))); - op1 = getArgForHWIntrinsic(argType, argClass, isInstanceMethod); + op1 = getArgForHWIntrinsic(argType, argClass, isInstanceMethod); assert(!SimdAsHWIntrinsicInfo::NeedsOperandsSwapped(intrinsic)); return gtNewSimdAsHWIntrinsicNode(retType, op1, hwIntrinsic, baseType, simdSize); @@ -305,7 +305,7 @@ GenTree* Compiler::impSimdAsHWIntrinsic(NamedIntrinsic intrinsic, argType = isInstanceMethod ? simdType : JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass))); - op1 = getArgForHWIntrinsic(argType, argClass, isInstanceMethod); + op1 = getArgForHWIntrinsic(argType, argClass, isInstanceMethod); if (SimdAsHWIntrinsicInfo::NeedsOperandsSwapped(intrinsic)) { @@ -415,7 +415,7 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, argType = isInstanceMethod ? simdType : JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass))); - op1 = getArgForHWIntrinsic(argType, argClass, isInstanceMethod); + op1 = getArgForHWIntrinsic(argType, argClass, isInstanceMethod); assert(!SimdAsHWIntrinsicInfo::NeedsOperandsSwapped(intrinsic)); @@ -521,7 +521,7 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, argType = isInstanceMethod ? simdType : JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass))); - op1 = getArgForHWIntrinsic(argType, argClass, isInstanceMethod); + op1 = getArgForHWIntrinsic(argType, argClass, isInstanceMethod); assert(!SimdAsHWIntrinsicInfo::NeedsOperandsSwapped(intrinsic)); @@ -775,7 +775,7 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, argType = isInstanceMethod ? 
simdType : JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass))); - op1 = getArgForHWIntrinsic(argType, argClass, isInstanceMethod); + op1 = getArgForHWIntrinsic(argType, argClass, isInstanceMethod); assert(!SimdAsHWIntrinsicInfo::NeedsOperandsSwapped(intrinsic)); From d0b3c1e70f7d454196d43a964c181eddde7fb123 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Mon, 18 May 2020 20:29:40 -0700 Subject: [PATCH 21/24] Account for op2 being able to precede op1 in the LIR order --- src/coreclr/src/jit/lowerxarch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/src/jit/lowerxarch.cpp b/src/coreclr/src/jit/lowerxarch.cpp index e72d1748a5a94..6242c6f426b9c 100644 --- a/src/coreclr/src/jit/lowerxarch.cpp +++ b/src/coreclr/src/jit/lowerxarch.cpp @@ -1300,7 +1300,7 @@ void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp) } GenTree* cmp = comp->gtNewSimdHWIntrinsicNode(simdType, op1, op2, cmpIntrinsic, cmpType, simdSize); - BlockRange().InsertAfter(op2, cmp); + BlockRange().InsertBefore(node, cmp); LowerNode(cmp); GenTree* msk = comp->gtNewSimdHWIntrinsicNode(TYP_INT, cmp, mskIntrinsic, mskType, simdSize); From 484b173c278bfc30895b2c793e133c1bfe9e583c Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Wed, 20 May 2020 09:31:08 -0700 Subject: [PATCH 22/24] Ensure SimdAsHWIntrinsic with 0 args are properly handled --- src/coreclr/src/jit/lowerarmarch.cpp | 1 + src/coreclr/src/jit/simdashwintrinsic.cpp | 73 ++++++++++++------- .../src/jit/simdashwintrinsiclistarm64.h | 2 +- .../src/jit/simdashwintrinsiclistxarch.h | 4 +- 4 files changed, 51 insertions(+), 29 deletions(-) diff --git a/src/coreclr/src/jit/lowerarmarch.cpp b/src/coreclr/src/jit/lowerarmarch.cpp index b26d7e9285507..3085b36aa7db8 100644 --- a/src/coreclr/src/jit/lowerarmarch.cpp +++ b/src/coreclr/src/jit/lowerarmarch.cpp @@ -666,6 +666,7 @@ void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp) GenTree* val = 
comp->gtNewSimdAsHWIntrinsicNode(TYP_UBYTE, msk, zroCns, NI_AdvSimd_Extract, TYP_UBYTE, simdSize); BlockRange().InsertAfter(zroCns, val); + LowerNode(val); zroCns = comp->gtNewIconNode(0, TYP_INT); BlockRange().InsertAfter(val, zroCns); diff --git a/src/coreclr/src/jit/simdashwintrinsic.cpp b/src/coreclr/src/jit/simdashwintrinsic.cpp index a39b67699ccb8..a4f07b0c2dc81 100644 --- a/src/coreclr/src/jit/simdashwintrinsic.cpp +++ b/src/coreclr/src/jit/simdashwintrinsic.cpp @@ -173,44 +173,35 @@ GenTree* Compiler::impSimdAsHWIntrinsic(NamedIntrinsic intrinsic, { assert(!mustExpand); - CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE; - if (!featureSIMD) { // We can't support SIMD intrinsics if the JIT doesn't support the feature return nullptr; } - var_types retType = JITtype2varType(sig->retType); - var_types baseType = TYP_UNKNOWN; - var_types simdType = TYP_UNKNOWN; - unsigned simdSize = 0; + CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE; + var_types retType = JITtype2varType(sig->retType); + var_types baseType = TYP_UNKNOWN; + var_types simdType = TYP_UNKNOWN; + unsigned simdSize = 0; + unsigned numArgs = sig->numArgs; + bool isInstanceMethod = false; // We want to resolve and populate the handle cache for this type even // if it isn't the basis for anything carried on the node. 
baseType = getBaseTypeAndSizeOfSIMDType(clsHnd, &simdSize); - if (simdSize == 0) - { - // We get here for a devirtualization of IEquatable`1.Equals - assert(!isSIMDClass(clsHnd)); - return nullptr; - } - if (retType == TYP_STRUCT) { baseType = getBaseTypeAndSizeOfSIMDType(sig->retTypeSigClass, &simdSize); retType = getSIMDTypeForSize(simdSize); } - else + else if (numArgs != 0) { argClass = info.compCompHnd->getArgClass(sig, sig->args); baseType = getBaseTypeAndSizeOfSIMDType(argClass, &simdSize); } - unsigned numArgs = sig->numArgs; - bool isInstanceMethod = false; - if (sig->hasThis()) { assert(SimdAsHWIntrinsicInfo::IsInstanceMethod(intrinsic)); @@ -219,11 +210,8 @@ GenTree* Compiler::impSimdAsHWIntrinsic(NamedIntrinsic intrinsic, isInstanceMethod = true; argClass = clsHnd; } - - if ((clsHnd == m_simdHandleCache->SIMDVectorHandle) && (numArgs != 0)) + else if ((clsHnd == m_simdHandleCache->SIMDVectorHandle) && (numArgs != 0)) { - assert(!isInstanceMethod); - // We need to fixup the clsHnd in the case we are an intrinsic on Vector // The first argument will be the appropriate Vector handle to use clsHnd = info.compCompHnd->getArgClass(sig, sig->args); @@ -235,15 +223,16 @@ GenTree* Compiler::impSimdAsHWIntrinsic(NamedIntrinsic intrinsic, baseType = getBaseTypeAndSizeOfSIMDType(clsHnd, &simdSize); } - simdType = getSIMDTypeForSize(simdSize); - assert(varTypeIsSIMD(simdType)); - - if (!varTypeIsArithmetic(baseType)) + if (!varTypeIsArithmetic(baseType) || (simdSize == 0)) { - // We only support intrinsics on the 10 primitive arithmetic types + // We get here for a devirtualization of IEquatable`1.Equals + // or if the user tries to use Vector with an unsupported type return nullptr; } + simdType = getSIMDTypeForSize(simdSize); + assert(varTypeIsSIMD(simdType)); + NamedIntrinsic hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, baseType); if ((hwIntrinsic == NI_Illegal) || !varTypeIsSIMD(simdType)) @@ -392,6 +381,38 @@ GenTree* 
Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, switch (numArgs) { + case 0: + { + switch (intrinsic) + { +#if defined(TARGET_XARCH) + case NI_VectorT128_get_Count: + case NI_VectorT256_get_Count: + { + GenTreeIntCon* countNode = gtNewIconNode(getSIMDVectorLength(simdSize, baseType), TYP_INT); + countNode->gtFlags |= GTF_ICON_SIMD_COUNT; + return countNode; + } +#elif defined(TARGET_ARM64) + case NI_VectorT128_get_Count: + { + GenTreeIntCon* countNode = gtNewIconNode(getSIMDVectorLength(simdSize, baseType), TYP_INT); + countNode->gtFlags |= GTF_ICON_SIMD_COUNT; + return countNode; + } +#else +#error Unsupported platform +#endif // !TARGET_XARCH && !TARGET_ARM64 + + default: + { + // Some platforms warn about unhandled switch cases + // We handle it more generally via the assert and nullptr return below. + break; + } + } + } + case 1: { bool isOpExplicit = (intrinsic == NI_VectorT128_op_Explicit); diff --git a/src/coreclr/src/jit/simdashwintrinsiclistarm64.h b/src/coreclr/src/jit/simdashwintrinsiclistarm64.h index f7290b84b9ae1..9621486e69657 100644 --- a/src/coreclr/src/jit/simdashwintrinsiclistarm64.h +++ b/src/coreclr/src/jit/simdashwintrinsiclistarm64.h @@ -100,7 +100,7 @@ SIMD_AS_HWINTRINSIC_ID(VectorT128, Equals, SIMD_AS_HWINTRINSIC_NM(VectorT128, EqualsInstance, "Equals", 2, {NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality}, SimdAsHWIntrinsicFlag::InstanceMethod) SIMD_AS_HWINTRINSIC_ID(VectorT128, Floor, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Floor, NI_AdvSimd_Floor}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(VectorT128, get_AllBitsSet, 0, {NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, 
NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT128, get_Count, 0, {NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, get_Count, 0, {NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(VectorT128, get_Zero, 0, {NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(VectorT128, GreaterThan, 2, {NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_Arm64_CompareGreaterThan, NI_AdvSimd_Arm64_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_Arm64_CompareGreaterThan}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(VectorT128, GreaterThanOrEqual, 2, {NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_Arm64_CompareGreaterThanOrEqual, NI_AdvSimd_Arm64_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, 
NI_AdvSimd_Arm64_CompareGreaterThanOrEqual}, SimdAsHWIntrinsicFlag::None) diff --git a/src/coreclr/src/jit/simdashwintrinsiclistxarch.h b/src/coreclr/src/jit/simdashwintrinsiclistxarch.h index 0a3131ecc9c8f..d13153db4aad7 100644 --- a/src/coreclr/src/jit/simdashwintrinsiclistxarch.h +++ b/src/coreclr/src/jit/simdashwintrinsiclistxarch.h @@ -100,7 +100,7 @@ SIMD_AS_HWINTRINSIC_ID(VectorT128, Equals, SIMD_AS_HWINTRINSIC_NM(VectorT128, EqualsInstance, "Equals", 2, {NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality}, SimdAsHWIntrinsicFlag::InstanceMethod) SIMD_AS_HWINTRINSIC_ID(VectorT128, Floor, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE41_Floor, NI_SSE41_Floor}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(VectorT128, get_AllBitsSet, 0, {NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT128, get_Count, 0, {NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count, NI_Vector128_get_Count}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, get_Count, 0, {NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count}, SimdAsHWIntrinsicFlag::None) 
SIMD_AS_HWINTRINSIC_ID(VectorT128, get_Zero, 0, {NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(VectorT128, GreaterThan, 2, {NI_SSE2_CompareGreaterThan, NI_VectorT128_GreaterThan, NI_SSE2_CompareGreaterThan, NI_VectorT128_GreaterThan, NI_SSE2_CompareGreaterThan, NI_VectorT128_GreaterThan, NI_VectorT128_GreaterThan, NI_VectorT128_GreaterThan, NI_SSE_CompareGreaterThan, NI_SSE2_CompareGreaterThan}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(VectorT128, GreaterThanOrEqual, 2, {NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_SSE_CompareGreaterThanOrEqual, NI_SSE2_CompareGreaterThanOrEqual}, SimdAsHWIntrinsicFlag::None) @@ -133,7 +133,7 @@ SIMD_AS_HWINTRINSIC_ID(VectorT256, Equals, SIMD_AS_HWINTRINSIC_NM(VectorT256, EqualsInstance, "Equals", 2, {NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality}, SimdAsHWIntrinsicFlag::InstanceMethod) SIMD_AS_HWINTRINSIC_ID(VectorT256, Floor, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AVX_Floor, NI_AVX_Floor}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(VectorT256, get_AllBitsSet, 0, {NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, 
NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT256, get_Count, 0, {NI_Vector256_get_Count, NI_Vector256_get_Count, NI_Vector256_get_Count, NI_Vector256_get_Count, NI_Vector256_get_Count, NI_Vector256_get_Count, NI_Vector256_get_Count, NI_Vector256_get_Count, NI_Vector256_get_Count, NI_Vector256_get_Count}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, get_Count, 0, {NI_VectorT256_get_Count, NI_VectorT256_get_Count, NI_VectorT256_get_Count, NI_VectorT256_get_Count, NI_VectorT256_get_Count, NI_VectorT256_get_Count, NI_VectorT256_get_Count, NI_VectorT256_get_Count, NI_VectorT256_get_Count, NI_VectorT256_get_Count}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(VectorT256, get_Zero, 0, {NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(VectorT256, GreaterThan, 2, {NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX_CompareGreaterThan, NI_AVX_CompareGreaterThan}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(VectorT256, GreaterThanOrEqual, 2, {NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_AVX_CompareGreaterThanOrEqual, NI_AVX_CompareGreaterThanOrEqual}, SimdAsHWIntrinsicFlag::None) From 5869844bc2931cc37d4c29cfcb543d6d876fb2d8 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Wed, 20 May 2020 10:21:35 -0700 
Subject: [PATCH 23/24] Fixup the arm64 LowerHWIntrinsicCmpOp implementation to use LowerNodeCC --- src/coreclr/src/jit/lowerarmarch.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/coreclr/src/jit/lowerarmarch.cpp b/src/coreclr/src/jit/lowerarmarch.cpp index 3085b36aa7db8..5d47b12f8db14 100644 --- a/src/coreclr/src/jit/lowerarmarch.cpp +++ b/src/coreclr/src/jit/lowerarmarch.cpp @@ -636,7 +636,7 @@ void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp) } GenTree* cmp = comp->gtNewSimdHWIntrinsicNode(simdType, op1, op2, cmpIntrinsic, baseType, simdSize); - BlockRange().InsertAfter(op2, cmp); + BlockRange().InsertBefore(node, cmp); LowerNode(cmp); if ((baseType == TYP_FLOAT) && (simdSize == 12)) @@ -677,6 +677,16 @@ void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp) node->gtOp1 = val; node->gtOp2 = zroCns; + // The CompareEqual will set (condition is true) or clear (condition is false) all bits of the respective element + // The MinAcross then ensures we get either all bits set (all conditions are true) or clear (any condition is false) + // So, we need to invert the condition from the operation since we compare against zero + + GenCondition cmpCnd = (cmpOp == GT_EQ) ? 
GenCondition::NE : GenCondition::EQ; + GenTree* cc = LowerNodeCC(node, cmpCnd); + + node->gtType = TYP_VOID; + node->ClearUnusedValue(); + LowerNode(node); } From cc556f11f3cada09717a00c3478872c550f0105b Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Wed, 20 May 2020 16:17:40 -0700 Subject: [PATCH 24/24] Fixing an assert in the NotSupported HWIntrinsic tests --- src/coreclr/src/jit/hwintrinsic.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/src/jit/hwintrinsic.cpp b/src/coreclr/src/jit/hwintrinsic.cpp index fe470025888f7..41f3c563fa162 100644 --- a/src/coreclr/src/jit/hwintrinsic.cpp +++ b/src/coreclr/src/jit/hwintrinsic.cpp @@ -608,9 +608,9 @@ static bool isSupportedBaseType(NamedIntrinsic intrinsic, var_types baseType) #ifdef TARGET_XARCH assert((intrinsic >= NI_Vector128_As && intrinsic <= NI_Vector128_AsUInt64) || - (intrinsic >= NI_Vector128_get_AllBitsSet && intrinsic <= NI_Vector128_ToVector256Unsafe) || + (intrinsic >= NI_Vector128_get_AllBitsSet && intrinsic <= NI_Vector128_WithElement) || (intrinsic >= NI_Vector256_As && intrinsic <= NI_Vector256_AsUInt64) || - (intrinsic >= NI_Vector256_get_AllBitsSet && intrinsic <= NI_Vector256_ToScalar)); + (intrinsic >= NI_Vector256_get_AllBitsSet && intrinsic <= NI_Vector256_WithElement)); #else assert((intrinsic >= NI_Vector64_AsByte && intrinsic <= NI_Vector64_AsUInt32) || (intrinsic >= NI_Vector64_get_AllBitsSet && intrinsic <= NI_Vector64_ToScalar) ||