From 6bc9303b938732f78ae960d3ee282ad53bd09f2f Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Mon, 11 May 2020 20:53:47 -0700 Subject: [PATCH 01/10] Updating the Vector64 and Vector128 Create methods to be marked Intrinsic on ARM64 --- .../System/Runtime/Intrinsics/Vector128.cs | 150 +------ .../src/System/Runtime/Intrinsics/Vector64.cs | 399 ++++++++++++------ 2 files changed, 271 insertions(+), 278 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs index db15743c6dcba..5caec4828975a 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs @@ -388,7 +388,7 @@ static Vector128 SoftwareFallback(int value) [Intrinsic] public static unsafe Vector128 Create(long value) { - if (Sse2.X64.IsSupported || AdvSimd.IsSupported) + if (Sse2.X64.IsSupported || AdvSimd.Arm64.IsSupported) { return Create(value); } @@ -546,7 +546,7 @@ static Vector128 SoftwareFallback(uint value) [CLSCompliant(false)] public static unsafe Vector128 Create(ulong value) { - if (Sse2.X64.IsSupported || AdvSimd.IsSupported) + if (Sse2.X64.IsSupported || AdvSimd.Arm64.IsSupported) { return Create(value); } @@ -587,32 +587,10 @@ static Vector128 SoftwareFallback(ulong value) [Intrinsic] public static unsafe Vector128 Create(byte e0, byte e1, byte e2, byte e3, byte e4, byte e5, byte e6, byte e7, byte e8, byte e9, byte e10, byte e11, byte e12, byte e13, byte e14, byte e15) { -#if !TARGET_ARM && !TARGET_ARM64 - if (Sse2.IsSupported) + if (Sse2.IsSupported || AdvSimd.IsSupported) { return Create(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15); } -#else - if (AdvSimd.IsSupported) - { - Vector128 result = CreateScalarUnsafe(e0); - result = AdvSimd.Insert(result, 1, e1); - result = AdvSimd.Insert(result, 2, e2); - result = AdvSimd.Insert(result, 3, e3); - result = AdvSimd.Insert(result, 4, e4); - result = AdvSimd.Insert(result, 5, e5); - result = AdvSimd.Insert(result, 6, e6); - result = AdvSimd.Insert(result, 7, e7); - result = AdvSimd.Insert(result, 8, e8); - result = AdvSimd.Insert(result, 9, e9); - result = AdvSimd.Insert(result, 10, e10); - result = AdvSimd.Insert(result, 11, e11); - result = AdvSimd.Insert(result, 12, e12); - result = AdvSimd.Insert(result, 13, e13); - result = AdvSimd.Insert(result, 14, e14); - return AdvSimd.Insert(result, 15, e15); - } -#endif return SoftwareFallback(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15); @@ -650,18 +628,10 @@ static Vector128 SoftwareFallback(byte e0, byte e1, byte e2, byte e3, byte [Intrinsic] public static unsafe Vector128 Create(double e0, double e1) { -#if !TARGET_ARM && !TARGET_ARM64 - if (Sse2.IsSupported) + if (Sse2.IsSupported || AdvSimd.IsSupported) { return Create(e0, e1); } -#else - if (AdvSimd.IsSupported) - { - Vector128 result = CreateScalarUnsafe(e0); - return AdvSimd.Insert(result, 1, e1); - } -#endif return SoftwareFallback(e0, e1); @@ -691,24 +661,10 @@ static Vector128 SoftwareFallback(double e0, double e1) [Intrinsic] public static unsafe Vector128 Create(short e0, short e1, short e2, short e3, short e4, short e5, short e6, short e7) { -#if !TARGET_ARM && !TARGET_ARM64 - if (Sse2.IsSupported) + if (Sse2.IsSupported || AdvSimd.IsSupported) { return Create(e0, e1, e2, e3, e4, e5, e6, e7); } -#else - if (AdvSimd.IsSupported) - { - Vector128 result = 
CreateScalarUnsafe(e0); - result = AdvSimd.Insert(result, 1, e1); - result = AdvSimd.Insert(result, 2, e2); - result = AdvSimd.Insert(result, 3, e3); - result = AdvSimd.Insert(result, 4, e4); - result = AdvSimd.Insert(result, 5, e5); - result = AdvSimd.Insert(result, 6, e6); - return AdvSimd.Insert(result, 7, e7); - } -#endif return SoftwareFallback(e0, e1, e2, e3, e4, e5, e6, e7); @@ -740,20 +696,10 @@ static Vector128 SoftwareFallback(short e0, short e1, short e2, short e3, [Intrinsic] public static unsafe Vector128 Create(int e0, int e1, int e2, int e3) { -#if !TARGET_ARM && !TARGET_ARM64 - if (Sse2.IsSupported) + if (Sse2.IsSupported || AdvSimd.IsSupported) { return Create(e0, e1, e2, e3); } -#else - if (AdvSimd.IsSupported) - { - Vector128 result = CreateScalarUnsafe(e0); - result = AdvSimd.Insert(result, 1, e1); - result = AdvSimd.Insert(result, 2, e2); - return AdvSimd.Insert(result, 3, e3); - } -#endif return SoftwareFallback(e0, e1, e2, e3); @@ -779,18 +725,10 @@ static Vector128 SoftwareFallback(int e0, int e1, int e2, int e3) [Intrinsic] public static unsafe Vector128 Create(long e0, long e1) { -#if !TARGET_ARM && !TARGET_ARM64 - if (Sse2.X64.IsSupported) + if (Sse2.X64.IsSupported || AdvSimd.Arm64.IsSupported) { return Create(e0, e1); } -#else - if (AdvSimd.IsSupported) - { - Vector128 result = CreateScalarUnsafe(e0); - return AdvSimd.Insert(result, 1, e1); - } -#endif return SoftwareFallback(e0, e1); @@ -829,32 +767,10 @@ static Vector128 SoftwareFallback(long e0, long e1) [CLSCompliant(false)] public static unsafe Vector128 Create(sbyte e0, sbyte e1, sbyte e2, sbyte e3, sbyte e4, sbyte e5, sbyte e6, sbyte e7, sbyte e8, sbyte e9, sbyte e10, sbyte e11, sbyte e12, sbyte e13, sbyte e14, sbyte e15) { -#if !TARGET_ARM && !TARGET_ARM64 - if (Sse2.IsSupported) + if (Sse2.IsSupported || AdvSimd.IsSupported) { return Create(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15); } -#else - if (AdvSimd.IsSupported) - { - Vector128 result = CreateScalarUnsafe(e0); - result = AdvSimd.Insert(result, 1, e1); - result = AdvSimd.Insert(result, 2, e2); - result = AdvSimd.Insert(result, 3, e3); - result = AdvSimd.Insert(result, 4, e4); - result = AdvSimd.Insert(result, 5, e5); - result = AdvSimd.Insert(result, 6, e6); - result = AdvSimd.Insert(result, 7, e7); - result = AdvSimd.Insert(result, 8, e8); - result = AdvSimd.Insert(result, 9, e9); - result = AdvSimd.Insert(result, 10, e10); - result = AdvSimd.Insert(result, 11, e11); - result = AdvSimd.Insert(result, 12, e12); - result = AdvSimd.Insert(result, 13, e13); - result = AdvSimd.Insert(result, 14, e14); - return AdvSimd.Insert(result, 15, e15); - } -#endif return SoftwareFallback(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15); @@ -894,20 +810,10 @@ static Vector128 SoftwareFallback(sbyte e0, sbyte e1, sbyte e2, sbyte e3, [Intrinsic] public static unsafe Vector128 Create(float e0, float e1, float e2, float e3) { -#if !TARGET_ARM && !TARGET_ARM64 - if (Sse.IsSupported) + if (Sse.IsSupported || AdvSimd.IsSupported) { return Create(e0, e1, e2, e3); } -#else - if (AdvSimd.IsSupported) - { - Vector128 result = CreateScalarUnsafe(e0); - result = AdvSimd.Insert(result, 1, e1); - result = AdvSimd.Insert(result, 2, e2); - return AdvSimd.Insert(result, 3, e3); - } -#endif return SoftwareFallback(e0, e1, e2, e3); @@ -940,24 +846,10 @@ static Vector128 SoftwareFallback(float e0, float e1, float e2, float e3) [CLSCompliant(false)] public static unsafe Vector128 Create(ushort e0, ushort e1, ushort e2, ushort e3, 
ushort e4, ushort e5, ushort e6, ushort e7) { -#if !TARGET_ARM && !TARGET_ARM64 - if (Sse2.IsSupported) + if (Sse2.IsSupported || AdvSimd.IsSupported) { return Create(e0, e1, e2, e3, e4, e5, e6, e7); } -#else - if (AdvSimd.IsSupported) - { - Vector128 result = CreateScalarUnsafe(e0); - result = AdvSimd.Insert(result, 1, e1); - result = AdvSimd.Insert(result, 2, e2); - result = AdvSimd.Insert(result, 3, e3); - result = AdvSimd.Insert(result, 4, e4); - result = AdvSimd.Insert(result, 5, e5); - result = AdvSimd.Insert(result, 6, e6); - return AdvSimd.Insert(result, 7, e7); - } -#endif return SoftwareFallback(e0, e1, e2, e3, e4, e5, e6, e7); @@ -990,20 +882,10 @@ static Vector128 SoftwareFallback(ushort e0, ushort e1, ushort e2, ushor [CLSCompliant(false)] public static unsafe Vector128 Create(uint e0, uint e1, uint e2, uint e3) { -#if !TARGET_ARM && !TARGET_ARM64 - if (Sse2.IsSupported) + if (Sse2.IsSupported || AdvSimd.IsSupported) { return Create(e0, e1, e2, e3); } -#else - if (AdvSimd.IsSupported) - { - Vector128 result = CreateScalarUnsafe(e0); - result = AdvSimd.Insert(result, 1, e1); - result = AdvSimd.Insert(result, 2, e2); - return AdvSimd.Insert(result, 3, e3); - } -#endif return SoftwareFallback(e0, e1, e2, e3); @@ -1030,18 +912,10 @@ static Vector128 SoftwareFallback(uint e0, uint e1, uint e2, uint e3) [CLSCompliant(false)] public static unsafe Vector128 Create(ulong e0, ulong e1) { -#if !TARGET_ARM && !TARGET_ARM64 - if (Sse2.X64.IsSupported) + if (Sse2.X64.IsSupported || AdvSimd.Arm64.IsSupported) { return Create(e0, e1); } -#else - if (AdvSimd.IsSupported) - { - Vector128 result = CreateScalarUnsafe(e0); - return AdvSimd.Insert(result, 1, e1); - } -#endif return SoftwareFallback(e0, e1); diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs index 56cf62d2f8805..f376ebf3bce52 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs @@ -160,19 +160,29 @@ public static Vector64 AsUInt64(this Vector64 vector) [Intrinsic] public static unsafe Vector64 Create(byte value) { - byte* pResult = stackalloc byte[8] - { - value, - value, - value, - value, - value, - value, - value, - value, - }; + if (AdvSimd.IsSupported) + { + return Create(value); + } - return Unsafe.AsRef>(pResult); + return SoftwareFallback(value); + + static Vector64 SoftwareFallback(byte value) + { + byte* pResult = stackalloc byte[8] + { + value, + value, + value, + value, + value, + value, + value, + value, + }; + + return Unsafe.AsRef>(pResult); + } } /// Creates a new instance with all elements initialized to the specified value. @@ -181,7 +191,17 @@ public static unsafe Vector64 Create(byte value) [Intrinsic] public static unsafe Vector64 Create(double value) { - return Unsafe.As>(ref value); + if (AdvSimd.IsSupported) + { + return Create(value); + } + + return SoftwareFallback(value); + + static Vector64 SoftwareFallback(double value) + { + return Unsafe.As>(ref value); + } } /// Creates a new instance with all elements initialized to the specified value. 
@@ -191,15 +211,25 @@ public static unsafe Vector64 Create(double value) [Intrinsic] public static unsafe Vector64 Create(short value) { - short* pResult = stackalloc short[4] + if (AdvSimd.IsSupported) { - value, - value, - value, - value, - }; + return Create(value); + } - return Unsafe.AsRef>(pResult); + return SoftwareFallback(value); + + static Vector64 SoftwareFallback(short value) + { + short* pResult = stackalloc short[4] + { + value, + value, + value, + value, + }; + + return Unsafe.AsRef>(pResult); + } } /// Creates a new instance with all elements initialized to the specified value. @@ -209,13 +239,23 @@ public static unsafe Vector64 Create(short value) [Intrinsic] public static unsafe Vector64 Create(int value) { - int* pResult = stackalloc int[2] + if (AdvSimd.IsSupported) { - value, - value, - }; + return Create(value); + } - return Unsafe.AsRef>(pResult); + return SoftwareFallback(value); + + static Vector64 SoftwareFallback(int value) + { + int* pResult = stackalloc int[2] + { + value, + value, + }; + + return Unsafe.AsRef>(pResult); + } } /// Creates a new instance with all elements initialized to the specified value. @@ -224,30 +264,50 @@ public static unsafe Vector64 Create(int value) [Intrinsic] public static unsafe Vector64 Create(long value) { - return Unsafe.As>(ref value); + if (AdvSimd.Arm64.IsSupported) + { + return Create(value); + } + + return SoftwareFallback(value); + + static Vector64 SoftwareFallback(long value) + { + return Unsafe.As>(ref value); + } } /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// On x86, this method corresponds to __m64 _mm_set1_pi8 /// A new with all elements initialized to . - [CLSCompliant(false)] [Intrinsic] + [CLSCompliant(false)] public static unsafe Vector64 Create(sbyte value) { - sbyte* pResult = stackalloc sbyte[8] + if (AdvSimd.IsSupported) { - value, - value, - value, - value, - value, - value, - value, - value, - }; + return Create(value); + } - return Unsafe.AsRef>(pResult); + return SoftwareFallback(value); + + static Vector64 SoftwareFallback(sbyte value) + { + sbyte* pResult = stackalloc sbyte[8] + { + value, + value, + value, + value, + value, + value, + value, + value, + }; + + return Unsafe.AsRef>(pResult); + } } /// Creates a new instance with all elements initialized to the specified value. @@ -256,59 +316,99 @@ public static unsafe Vector64 Create(sbyte value) [Intrinsic] public static unsafe Vector64 Create(float value) { - float* pResult = stackalloc float[2] + if (AdvSimd.IsSupported) { - value, - value, - }; + return Create(value); + } - return Unsafe.AsRef>(pResult); + return SoftwareFallback(value); + + static Vector64 SoftwareFallback(float value) + { + float* pResult = stackalloc float[2] + { + value, + value, + }; + + return Unsafe.AsRef>(pResult); + } } /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// On x86, this method corresponds to __m64 _mm_set1_pi16 /// A new with all elements initialized to . 
- [CLSCompliant(false)] [Intrinsic] + [CLSCompliant(false)] public static unsafe Vector64 Create(ushort value) { - ushort* pResult = stackalloc ushort[4] + if (AdvSimd.IsSupported) { - value, - value, - value, - value, - }; + return Create(value); + } - return Unsafe.AsRef>(pResult); + return SoftwareFallback(value); + + static Vector64 SoftwareFallback(ushort value) + { + ushort* pResult = stackalloc ushort[4] + { + value, + value, + value, + value, + }; + + return Unsafe.AsRef>(pResult); + } } /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// On x86, this method corresponds to __m64 _mm_set1_pi32 /// A new with all elements initialized to . - [CLSCompliant(false)] [Intrinsic] + [CLSCompliant(false)] public static unsafe Vector64 Create(uint value) { - uint* pResult = stackalloc uint[2] + if (AdvSimd.IsSupported) { - value, - value, - }; + return Create(value); + } - return Unsafe.AsRef>(pResult); + return SoftwareFallback(value); + + static Vector64 SoftwareFallback(uint value) + { + uint* pResult = stackalloc uint[2] + { + value, + value, + }; + + return Unsafe.AsRef>(pResult); + } } /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . - [CLSCompliant(false)] [Intrinsic] + [CLSCompliant(false)] public static unsafe Vector64 Create(ulong value) { - return Unsafe.As>(ref value); + if (AdvSimd.Arm64.IsSupported) + { + return Create(value); + } + + return SoftwareFallback(value); + + static Vector64 SoftwareFallback(ulong value) + { + return Unsafe.As>(ref value); + } } /// Creates a new instance with each element initialized to the corresponding specified value. @@ -322,33 +422,32 @@ public static unsafe Vector64 Create(ulong value) /// The value that element 7 will be initialized to. /// On x86, this method corresponds to __m64 _mm_setr_pi8 /// A new with each element initialized to corresponding specified value. + [Intrinsic] public static unsafe Vector64 Create(byte e0, byte e1, byte e2, byte e3, byte e4, byte e5, byte e6, byte e7) { if (AdvSimd.IsSupported) { - Vector64 result = Vector64.CreateScalarUnsafe(e0); - result = AdvSimd.Insert(result, 1, e1); - result = AdvSimd.Insert(result, 2, e2); - result = AdvSimd.Insert(result, 3, e3); - result = AdvSimd.Insert(result, 4, e4); - result = AdvSimd.Insert(result, 5, e5); - result = AdvSimd.Insert(result, 6, e6); - return AdvSimd.Insert(result, 7, e7); + return Create(e0, e1, e2, e3, e4, e5, e6, e7); } - byte* pResult = stackalloc byte[8] + return SoftwareFallback(e0, e1, e2, e3, e4, e5, e6, e7); + + static Vector64 SoftwareFallback(byte e0, byte e1, byte e2, byte e3, byte e4, byte e5, byte e6, byte e7) { - e0, - e1, - e2, - e3, - e4, - e5, - e6, - e7, - }; + byte* pResult = stackalloc byte[8] + { + e0, + e1, + e2, + e3, + e4, + e5, + e6, + e7, + }; - return Unsafe.AsRef>(pResult); + return Unsafe.AsRef>(pResult); + } } /// Creates a new instance with each element initialized to the corresponding specified value. @@ -358,25 +457,28 @@ public static unsafe Vector64 Create(byte e0, byte e1, byte e2, byte e3, b /// The value that element 3 will be initialized to. /// On x86, this method corresponds to __m64 _mm_setr_pi16 /// A new with each element initialized to corresponding specified value. 
+ [Intrinsic] public static unsafe Vector64 Create(short e0, short e1, short e2, short e3) { if (AdvSimd.IsSupported) { - Vector64 result = Vector64.CreateScalarUnsafe(e0); - result = AdvSimd.Insert(result, 1, e1); - result = AdvSimd.Insert(result, 2, e2); - return AdvSimd.Insert(result, 3, e3); + return Create(e0, e1, e2, e3); } - short* pResult = stackalloc short[4] + return SoftwareFallback(e0, e1, e2, e3); + + static Vector64 SoftwareFallback(short e0, short e1, short e2, short e3) { - e0, - e1, - e2, - e3, - }; + short* pResult = stackalloc short[4] + { + e0, + e1, + e2, + e3, + }; - return Unsafe.AsRef>(pResult); + return Unsafe.AsRef>(pResult); + } } /// Creates a new instance with each element initialized to the corresponding specified value. @@ -384,21 +486,26 @@ public static unsafe Vector64 Create(short e0, short e1, short e2, short /// The value that element 1 will be initialized to. /// On x86, this method corresponds to __m64 _mm_setr_pi32 /// A new with each element initialized to corresponding specified value. + [Intrinsic] public static unsafe Vector64 Create(int e0, int e1) { if (AdvSimd.IsSupported) { - Vector64 result = Vector64.CreateScalarUnsafe(e0); - return AdvSimd.Insert(result, 1, e1); + return Create(e0, e1); } - int* pResult = stackalloc int[2] + return SoftwareFallback(e0, e1); + + static Vector64 SoftwareFallback(int e0, int e1) { - e0, - e1, - }; + int* pResult = stackalloc int[2] + { + e0, + e1, + }; - return Unsafe.AsRef>(pResult); + return Unsafe.AsRef>(pResult); + } } /// Creates a new instance with each element initialized to the corresponding specified value. @@ -412,55 +519,59 @@ public static unsafe Vector64 Create(int e0, int e1) /// The value that element 7 will be initialized to. /// On x86, this method corresponds to __m64 _mm_setr_pi8 /// A new with each element initialized to corresponding specified value. + [Intrinsic] [CLSCompliant(false)] public static unsafe Vector64 Create(sbyte e0, sbyte e1, sbyte e2, sbyte e3, sbyte e4, sbyte e5, sbyte e6, sbyte e7) { if (AdvSimd.IsSupported) { - Vector64 result = Vector64.CreateScalarUnsafe(e0); - result = AdvSimd.Insert(result, 1, e1); - result = AdvSimd.Insert(result, 2, e2); - result = AdvSimd.Insert(result, 3, e3); - result = AdvSimd.Insert(result, 4, e4); - result = AdvSimd.Insert(result, 5, e5); - result = AdvSimd.Insert(result, 6, e6); - return AdvSimd.Insert(result, 7, e7); + return Create(e0, e1, e2, e3, e4, e5, e6, e7); } - sbyte* pResult = stackalloc sbyte[8] + return SoftwareFallback(e0, e1, e2, e3, e4, e5, e6, e7); + + static Vector64 SoftwareFallback(sbyte e0, sbyte e1, sbyte e2, sbyte e3, sbyte e4, sbyte e5, sbyte e6, sbyte e7) { - e0, - e1, - e2, - e3, - e4, - e5, - e6, - e7, - }; + sbyte* pResult = stackalloc sbyte[8] + { + e0, + e1, + e2, + e3, + e4, + e5, + e6, + e7, + }; - return Unsafe.AsRef>(pResult); + return Unsafe.AsRef>(pResult); + } } /// Creates a new instance with each element initialized to the corresponding specified value. /// The value that element 0 will be initialized to. /// The value that element 1 will be initialized to. /// A new with each element initialized to corresponding specified value. 
+ [Intrinsic] public static unsafe Vector64 Create(float e0, float e1) { if (AdvSimd.IsSupported) { - Vector64 result = Vector64.CreateScalarUnsafe(e0); - return AdvSimd.Insert(result, 1, e1); + return Create(e0, e1); } - float* pResult = stackalloc float[2] + return SoftwareFallback(e0, e1); + + static Vector64 SoftwareFallback(float e0, float e1) { - e0, - e1, - }; + float* pResult = stackalloc float[2] + { + e0, + e1, + }; - return Unsafe.AsRef>(pResult); + return Unsafe.AsRef>(pResult); + } } /// Creates a new instance with each element initialized to the corresponding specified value. @@ -470,26 +581,29 @@ public static unsafe Vector64 Create(float e0, float e1) /// The value that element 3 will be initialized to. /// On x86, this method corresponds to __m64 _mm_setr_pi16 /// A new with each element initialized to corresponding specified value. + [Intrinsic] [CLSCompliant(false)] public static unsafe Vector64 Create(ushort e0, ushort e1, ushort e2, ushort e3) { if (AdvSimd.IsSupported) { - Vector64 result = Vector64.CreateScalarUnsafe(e0); - result = AdvSimd.Insert(result, 1, e1); - result = AdvSimd.Insert(result, 2, e2); - return AdvSimd.Insert(result, 3, e3); + return Create(e0, e1, e2, e3); } - ushort* pResult = stackalloc ushort[4] + return SoftwareFallback(e0, e1, e2, e3); + + static Vector64 SoftwareFallback(ushort e0, ushort e1, ushort e2, ushort e3) { - e0, - e1, - e2, - e3, - }; + ushort* pResult = stackalloc ushort[4] + { + e0, + e1, + e2, + e3, + }; - return Unsafe.AsRef>(pResult); + return Unsafe.AsRef>(pResult); + } } /// Creates a new instance with each element initialized to the corresponding specified value. @@ -497,22 +611,27 @@ public static unsafe Vector64 Create(ushort e0, ushort e1, ushort e2, us /// The value that element 1 will be initialized to. /// On x86, this method corresponds to __m64 _mm_setr_pi32 /// A new with each element initialized to corresponding specified value. + [Intrinsic] [CLSCompliant(false)] public static unsafe Vector64 Create(uint e0, uint e1) { if (AdvSimd.IsSupported) { - Vector64 result = Vector64.CreateScalarUnsafe(e0); - return AdvSimd.Insert(result, 1, e1); + return Create(e0, e1); } - uint* pResult = stackalloc uint[2] + return SoftwareFallback(e0, e1); + + static Vector64 SoftwareFallback(uint e0, uint e1) { - e0, - e1, - }; + uint* pResult = stackalloc uint[2] + { + e0, + e1, + }; - return Unsafe.AsRef>(pResult); + return Unsafe.AsRef>(pResult); + } } /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. 
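
The managed-side pattern used throughout this first patch is worth seeing in one self-contained piece: each Create overload tests the relevant IsSupported flag, recurses into itself where the JIT will expand the call as an intrinsic, and otherwise falls back to a local function that builds the vector on the stack. A minimal sketch of that shape, assuming the .NET 5-era System.Runtime.Intrinsics surface (the wrapper type name is hypothetical, and the sketch calls the real Vector64.Create rather than recursing, since only an [Intrinsic]-marked method can rely on the JIT replacing its body):

    using System.Runtime.CompilerServices;
    using System.Runtime.Intrinsics;
    using System.Runtime.Intrinsics.Arm;

    static class VectorCreatePattern
    {
        public static unsafe Vector64<int> Create(int value)
        {
            if (AdvSimd.IsSupported)
            {
                // Hardware path: the real [Intrinsic] Create is expanded by
                // the JIT; per patch 02 below, an all-same-value Create lowers
                // to a single DuplicateToVector node on ARM64.
                return Vector64.Create(value);
            }

            return SoftwareFallback(value);

            static Vector64<int> SoftwareFallback(int value)
            {
                // Write both lanes to a stack buffer, then reinterpret the
                // buffer as a Vector64<int>.
                int* pResult = stackalloc int[2] { value, value };
                return Unsafe.AsRef<Vector64<int>>(pResult);
            }
        }
    }
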
From fca5ca58b06a860483500dba10609864914f4f4f Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Mon, 11 May 2020 21:30:22 -0700 Subject: [PATCH 02/10] Updating the JIT to emit constants for Vector64 and Vector128.Create --- src/coreclr/src/jit/hwintrinsicarm64.cpp | 41 +++ .../src/jit/hwintrinsiccodegenarm64.cpp | 8 +- src/coreclr/src/jit/hwintrinsiclistarm64.h | 5 +- src/coreclr/src/jit/hwintrinsiclistxarch.h | 4 +- src/coreclr/src/jit/lower.h | 133 ++++++++++ src/coreclr/src/jit/lowerarmarch.cpp | 250 +++++++++++++++++- src/coreclr/src/jit/lowerxarch.cpp | 135 +--------- 7 files changed, 434 insertions(+), 142 deletions(-) diff --git a/src/coreclr/src/jit/hwintrinsicarm64.cpp b/src/coreclr/src/jit/hwintrinsicarm64.cpp index 45efb8a68a58b..7127b69e850d1 100644 --- a/src/coreclr/src/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/src/jit/hwintrinsicarm64.cpp @@ -357,6 +357,47 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, assert(retNode->gtType == getSIMDTypeForSize(getSIMDTypeSizeInBytes(sig->retTypeSigClass))); break; } + + case NI_Vector64_Create: + case NI_Vector128_Create: + { + // We shouldn't handle this as an intrinsic if the + // respective ISAs have been disabled by the user. + + if (!compExactlyDependsOn(InstructionSet_AdvSimd)) + { + break; + } + + if (sig->numArgs == 1) + { + op1 = impPopStack().val; + retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, baseType, simdSize); + } + else if (sig->numArgs == 2) + { + op2 = impPopStack().val; + op1 = impPopStack().val; + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, baseType, simdSize); + } + else + { + assert(sig->numArgs >= 3); + + GenTreeArgList* tmp = nullptr; + + for (unsigned i = 0; i < sig->numArgs; i++) + { + tmp = gtNewArgList(impPopStack().val); + tmp->gtOp2 = op1; + op1 = tmp; + } + + retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, baseType, simdSize); + } + break; + } + case NI_Vector64_get_Count: case NI_Vector128_get_Count: { diff --git a/src/coreclr/src/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/src/jit/hwintrinsiccodegenarm64.cpp index 71ac68f759ce8..7aca22290f00d 100644 --- a/src/coreclr/src/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/src/jit/hwintrinsiccodegenarm64.cpp @@ -583,10 +583,9 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) GetEmitter()->emitIns_R_I(ins, emitSize, targetReg, 0, INS_OPTS_4S); break; - case NI_Vector64_Create: - case NI_Vector128_Create: case NI_AdvSimd_DuplicateToVector64: case NI_AdvSimd_DuplicateToVector128: + case NI_AdvSimd_Arm64_DuplicateToVector64: case NI_AdvSimd_Arm64_DuplicateToVector128: { if (varTypeIsFloating(intrin.baseType)) @@ -596,6 +595,11 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) const double dataValue = intrin.op1->AsDblCon()->gtDconVal; GetEmitter()->emitIns_R_F(INS_fmov, emitSize, targetReg, dataValue, opt); } + else if (intrin.id == NI_AdvSimd_Arm64_DuplicateToVector64) + { + assert(intrin.baseType == TYP_DOUBLE); + GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op1Reg, opt); + } else { GetEmitter()->emitIns_R_R_I(ins, emitSize, targetReg, op1Reg, 0, opt); diff --git a/src/coreclr/src/jit/hwintrinsiclistarm64.h b/src/coreclr/src/jit/hwintrinsiclistarm64.h index f798a17d26e42..0ca6f8d918abc 100644 --- a/src/coreclr/src/jit/hwintrinsiclistarm64.h +++ b/src/coreclr/src/jit/hwintrinsiclistarm64.h @@ -23,7 +23,7 @@ HARDWARE_INTRINSIC(Vector64, AsSByte, HARDWARE_INTRINSIC(Vector64, AsSingle, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector64, AsUInt16, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector64, AsUInt32, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, Create, 8, 1, {INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_mov, INS_mov, INS_dup, INS_dup}, HW_Category_SimpleSIMD, HW_Flag_SupportsContainment|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Vector64, Create, 8, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov, INS_mov, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector64, CreateScalarUnsafe, 8, 1, {INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_invalid, INS_invalid, INS_fmov, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SupportsContainment|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector64, get_AllBitsSet, 8, 0, {INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector64, get_Count, 8, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) @@ -45,7 +45,7 @@ HARDWARE_INTRINSIC(Vector128, AsSingle, 1 HARDWARE_INTRINSIC(Vector128, AsUInt16, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, AsUInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, AsUInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, Create, 16, 1, {INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_dup}, HW_Category_SimpleSIMD, HW_Flag_SupportsContainment|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Vector128, Create, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector128, CreateScalarUnsafe, 16, 1, {INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_fmov, INS_fmov}, HW_Category_SimpleSIMD, HW_Flag_SupportsContainment|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector128, get_AllBitsSet, 16, 0, {INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni}, HW_Category_Helper, 
HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector128, get_Count, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) @@ -197,6 +197,7 @@ HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareTest, 1 HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareTestScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmtst, INS_cmtst, INS_invalid, INS_cmtst}, HW_Category_SIMDScalar, HW_Flag_Commutative) HARDWARE_INTRINSIC(AdvSimd_Arm64, Divide, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fdiv, INS_fdiv}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AdvSimd_Arm64, DuplicateSelectedScalarToVector128, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_dup, INS_dup, INS_invalid, INS_dup}, HW_Category_IMM, HW_Flag_SupportsContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd_Arm64, DuplicateToVector64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov, INS_mov, INS_invalid, INS_fmov}, HW_Category_SimpleSIMD, HW_Flag_SupportsContainment|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AdvSimd_Arm64, DuplicateToVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_dup, INS_dup, INS_invalid, INS_dup}, HW_Category_SimpleSIMD, HW_Flag_SupportsContainment|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AdvSimd_Arm64, FusedMultiplyAdd, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmla}, HW_Category_SimpleSIMD, HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(AdvSimd_Arm64, FusedMultiplySubtract, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmls}, HW_Category_SimpleSIMD, HW_Flag_HasRMWSemantics) diff --git a/src/coreclr/src/jit/hwintrinsiclistxarch.h b/src/coreclr/src/jit/hwintrinsiclistxarch.h index db2bacbbb6457..c041364c966bf 100644 --- a/src/coreclr/src/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/src/jit/hwintrinsiclistxarch.h @@ -43,7 +43,7 @@ HARDWARE_INTRINSIC(Vector128, AsVector2, HARDWARE_INTRINSIC(Vector128, AsVector3, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Vector128, AsVector4, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Vector128, AsVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(Vector128, Create, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) 
+HARDWARE_INTRINSIC(Vector128, Create, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector128, CreateScalarUnsafe, 16, 1, {INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_movss, INS_movsdsse2}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) // The instruction generated for float/double depends on which ISAs are supported HARDWARE_INTRINSIC(Vector128, get_AllBitsSet, 16, 0, {INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_cmpps, INS_cmppd}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) @@ -77,7 +77,7 @@ HARDWARE_INTRINSIC(Vector256, AsVector256, HARDWARE_INTRINSIC(Vector256, get_AllBitsSet, 32, 0, {INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_cmpps, INS_cmppd}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Vector256, get_Count, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Vector256, get_Zero, 32, 0, {INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(Vector256, Create, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Vector256, Create, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector256, CreateScalarUnsafe, 32, 1, {INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_movss, INS_movsdsse2}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Vector256, GetElement, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector256, WithElement, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg) diff --git a/src/coreclr/src/jit/lower.h b/src/coreclr/src/jit/lower.h index 2fa26918e30f4..ae19cd29c3c3b 100644 --- a/src/coreclr/src/jit/lower.h +++ b/src/coreclr/src/jit/lower.h @@ -317,6 +317,139 @@ class Lowering final : public Phase void LowerHWIntrinsicCC(GenTreeHWIntrinsic* node, NamedIntrinsic newIntrinsicId, GenCondition condition); void LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node); void LowerFusedMultiplyAdd(GenTreeHWIntrinsic* node); + + union 
VectorConstant {
+        int8_t   i8[32];
+        uint8_t  u8[32];
+        int16_t  i16[16];
+        uint16_t u16[16];
+        int32_t  i32[8];
+        uint32_t u32[8];
+        int64_t  i64[4];
+        uint64_t u64[4];
+        float    f32[8];
+        double   f64[4];
+    };
+
+    //----------------------------------------------------------------------------------------------
+    // HandleArgForHWIntrinsicCreate: Processes an argument for the Lowering::LowerHWIntrinsicCreate method
+    //
+    //  Arguments:
+    //     arg      - The argument to process
+    //     argIdx   - The index of the argument being processed
+    //     vecCns   - The vector constant being constructed
+    //     baseType - The base type of the vector constant
+    //
+    //  Returns:
+    //     true if arg was a constant; otherwise, false
+    static bool HandleArgForHWIntrinsicCreate(GenTree* arg, int argIdx, VectorConstant& vecCns, var_types baseType)
+    {
+        switch (baseType)
+        {
+            case TYP_BYTE:
+            case TYP_UBYTE:
+            {
+                if (arg->IsCnsIntOrI())
+                {
+                    vecCns.i8[argIdx] = static_cast<int8_t>(arg->AsIntCon()->gtIconVal);
+                    return true;
+                }
+                else
+                {
+                    // We expect the VectorConstant to have been already zeroed
+                    assert(vecCns.i8[argIdx] == 0);
+                }
+                break;
+            }
+
+            case TYP_SHORT:
+            case TYP_USHORT:
+            {
+                if (arg->IsCnsIntOrI())
+                {
+                    vecCns.i16[argIdx] = static_cast<int16_t>(arg->AsIntCon()->gtIconVal);
+                    return true;
+                }
+                else
+                {
+                    // We expect the VectorConstant to have been already zeroed
+                    assert(vecCns.i16[argIdx] == 0);
+                }
+                break;
+            }
+
+            case TYP_INT:
+            case TYP_UINT:
+            {
+                if (arg->IsCnsIntOrI())
+                {
+                    vecCns.i32[argIdx] = static_cast<int32_t>(arg->AsIntCon()->gtIconVal);
+                    return true;
+                }
+                else
+                {
+                    // We expect the VectorConstant to have been already zeroed
+                    assert(vecCns.i32[argIdx] == 0);
+                }
+                break;
+            }
+
+            case TYP_LONG:
+            case TYP_ULONG:
+            {
+                if (arg->OperIs(GT_CNS_LNG))
+                {
+                    vecCns.i64[argIdx] = static_cast<int64_t>(arg->AsLngCon()->gtLconVal);
+                    return true;
+                }
+                else
+                {
+                    // We expect the VectorConstant to have been already zeroed
+                    assert(vecCns.i64[argIdx] == 0);
+                }
+                break;
+            }
+
+            case TYP_FLOAT:
+            {
+                if (arg->IsCnsFltOrDbl())
+                {
+                    vecCns.f32[argIdx] = static_cast<float>(arg->AsDblCon()->gtDconVal);
+                    return true;
+                }
+                else
+                {
+                    // We expect the VectorConstant to have been already zeroed
+                    // We check against the i32, rather than f32, to account for -0.0
+                    assert(vecCns.i32[argIdx] == 0);
+                }
+                break;
+            }
+
+            case TYP_DOUBLE:
+            {
+                if (arg->IsCnsFltOrDbl())
+                {
+                    vecCns.f64[argIdx] = static_cast<double>(arg->AsDblCon()->gtDconVal);
+                    return true;
+                }
+                else
+                {
+                    // We expect the VectorConstant to have been already zeroed
+                    // We check against the i64, rather than f64, to account for -0.0
+                    assert(vecCns.i64[argIdx] == 0);
+                }
+                break;
+            }
+
+            default:
+            {
+                unreached();
+            }
+        }
+
+        return false;
+    }
 #endif // FEATURE_HW_INTRINSICS
 
     // Utility functions
diff --git a/src/coreclr/src/jit/lowerarmarch.cpp b/src/coreclr/src/jit/lowerarmarch.cpp
index 7b7706e9b1caa..bbc0f8dff1ef5 100644
--- a/src/coreclr/src/jit/lowerarmarch.cpp
+++ b/src/coreclr/src/jit/lowerarmarch.cpp
@@ -535,8 +535,255 @@ void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
         node->gtType = TYP_SIMD16;
     }
 
+    NamedIntrinsic intrinsicId = node->gtHWIntrinsicId;
+
+    switch (intrinsicId)
+    {
+        case NI_Vector64_Create:
+        case NI_Vector128_Create:
+        {
+            // We don't directly support the Vector64.Create or Vector128.Create methods in codegen
+            // and instead lower them to other intrinsic nodes in LowerHWIntrinsicCreate so we expect
+            // that the node is modified to either not be a HWIntrinsic node or that it is no longer
+            // the same intrinsic as when it came in.
+ + LowerHWIntrinsicCreate(node); + assert(!node->OperIsHWIntrinsic() || (node->gtHWIntrinsicId != intrinsicId)); + LowerNode(node); + return; + } + + default: + break; + } + ContainCheckHWIntrinsic(node); } + +//---------------------------------------------------------------------------------------------- +// Lowering::LowerHWIntrinsicCreate: Lowers a Vector64 or Vector128 Create call +// +// Arguments: +// node - The hardware intrinsic node. +// +void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) +{ + NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; + var_types simdType = node->gtType; + var_types baseType = node->gtSIMDBaseType; + unsigned simdSize = node->gtSIMDSize; + VectorConstant vecCns = {}; + + if ((simdSize == 8) && (simdType == TYP_DOUBLE)) + { + simdType = TYP_SIMD8; + } + + assert(varTypeIsSIMD(simdType)); + assert(varTypeIsArithmetic(baseType)); + assert(simdSize != 0); + + GenTreeArgList* argList = nullptr; + GenTree* op1 = node->gtGetOp1(); + GenTree* op2 = node->gtGetOp2(); + + // Spare GenTrees to be used for the lowering logic below + // Defined upfront to avoid naming conflicts, etc... + GenTree* idx = nullptr; + GenTree* tmp1 = nullptr; + GenTree* tmp2 = nullptr; + GenTree* tmp3 = nullptr; + + assert(op1 != nullptr); + + unsigned argCnt = 0; + unsigned cnsArgCnt = 0; + + if (op1->OperIsList()) + { + assert(op2 == nullptr); + + for (argList = op1->AsArgList(); argList != nullptr; argList = argList->Rest()) + { + if (HandleArgForHWIntrinsicCreate(argList->Current(), argCnt, vecCns, baseType)) + { + cnsArgCnt += 1; + } + argCnt += 1; + } + } + else + { + if (HandleArgForHWIntrinsicCreate(op1, argCnt, vecCns, baseType)) + { + cnsArgCnt += 1; + } + argCnt += 1; + + if (op2 != nullptr) + { + if (HandleArgForHWIntrinsicCreate(op2, argCnt, vecCns, baseType)) + { + cnsArgCnt += 1; + } + argCnt += 1; + } + else if (cnsArgCnt == 1) + { + // These intrinsics are meant to set the same value to every element + // so we'll just specially handle it here and copy it into the remaining + // indices. + + for (unsigned i = 1; i < simdSize / genTypeSize(baseType); i++) + { + HandleArgForHWIntrinsicCreate(op1, i, vecCns, baseType); + } + } + } + assert((argCnt == 1) || (argCnt == (simdSize / genTypeSize(baseType)))); + + if (argCnt == cnsArgCnt) + { + if (op1->OperIsList()) + { + for (argList = op1->AsArgList(); argList != nullptr; argList = argList->Rest()) + { + BlockRange().Remove(argList->Current()); + } + } + else + { + BlockRange().Remove(op1); + + if (op2 != nullptr) + { + BlockRange().Remove(op2); + } + } + + CORINFO_FIELD_HANDLE hnd = comp->GetEmitter()->emitAnyConst(&vecCns, simdSize, emitDataAlignment::Required); + GenTree* clsVarAddr = new (comp, GT_CLS_VAR_ADDR) GenTreeClsVar(GT_CLS_VAR_ADDR, TYP_I_IMPL, hnd, nullptr); + BlockRange().InsertBefore(node, clsVarAddr); + + node->ChangeOper(GT_IND); + node->gtOp1 = clsVarAddr; + + // TODO-ARM64-CQ: We should be able to modify at least the paths that use Insert to trivially support partial + // vector constants. With this, we can create a constant if say 50% of the inputs are also constant and just + // insert the non-constant values which should still allow some gains. 
+ + return; + } + else if (argCnt == 1) + { + // We have the following (where simd is simd8 or simd16): + // /--* op1 T + // node = * HWINTRINSIC simd T Create + + // We will be constructing the following parts: + // /--* op1 T + // node = * HWINTRINSIC simd T DuplicateToVector + + // This is roughly the following managed code: + // return AdvSimd.Arm64.DuplicateToVector(op1); + + if (varTypeIsLong(baseType) || (baseType == TYP_DOUBLE)) + { + node->gtHWIntrinsicId = + (simdType == TYP_SIMD8) ? NI_AdvSimd_Arm64_DuplicateToVector64 : NI_AdvSimd_Arm64_DuplicateToVector128; + } + else + { + node->gtHWIntrinsicId = + (simdType == TYP_SIMD8) ? NI_AdvSimd_DuplicateToVector64 : NI_AdvSimd_DuplicateToVector128; + } + return; + } + + // We have the following (where simd is simd8 or simd16): + // /--* op1 T + // +--* ... T + // +--* opN T + // node = * HWINTRINSIC simd T Create + + if (op1->OperIsList()) + { + argList = op1->AsArgList(); + op1 = argList->Current(); + argList = argList->Rest(); + } + + // We will be constructing the following parts: + // /--* op1 T + // tmp1 = * HWINTRINSIC simd8 T CreateScalarUnsafe + // ... + + // This is roughly the following managed code: + // var tmp1 = Vector64.CreateScalarUnsafe(op1); + // ... + + NamedIntrinsic createScalarUnsafe = + (simdType == TYP_SIMD8) ? NI_Vector64_CreateScalarUnsafe : NI_Vector128_CreateScalarUnsafe; + + tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, op1, createScalarUnsafe, baseType, simdSize); + BlockRange().InsertAfter(op1, tmp1); + LowerNode(tmp1); + + unsigned N = 0; + GenTree* opN = nullptr; + + for (N = 1; N < argCnt - 1; N++) + { + // We will be constructing the following parts: + // ... + // idx = CNS_INT int N + // /--* tmp1 simd + // +--* idx int + // +--* opN T + // tmp1 = * HWINTRINSIC simd T Insert + // ... + + // This is roughly the following managed code: + // ... + // tmp1 = AdvSimd.Insert(tmp1, N, opN); + // ... + + opN = argList->Current(); + + idx = comp->gtNewIconNode(N, TYP_INT); + BlockRange().InsertBefore(opN, idx); + + tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp1, idx, opN, NI_AdvSimd_Insert, baseType, simdSize); + BlockRange().InsertAfter(opN, tmp1); + LowerNode(tmp1); + + argList = argList->Rest(); + } + + assert(N == (argCnt - 1)); + + // We will be constructing the following parts: + // idx = CNS_INT int N + // /--* tmp1 simd + // +--* idx int + // +--* opN T + // node = * HWINTRINSIC simd T Insert + + // This is roughly the following managed code: + // ... + // tmp1 = AdvSimd.Insert(tmp1, N, opN); + // ... + + opN = (argCnt == 2) ? 
op2 : argList->Current(); + + idx = comp->gtNewIconNode(N, TYP_INT); + BlockRange().InsertBefore(opN, idx); + + node->gtOp1 = comp->gtNewArgList(tmp1, idx, opN); + node->gtOp2 = nullptr; + + node->gtHWIntrinsicId = NI_AdvSimd_Insert; +} #endif // FEATURE_HW_INTRINSICS //------------------------------------------------------------------------ @@ -933,12 +1180,11 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) } break; - case NI_Vector64_Create: - case NI_Vector128_Create: case NI_Vector64_CreateScalarUnsafe: case NI_Vector128_CreateScalarUnsafe: case NI_AdvSimd_DuplicateToVector64: case NI_AdvSimd_DuplicateToVector128: + case NI_AdvSimd_Arm64_DuplicateToVector64: case NI_AdvSimd_Arm64_DuplicateToVector128: if (intrin.op1->IsCnsIntOrI()) { diff --git a/src/coreclr/src/jit/lowerxarch.cpp b/src/coreclr/src/jit/lowerxarch.cpp index 14672d16aaaac..f2a66d1166f76 100644 --- a/src/coreclr/src/jit/lowerxarch.cpp +++ b/src/coreclr/src/jit/lowerxarch.cpp @@ -1100,141 +1100,8 @@ void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) ContainCheckHWIntrinsic(node); } -union VectorConstant { - int8_t i8[32]; - uint8_t u8[32]; - int16_t i16[16]; - uint16_t u16[16]; - int32_t i32[8]; - uint32_t u32[8]; - int64_t i64[4]; - uint64_t u64[4]; - float f32[8]; - double f64[4]; -}; - -//---------------------------------------------------------------------------------------------- -// ProcessArgForHWIntrinsicCreate: Processes an argument for the Lowering::LowerHWIntrinsicCreate method -// -// Arguments: -// arg - The argument to process -// argIdx - The index of the argument being processed -// vecCns - The vector constant being constructed -// baseType - The base type of the vector constant -// -// Returns: -// true if arg was a constant; otherwise, false -static bool HandleArgForHWIntrinsicCreate(GenTree* arg, int argIdx, VectorConstant& vecCns, var_types baseType) -{ - switch (baseType) - { - case TYP_BYTE: - case TYP_UBYTE: - { - if (arg->IsCnsIntOrI()) - { - vecCns.i8[argIdx] = static_cast(arg->AsIntCon()->gtIconVal); - return true; - } - else - { - // We expect the VectorConstant to have been already zeroed - assert(vecCns.i8[argIdx] == 0); - } - break; - } - - case TYP_SHORT: - case TYP_USHORT: - { - if (arg->IsCnsIntOrI()) - { - vecCns.i16[argIdx] = static_cast(arg->AsIntCon()->gtIconVal); - return true; - } - else - { - // We expect the VectorConstant to have been already zeroed - assert(vecCns.i16[argIdx] == 0); - } - break; - } - - case TYP_INT: - case TYP_UINT: - { - if (arg->IsCnsIntOrI()) - { - vecCns.i32[argIdx] = static_cast(arg->AsIntCon()->gtIconVal); - return true; - } - else - { - // We expect the VectorConstant to have been already zeroed - assert(vecCns.i32[argIdx] == 0); - } - break; - } - - case TYP_LONG: - case TYP_ULONG: - { - if (arg->OperIs(GT_CNS_LNG)) - { - vecCns.i64[argIdx] = static_cast(arg->AsLngCon()->gtLconVal); - return true; - } - else - { - // We expect the VectorConstant to have been already zeroed - assert(vecCns.i64[argIdx] == 0); - } - break; - } - - case TYP_FLOAT: - { - if (arg->IsCnsFltOrDbl()) - { - vecCns.f32[argIdx] = static_cast(arg->AsDblCon()->gtDconVal); - return true; - } - else - { - // We expect the VectorConstant to have been already zeroed - // We check against the i32, rather than f32, to account for -0.0 - assert(vecCns.i32[argIdx] == 0); - } - break; - } - - case TYP_DOUBLE: - { - if (arg->IsCnsFltOrDbl()) - { - vecCns.f64[argIdx] = static_cast(arg->AsDblCon()->gtDconVal); - return true; - } - else - { - // We expect the 
VectorConstant to have been already zeroed - // We check against the i64, rather than f64, to account for -0.0 - assert(vecCns.i64[argIdx] == 0); - } - break; - } - - default: - { - unreached(); - } - } - - return false; -} - //---------------------------------------------------------------------------------------------- -// Lowering::LowerHWIntrinsicCreate: Lowers a Vector64, Vector128, or Vector256 Create call +// Lowering::LowerHWIntrinsicCreate: Lowers a Vector128 or Vector256 Create call // // Arguments: // node - The hardware intrinsic node. From 0a05995108f0247756b911b3bedd3c345fba34ea Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Tue, 12 May 2020 07:57:30 -0700 Subject: [PATCH 03/10] Fixing lookupNamedIntrinsic and impIntrinsic to throw PNSE for unsupported mustExpand intrinsics --- src/coreclr/src/jit/importer.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/coreclr/src/jit/importer.cpp b/src/coreclr/src/jit/importer.cpp index 52501b346e928..73e6fa2c9d60a 100644 --- a/src/coreclr/src/jit/importer.cpp +++ b/src/coreclr/src/jit/importer.cpp @@ -4272,7 +4272,8 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, if (mustExpand && (retNode == nullptr)) { - NO_WAY("JIT must expand the intrinsic!"); + assert(!"Unhandled must expand intrinsic, throwing PlatformNotSupportedException"); + return impUnsupportedHWIntrinsic(CORINFO_HELP_THROW_PLATFORM_NOT_SUPPORTED, method, sig, mustExpand); } // Optionally report if this intrinsic is special @@ -4509,12 +4510,13 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) result = HWIntrinsicInfo::lookupId(this, &sig, className, methodName, enclosingClassName); } - else if (strcmp(methodName, "get_IsSupported") == 0) - { - return NI_IsSupported_False; - } - else + + if (result == NI_Illegal) { + if (strcmp(methodName, "get_IsSupported") == 0) + { + return NI_IsSupported_False; + } return gtIsRecursiveCall(method) ? 
NI_Throw_PlatformNotSupportedException : NI_Illegal; } } From c0b72a56f366d5dcff705922db8d94db38f28455 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Tue, 12 May 2020 09:40:22 -0700 Subject: [PATCH 04/10] Fixing impIntrinsic to directly use gtNewMustThrowException --- src/coreclr/src/jit/importer.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/coreclr/src/jit/importer.cpp b/src/coreclr/src/jit/importer.cpp index 73e6fa2c9d60a..c34665d0a2303 100644 --- a/src/coreclr/src/jit/importer.cpp +++ b/src/coreclr/src/jit/importer.cpp @@ -4273,7 +4273,13 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, if (mustExpand && (retNode == nullptr)) { assert(!"Unhandled must expand intrinsic, throwing PlatformNotSupportedException"); - return impUnsupportedHWIntrinsic(CORINFO_HELP_THROW_PLATFORM_NOT_SUPPORTED, method, sig, mustExpand); + + for (unsigned i = 0; i < sig->numArgs; i++) + { + impPopStack(); + } + + return gtNewMustThrowException(CORINFO_HELP_THROW_PLATFORM_NOT_SUPPORTED, JITtype2varType(sig->retType), sig->retTypeClass); } // Optionally report if this intrinsic is special From 4d282afd6f475fb97ec82f525eb8e2f4c1a5f9c6 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Tue, 12 May 2020 10:17:53 -0700 Subject: [PATCH 05/10] Move gtNewMustThrowException to not depend on FEATURE_HW_INTRINSICS --- src/coreclr/src/jit/compiler.h | 3 +- src/coreclr/src/jit/gentree.cpp | 68 ++++++++++++++++----------------- 2 files changed, 36 insertions(+), 35 deletions(-) diff --git a/src/coreclr/src/jit/compiler.h b/src/coreclr/src/jit/compiler.h index 917dbbb9751a7..29625fb1b5c58 100644 --- a/src/coreclr/src/jit/compiler.h +++ b/src/coreclr/src/jit/compiler.h @@ -2657,7 +2657,6 @@ class Compiler NamedIntrinsic hwIntrinsicID); GenTreeHWIntrinsic* gtNewScalarHWIntrinsicNode( var_types type, GenTree* op1, GenTree* op2, GenTree* op3, NamedIntrinsic hwIntrinsicID); - GenTree* gtNewMustThrowException(unsigned helper, var_types type, CORINFO_CLASS_HANDLE clsHnd); CORINFO_CLASS_HANDLE gtGetStructHandleForHWSIMD(var_types simdType, var_types simdBaseType); var_types getBaseTypeFromArgIfNeeded(NamedIntrinsic intrinsic, CORINFO_CLASS_HANDLE clsHnd, @@ -2665,6 +2664,8 @@ class Compiler var_types baseType); #endif // FEATURE_HW_INTRINSICS + GenTree* gtNewMustThrowException(unsigned helper, var_types type, CORINFO_CLASS_HANDLE clsHnd); + GenTreeLclFld* gtNewLclFldNode(unsigned lnum, var_types type, unsigned offset); GenTree* gtNewInlineCandidateReturnExpr(GenTree* inlineCandidate, var_types type); diff --git a/src/coreclr/src/jit/gentree.cpp b/src/coreclr/src/jit/gentree.cpp index 8e341ce7dab5a..afeb708a2725b 100644 --- a/src/coreclr/src/jit/gentree.cpp +++ b/src/coreclr/src/jit/gentree.cpp @@ -18659,40 +18659,6 @@ GenTreeHWIntrinsic* Compiler::gtNewScalarHWIntrinsicNode( GenTreeHWIntrinsic(type, gtNewArgList(op1, op2, op3), hwIntrinsicID, TYP_UNKNOWN, 0); } -//--------------------------------------------------------------------------------------- -// gtNewMustThrowException: -// create a throw node (calling into JIT helper) that must be thrown. -// The result would be a comma node: COMMA(jithelperthrow(void), x) where x's type should be specified. 
-// -// Arguments -// helper - JIT helper ID -// type - return type of the node -// -// Return Value -// pointer to the throw node -// -GenTree* Compiler::gtNewMustThrowException(unsigned helper, var_types type, CORINFO_CLASS_HANDLE clsHnd) -{ - GenTreeCall* node = gtNewHelperCallNode(helper, TYP_VOID); - node->gtCallMoreFlags |= GTF_CALL_M_DOES_NOT_RETURN; - if (type != TYP_VOID) - { - unsigned dummyTemp = lvaGrabTemp(true DEBUGARG("dummy temp of must thrown exception")); - if (type == TYP_STRUCT) - { - lvaSetStruct(dummyTemp, clsHnd, false); - type = lvaTable[dummyTemp].lvType; // struct type is normalized - } - else - { - lvaTable[dummyTemp].lvType = type; - } - GenTree* dummyNode = gtNewLclvNode(dummyTemp, type); - return gtNewOperNode(GT_COMMA, type, node, dummyNode); - } - return node; -} - // Returns true for the HW Instrinsic instructions that have MemoryLoad semantics, false otherwise bool GenTreeHWIntrinsic::OperIsMemoryLoad() const { @@ -18782,6 +18748,40 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoadOrStore() const #endif // FEATURE_HW_INTRINSICS +//--------------------------------------------------------------------------------------- +// gtNewMustThrowException: +// create a throw node (calling into JIT helper) that must be thrown. +// The result would be a comma node: COMMA(jithelperthrow(void), x) where x's type should be specified. +// +// Arguments +// helper - JIT helper ID +// type - return type of the node +// +// Return Value +// pointer to the throw node +// +GenTree* Compiler::gtNewMustThrowException(unsigned helper, var_types type, CORINFO_CLASS_HANDLE clsHnd) +{ + GenTreeCall* node = gtNewHelperCallNode(helper, TYP_VOID); + node->gtCallMoreFlags |= GTF_CALL_M_DOES_NOT_RETURN; + if (type != TYP_VOID) + { + unsigned dummyTemp = lvaGrabTemp(true DEBUGARG("dummy temp of must thrown exception")); + if (type == TYP_STRUCT) + { + lvaSetStruct(dummyTemp, clsHnd, false); + type = lvaTable[dummyTemp].lvType; // struct type is normalized + } + else + { + lvaTable[dummyTemp].lvType = type; + } + GenTree* dummyNode = gtNewLclvNode(dummyTemp, type); + return gtNewOperNode(GT_COMMA, type, node, dummyNode); + } + return node; +} + //--------------------------------------------------------------------------------------- // InitializeStructReturnType: // Initialize the Return Type Descriptor for a method that returns a struct type From 07a08aaf8a8b00689e9c6574ff80fa41f8db7142 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Tue, 12 May 2020 10:55:44 -0700 Subject: [PATCH 06/10] Applying formatting patch --- src/coreclr/src/jit/importer.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coreclr/src/jit/importer.cpp b/src/coreclr/src/jit/importer.cpp index c34665d0a2303..077785f2e11f1 100644 --- a/src/coreclr/src/jit/importer.cpp +++ b/src/coreclr/src/jit/importer.cpp @@ -4279,7 +4279,8 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, impPopStack(); } - return gtNewMustThrowException(CORINFO_HELP_THROW_PLATFORM_NOT_SUPPORTED, JITtype2varType(sig->retType), sig->retTypeClass); + return gtNewMustThrowException(CORINFO_HELP_THROW_PLATFORM_NOT_SUPPORTED, JITtype2varType(sig->retType), + sig->retTypeClass); } // Optionally report if this intrinsic is special From 5ec1fcde6652b3ac9a2c652734c5b5fded1ed0bf Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Tue, 12 May 2020 18:06:52 -0700 Subject: [PATCH 07/10] Add basic support for GT_CLS_VAR_ADDR to the ARM64 JIT --- src/coreclr/src/jit/emitarm64.cpp | 10 +++++++-- src/coreclr/src/jit/lowerarmarch.cpp | 19 
+++++++++++++--- src/coreclr/src/jit/lsraarmarch.cpp | 33 ++++++++++++++++++---------- 3 files changed, 45 insertions(+), 17 deletions(-) diff --git a/src/coreclr/src/jit/emitarm64.cpp b/src/coreclr/src/jit/emitarm64.cpp index 81bca4118a9ff..41b9d41163772 100644 --- a/src/coreclr/src/jit/emitarm64.cpp +++ b/src/coreclr/src/jit/emitarm64.cpp @@ -12718,7 +12718,7 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR if (addr->isContained()) { - assert(addr->OperGet() == GT_LCL_VAR_ADDR || addr->OperGet() == GT_LEA); + assert(addr->OperGet() == GT_CLS_VAR_ADDR || addr->OperGet() == GT_LCL_VAR_ADDR || addr->OperGet() == GT_LEA); int offset = 0; DWORD lsl = 0; @@ -12795,7 +12795,13 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR } else // no Index register { - if (emitIns_valid_imm_for_ldst_offset(offset, emitTypeSize(indir->TypeGet()))) + if (addr->OperGet() == GT_CLS_VAR_ADDR) + { + // Get a temp integer register to compute long address. + regNumber addrReg = indir->GetSingleTempReg(); + emitIns_R_C(ins, attr, dataReg, addrReg, addr->AsClsVar()->gtClsVarHnd, 0); + } + else if (emitIns_valid_imm_for_ldst_offset(offset, emitTypeSize(indir->TypeGet()))) { // Then load/store dataReg from/to [memBase + offset] emitIns_R_R_I(ins, attr, dataReg, memBase->GetRegNum(), offset); diff --git a/src/coreclr/src/jit/lowerarmarch.cpp b/src/coreclr/src/jit/lowerarmarch.cpp index bbc0f8dff1ef5..d5272abb25c99 100644 --- a/src/coreclr/src/jit/lowerarmarch.cpp +++ b/src/coreclr/src/jit/lowerarmarch.cpp @@ -857,10 +857,12 @@ void Lowering::ContainCheckIndir(GenTreeIndir* indirNode) } #endif // FEATURE_SIMD - GenTree* addr = indirNode->Addr(); - bool makeContained = true; + GenTree* addr = indirNode->Addr(); + if ((addr->OperGet() == GT_LEA) && IsSafeToContainMem(indirNode, addr)) { + bool makeContained = true; + #ifdef TARGET_ARM // ARM floating-point load/store doesn't support a form similar to integer // ldr Rdst, [Rbase + Roffset] with offset in a register. The only supported @@ -884,12 +886,23 @@ void Lowering::ContainCheckIndir(GenTreeIndir* indirNode) } } } -#endif +#endif // TARGET_ARM + if (makeContained) { MakeSrcContained(indirNode, addr); } } +#ifdef TARGET_ARM64 + else if (addr->OperGet() == GT_CLS_VAR_ADDR) + { + // These nodes go into an addr mode: + // - GT_CLS_VAR_ADDR turns into a constant. 
+ + // make this contained, it turns into a constant that goes into an addr mode + MakeSrcContained(indirNode, addr); + } +#endif // TARGET_ARM64 } //------------------------------------------------------------------------ diff --git a/src/coreclr/src/jit/lsraarmarch.cpp b/src/coreclr/src/jit/lsraarmarch.cpp index fab94aaf11252..af7557807035a 100644 --- a/src/coreclr/src/jit/lsraarmarch.cpp +++ b/src/coreclr/src/jit/lsraarmarch.cpp @@ -74,23 +74,32 @@ int LinearScan::BuildIndir(GenTreeIndir* indirTree) if (addr->isContained()) { - assert(addr->OperGet() == GT_LEA); - GenTreeAddrMode* lea = addr->AsAddrMode(); - index = lea->Index(); - cns = lea->Offset(); - - // On ARM we may need a single internal register - // (when both conditions are true then we still only need a single internal register) - if ((index != nullptr) && (cns != 0)) + if (addr->OperGet() == GT_LEA) { - // ARM does not support both Index and offset so we need an internal register - buildInternalIntRegisterDefForNode(indirTree); + GenTreeAddrMode* lea = addr->AsAddrMode(); + index = lea->Index(); + cns = lea->Offset(); + + // On ARM we may need a single internal register + // (when both conditions are true then we still only need a single internal register) + if ((index != nullptr) && (cns != 0)) + { + // ARM does not support both Index and offset so we need an internal register + buildInternalIntRegisterDefForNode(indirTree); + } + else if (!emitter::emitIns_valid_imm_for_ldst_offset(cns, emitTypeSize(indirTree))) + { + // This offset can't be contained in the ldr/str instruction, so we need an internal register + buildInternalIntRegisterDefForNode(indirTree); + } } - else if (!emitter::emitIns_valid_imm_for_ldst_offset(cns, emitTypeSize(indirTree))) +#ifdef TARGET_ARM64 + else if (addr->OperGet() == GT_CLS_VAR_ADDR) { - // This offset can't be contained in the ldr/str instruction, so we need an internal register + // Reserve int to load constant from memory (IF_LARGELDC) buildInternalIntRegisterDefForNode(indirTree); } +#endif // TARGET_ARM64 } #ifdef FEATURE_SIMD From b2401b5fa7600aa38d90fca8b441457c3d509ac4 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Wed, 13 May 2020 08:06:42 -0700 Subject: [PATCH 08/10] Update lookupNamedIntrinsic to handle System.Runtime.Intrinsics for unsupported platforms --- src/coreclr/src/jit/compiler.h | 8 +- src/coreclr/src/jit/emitarm64.cpp | 1 + src/coreclr/src/jit/hwintrinsic.cpp | 44 ----------- src/coreclr/src/jit/importer.cpp | 93 ++++++++++++++++++++---- src/coreclr/src/jit/namedintrinsiclist.h | 5 +- 5 files changed, 88 insertions(+), 63 deletions(-) diff --git a/src/coreclr/src/jit/compiler.h b/src/coreclr/src/jit/compiler.h index 29625fb1b5c58..a8e6220db3d61 100644 --- a/src/coreclr/src/jit/compiler.h +++ b/src/coreclr/src/jit/compiler.h @@ -3714,6 +3714,10 @@ class Compiler CorInfoIntrinsics intrinsicID, bool tailCall); NamedIntrinsic lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method); + GenTree* impUnsupportedNamedIntrinsic(unsigned helper, + CORINFO_METHOD_HANDLE method, + CORINFO_SIG_INFO* sig, + bool mustExpand); #ifdef FEATURE_HW_INTRINSICS GenTree* impHWIntrinsic(NamedIntrinsic intrinsic, @@ -3721,10 +3725,6 @@ class Compiler CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig, bool mustExpand); - GenTree* impUnsupportedHWIntrinsic(unsigned helper, - CORINFO_METHOD_HANDLE method, - CORINFO_SIG_INFO* sig, - bool mustExpand); GenTree* impSimdAsHWIntrinsic(NamedIntrinsic intrinsic, CORINFO_CLASS_HANDLE clsHnd, CORINFO_METHOD_HANDLE method, diff --git 
a/src/coreclr/src/jit/emitarm64.cpp b/src/coreclr/src/jit/emitarm64.cpp index 41b9d41163772..e63cb61162113 100644 --- a/src/coreclr/src/jit/emitarm64.cpp +++ b/src/coreclr/src/jit/emitarm64.cpp @@ -7561,6 +7561,7 @@ void emitter::emitIns_R_C( assert(isValidGeneralDatasize(size)); } break; + default: unreached(); } diff --git a/src/coreclr/src/jit/hwintrinsic.cpp b/src/coreclr/src/jit/hwintrinsic.cpp index 8f33c4851771b..e0e835043dd39 100644 --- a/src/coreclr/src/jit/hwintrinsic.cpp +++ b/src/coreclr/src/jit/hwintrinsic.cpp @@ -89,50 +89,6 @@ var_types Compiler::getBaseTypeFromArgIfNeeded(NamedIntrinsic intrinsic, return baseType; } -//------------------------------------------------------------------------ -// impUnsupportedHWIntrinsic: returns a node for an unsupported HWIntrinsic -// -// Arguments: -// helper - JIT helper ID for the exception to be thrown -// method - method handle of the intrinsic function. -// sig - signature of the intrinsic call -// mustExpand - true if the intrinsic must return a GenTree*; otherwise, false -// -// Return Value: -// a gtNewMustThrowException if mustExpand is true; otherwise, nullptr -// -GenTree* Compiler::impUnsupportedHWIntrinsic(unsigned helper, - CORINFO_METHOD_HANDLE method, - CORINFO_SIG_INFO* sig, - bool mustExpand) -{ - // We've hit some error case and may need to return a node for the given error. - // - // When `mustExpand=false`, we are attempting to inline the intrinsic directly into another method. In this - // scenario, we need to return `nullptr` so that a GT_CALL to the intrinsic is emitted instead. This is to - // ensure that everything continues to behave correctly when optimizations are enabled (e.g. things like the - // inliner may expect the node we return to have a certain signature, and the `MustThrowException` node won't - // match that). - // - // When `mustExpand=true`, we are in a GT_CALL to the intrinsic and are attempting to JIT it. This will generally - // be in response to an indirect call (e.g. done via reflection) or in response to an earlier attempt returning - // `nullptr` (under `mustExpand=false`). In that scenario, we are safe to return the `MustThrowException` node. - - if (mustExpand) - { - for (unsigned i = 0; i < sig->numArgs; i++) - { - impPopStack(); - } - - return gtNewMustThrowException(helper, JITtype2varType(sig->retType), sig->retTypeClass); - } - else - { - return nullptr; - } -} - CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleForHWSIMD(var_types simdType, var_types simdBaseType) { if (simdType == TYP_SIMD16) diff --git a/src/coreclr/src/jit/importer.cpp b/src/coreclr/src/jit/importer.cpp index 077785f2e11f1..3c34cef60d76c 100644 --- a/src/coreclr/src/jit/importer.cpp +++ b/src/coreclr/src/jit/importer.cpp @@ -3478,29 +3478,34 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, { ni = lookupNamedIntrinsic(method); -#ifdef FEATURE_HW_INTRINSICS + // We specially support the following on all platforms to allow for dead + // code optimization and to more generally support recursive intrinsics. 
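For context on the comment above: once get_IsSupported has been imported as a constant node, ordinary branch folding removes whichever arm is unreachable. The sketch below models that effect in isolation; Named, ImportIsSupported, and the driver are simplified stand-ins invented for illustration, not the JIT's actual types.

    // Simplified stand-alone model (not JIT code): folding IsSupported to
    // a constant lets a later pass delete the guarded branch entirely.
    #include <cstdio>

    enum class Named
    {
        IsSupported_True,  // platform supports the ISA: fold to constant 1
        IsSupported_False, // platform does not: fold to constant 0
    };

    // Stand-in for the importer returning gtNewIconNode(true/false).
    constexpr int ImportIsSupported(Named ni)
    {
        return (ni == Named::IsSupported_True) ? 1 : 0;
    }

    int main()
    {
        // Models `if (Isa.IsSupported) { ... } else { ... }` after import:
        // the condition is now a compile-time constant, so the dead arm can
        // be dropped even on platforms where FEATURE_HW_INTRINSICS is off.
        if (ImportIsSupported(Named::IsSupported_False))
        {
            std::puts("hardware-accelerated path (dead code here)");
        }
        else
        {
            std::puts("software fallback path");
        }
        return 0;
    }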
+ if (ni == NI_IsSupported_True) { + assert(sig->numArgs == 0); return gtNewIconNode(true); } if (ni == NI_IsSupported_False) { + assert(sig->numArgs == 0); return gtNewIconNode(false); } if (ni == NI_Throw_PlatformNotSupportedException) { - return impUnsupportedHWIntrinsic(CORINFO_HELP_THROW_PLATFORM_NOT_SUPPORTED, method, sig, mustExpand); + return impUnsupportedNamedIntrinsic(CORINFO_HELP_THROW_PLATFORM_NOT_SUPPORTED, method, sig, mustExpand); } +#ifdef FEATURE_HW_INTRINSICS if ((ni > NI_HW_INTRINSIC_START) && (ni < NI_HW_INTRINSIC_END)) { GenTree* hwintrinsic = impHWIntrinsic(ni, clsHnd, method, sig, mustExpand); if (mustExpand && (hwintrinsic == nullptr)) { - return impUnsupportedHWIntrinsic(CORINFO_HELP_THROW_NOT_IMPLEMENTED, method, sig, mustExpand); + return impUnsupportedNamedIntrinsic(CORINFO_HELP_THROW_NOT_IMPLEMENTED, method, sig, mustExpand); } return hwintrinsic; @@ -4273,14 +4278,7 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, if (mustExpand && (retNode == nullptr)) { assert(!"Unhandled must expand intrinsic, throwing PlatformNotSupportedException"); - - for (unsigned i = 0; i < sig->numArgs; i++) - { - impPopStack(); - } - - return gtNewMustThrowException(CORINFO_HELP_THROW_PLATFORM_NOT_SUPPORTED, JITtype2varType(sig->retType), - sig->retTypeClass); + return impUnsupportedNamedIntrinsic(CORINFO_HELP_THROW_PLATFORM_NOT_SUPPORTED, method, sig, mustExpand); } // Optionally report if this intrinsic is special @@ -4497,8 +4495,13 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) result = SimdAsHWIntrinsicInfo::lookupId(&sig, className, methodName, enclosingClassName, sizeOfVectorT); } +#endif // FEATURE_HW_INTRINSICS else if (strncmp(namespaceName, "System.Runtime.Intrinsics", 25) == 0) { + // We go down this path even when FEATURE_HW_INTRINSICS isn't enabled + // so we can specially handle IsSupported and recursive calls. + +#ifdef FEATURE_HW_INTRINSICS namespaceName += 25; const char* platformNamespaceName; @@ -4517,22 +4520,40 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) result = HWIntrinsicInfo::lookupId(this, &sig, className, methodName, enclosingClassName); } +#endif // FEATURE_HW_INTRINSICS if (result == NI_Illegal) { if (strcmp(methodName, "get_IsSupported") == 0) { - return NI_IsSupported_False; + // This allows the relevant code paths to be dropped as dead code even + // on platforms where FEATURE_HW_INTRINSICS is not supported. + + result = NI_IsSupported_False; + } + else if (gtIsRecursiveCall(method)) + { + // For the framework itself, any recursive intrinsics will either be + // only supported on a single platform or will be guarded by a relevant + // IsSupported check so the throw PNSE will be valid or dropped. + + result = NI_Throw_PlatformNotSupportedException; } - return gtIsRecursiveCall(method) ? 
NI_Throw_PlatformNotSupportedException : NI_Illegal; } } -#endif // FEATURE_HW_INTRINSICS if (result == NI_Illegal) { JITDUMP("Not recognized\n"); } + else if (result == NI_IsSupported_False) + { + JITDUMP("Unsupported - return false"); + } + else if (result == NI_Throw_PlatformNotSupportedException) + { + JITDUMP("Unsupported - throw PlatformNotSupportedException"); + } else { JITDUMP("Recognized\n"); @@ -4540,6 +4561,50 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) return result; } +//------------------------------------------------------------------------ +// impUnsupportedNamedIntrinsic: Throws an exception for an unsupported named intrinsic +// +// Arguments: +// helper - JIT helper ID for the exception to be thrown +// method - method handle of the intrinsic function. +// sig - signature of the intrinsic call +// mustExpand - true if the intrinsic must return a GenTree*; otherwise, false +// +// Return Value: +// a gtNewMustThrowException if mustExpand is true; otherwise, nullptr +// +GenTree* Compiler::impUnsupportedNamedIntrinsic(unsigned helper, + CORINFO_METHOD_HANDLE method, + CORINFO_SIG_INFO* sig, + bool mustExpand) +{ + // We've hit some error case and may need to return a node for the given error. + // + // When `mustExpand=false`, we are attempting to inline the intrinsic directly into another method. In this + // scenario, we need to return `nullptr` so that a GT_CALL to the intrinsic is emitted instead. This is to + // ensure that everything continues to behave correctly when optimizations are enabled (e.g. things like the + // inliner may expect the node we return to have a certain signature, and the `MustThrowException` node won't + // match that). + // + // When `mustExpand=true`, we are in a GT_CALL to the intrinsic and are attempting to JIT it. This will generally + // be in response to an indirect call (e.g. done via reflection) or in response to an earlier attempt returning + // `nullptr` (under `mustExpand=false`). In that scenario, we are safe to return the `MustThrowException` node. 
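The `MustThrowException` tree mentioned above is the COMMA(jithelperthrow(void), dummy) shape built by gtNewMustThrowException earlier in this series. A minimal stand-alone analogue of that shape, with plain C++ in place of GenTree nodes and all names invented for illustration:

    // Minimal analogue of COMMA(throw, dummy): the helper call never
    // returns, but the surrounding expression must still carry the
    // intrinsic's return type, so an unreachable dummy value supplies it.
    #include <cstdlib>

    [[noreturn]] static void ThrowPlatformNotSupported()
    {
        std::abort(); // stands in for CORINFO_HELP_THROW_PLATFORM_NOT_SUPPORTED
    }

    static int MustThrowWithIntResult()
    {
        ThrowPlatformNotSupported(); // the "throw" half of the comma
        return 0;                    // the typed "dummy" half; never reached
    }

    int main()
    {
        return MustThrowWithIntResult(); // aborts before returning
    }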
+ + if (mustExpand) + { + for (unsigned i = 0; i < sig->numArgs; i++) + { + impPopStack(); + } + + return gtNewMustThrowException(helper, JITtype2varType(sig->retType), sig->retTypeClass); + } + else + { + return nullptr; + } +} + /*****************************************************************************/ GenTree* Compiler::impArrayAccessIntrinsic( diff --git a/src/coreclr/src/jit/namedintrinsiclist.h b/src/coreclr/src/jit/namedintrinsiclist.h index d105eabdbb3dd..457d434898c9b 100644 --- a/src/coreclr/src/jit/namedintrinsiclist.h +++ b/src/coreclr/src/jit/namedintrinsiclist.h @@ -22,11 +22,14 @@ enum NamedIntrinsic : unsigned short NI_System_Type_get_IsValueType, NI_System_Type_IsAssignableFrom, -#ifdef FEATURE_HW_INTRINSICS + // These are used by HWIntrinsics but are defined more generally + // to allow dead code optimization and handle the recursion case + NI_IsSupported_True, NI_IsSupported_False, NI_Throw_PlatformNotSupportedException, +#ifdef FEATURE_HW_INTRINSICS NI_HW_INTRINSIC_START, #if defined(TARGET_XARCH) #define HARDWARE_INTRINSIC(isa, name, size, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \ From e86e2d6e45e676a605a7734f5020cd197e1eb4d0 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Wed, 13 May 2020 10:53:50 -0700 Subject: [PATCH 09/10] Fixing INS_ldr in emitIns_R_C to use isValidVectorLSDatasize --- src/coreclr/src/jit/emitarm64.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/src/jit/emitarm64.cpp b/src/coreclr/src/jit/emitarm64.cpp index e63cb61162113..5780ffbe70be7 100644 --- a/src/coreclr/src/jit/emitarm64.cpp +++ b/src/coreclr/src/jit/emitarm64.cpp @@ -7548,7 +7548,7 @@ void emitter::emitIns_R_C( fmt = IF_LARGELDC; if (isVectorRegister(reg)) { - assert(isValidScalarDatasize(size)); + assert(isValidVectorLSDatasize(size)); // For vector (float/double) register, we should have an integer address reg to // compute long address which consists of page address and page offset. // For integer constant, this is not needed since the dest reg can be used to From ee58c32a4354c10e83f270bde1ffdd4ddb947972 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Wed, 13 May 2020 13:00:19 -0700 Subject: [PATCH 10/10] Elaborate on why we specially recognize the HWIntrinsics even on platforms that don't support them --- src/coreclr/src/jit/importer.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/coreclr/src/jit/importer.cpp b/src/coreclr/src/jit/importer.cpp index 3c34cef60d76c..014e42a1293d7 100644 --- a/src/coreclr/src/jit/importer.cpp +++ b/src/coreclr/src/jit/importer.cpp @@ -4501,6 +4501,23 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) // We go down this path even when FEATURE_HW_INTRINSICS isn't enabled // so we can specially handle IsSupported and recursive calls. + // This is required to appropriately handle the intrinsics on platforms + // which don't support them. On such a platform methods like Vector64.Create + // will be seen as `Intrinsic` and `mustExpand` due to having a code path + // which is recursive. When such a path is hit we expect it to be handled by + // the importer and we fire an assert if it wasn't and in previous versions + // of the JIT would fail fast. This was changed to throw a PNSE instead but + // we still assert as most intrinsics should have been recognized/handled. 
+ + // In order to avoid the assert, we specially handle the IsSupported checks + // (to better allow dead-code optimizations) and we explicitly throw a PNSE + // as we know that is the desired behavior for the HWIntrinsics when not + // supported. For cases like Vector64.Create, this is fine because it will + // be behind a relevant IsSupported check and will never be hit and the + // software fallback will be executed instead. + + CLANG_FORMAT_COMMENT_ANCHOR; + #ifdef FEATURE_HW_INTRINSICS namespaceName += 25; const char* platformNamespaceName;
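The namespace check that this final hunk extends works by prefix: match "System.Runtime.Intrinsics" (25 characters), advance past it, and inspect the remainder to choose the platform-specific lookup table. A stand-alone sketch of that dispatch, with an invented helper standing in for HWIntrinsicInfo::lookupId and its callers:

    // Stand-alone sketch of prefix-based namespace dispatch; illustrative
    // only, not the JIT's implementation.
    #include <cstdio>
    #include <cstring>

    static const char* PlatformSuffix(const char* namespaceName)
    {
        static const char prefix[] = "System.Runtime.Intrinsics";
        const size_t prefixLen = sizeof(prefix) - 1; // 25, as in the hunk above

        if (std::strncmp(namespaceName, prefix, prefixLen) != 0)
        {
            return nullptr; // not an intrinsics namespace at all
        }
        return namespaceName + prefixLen; // "", ".X86", ".Arm", ...
    }

    int main()
    {
        // An empty suffix selects the platform-agnostic Vector64/128/256
        // helpers; ".X86" or ".Arm" select the per-ISA tables.
        std::printf("%s\n", PlatformSuffix("System.Runtime.Intrinsics.Arm"));
        std::printf("%s\n", PlatformSuffix("System.Runtime.Intrinsics"));
        return 0;
    }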