Expose the Sin, Cos, and SinCos methods on the Vector types (#104848)

* Allow using a more efficient algorithm if twice the vector size is accelerated
* Remove an unnecessary generic parameter from ExpDouble
* Expose the Sin, Cos, and SinCos methods on the Vector types
* Use the vector Sin, Cos, and SinCos methods where possible
* Adding tests covering the vector Sin, Cos, and SinCos APIs
* Fix some small bugs in the Sin, Cos, and SinCos impls
* Ensure that very large inputs are handled
* Ensure region is correctly adjusted when determining the sign of sin
* Ensure that TernaryLogic lowering accounts for AND_NOT since it is not commutative
* Don't vectorize too large SinPi or CosPi inputs for TensorPrimitives
* Don't accelerate SinCosPi for the time being
* Don't accelerate TensorPrimitives.SinCos for the time being
* Don't include JIT changes, they were extracted to their own PR
dotnet · Jul 19, 2024 · 72f9ee0 · 72f9ee0
1 parent 5fd965d
commit 72f9ee0
Show file tree

Hide file tree

Showing 29 changed files with 2,932 additions and 490 deletions.
diff --git a/src/libraries/Common/tests/System/GenericMathTestMemberData.cs b/src/libraries/Common/tests/System/GenericMathTestMemberData.cs
diff --git a/...aries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Cos.cs b/...aries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Cos.cs
@@ -60,12 +60,24 @@ public static void Cos<T>(ReadOnlySpan<T> x, Span<T> destination)
             // 3. Reconstruction
             //      Hence, cos(x) = sin(x + pi/2) = (-1)^N * sin(f)
 
-            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
+            public static bool Vectorizable => (typeof(T) == typeof(float))
+                                            || (typeof(T) == typeof(double));
 
             public static T Invoke(T x) => T.Cos(x);
 
             public static Vector128<T> Invoke(Vector128<T> x)
             {
+#if NET9_0_OR_GREATER
+                if (typeof(T) == typeof(double))
+                {
+                    return Vector128.Cos(x.AsDouble()).As<double, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(float));
+                    return Vector128.Cos(x.AsSingle()).As<float, T>();
+                }
+#else
                 if (typeof(T) == typeof(float))
                 {
                     return CosOperatorSingle.Invoke(x.AsSingle()).As<float, T>();
@@ -75,10 +87,22 @@ public static Vector128<T> Invoke(Vector128<T> x)
                     Debug.Assert(typeof(T) == typeof(double));
                     return CosOperatorDouble.Invoke(x.AsDouble()).As<double, T>();
                 }
+#endif
             }
 
             public static Vector256<T> Invoke(Vector256<T> x)
             {
+#if NET9_0_OR_GREATER
+                if (typeof(T) == typeof(double))
+                {
+                    return Vector256.Cos(x.AsDouble()).As<double, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(float));
+                    return Vector256.Cos(x.AsSingle()).As<float, T>();
+                }
+#else
                 if (typeof(T) == typeof(float))
                 {
                     return CosOperatorSingle.Invoke(x.AsSingle()).As<float, T>();
@@ -88,10 +112,22 @@ public static Vector256<T> Invoke(Vector256<T> x)
                     Debug.Assert(typeof(T) == typeof(double));
                     return CosOperatorDouble.Invoke(x.AsDouble()).As<double, T>();
                 }
+#endif
             }
 
             public static Vector512<T> Invoke(Vector512<T> x)
             {
+#if NET9_0_OR_GREATER
+                if (typeof(T) == typeof(double))
+                {
+                    return Vector512.Cos(x.AsDouble()).As<double, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(float));
+                    return Vector512.Cos(x.AsSingle()).As<float, T>();
+                }
+#else
                 if (typeof(T) == typeof(float))
                 {
                     return CosOperatorSingle.Invoke(x.AsSingle()).As<float, T>();
@@ -101,9 +137,25 @@ public static Vector512<T> Invoke(Vector512<T> x)
                     Debug.Assert(typeof(T) == typeof(double));
                     return CosOperatorDouble.Invoke(x.AsDouble()).As<double, T>();
                 }
+#endif
             }
         }
 
+#if NET9_0_OR_GREATER
+        // These are still used by CosPiOperator
+
+        private readonly struct CosOperatorSingle
+        {
+            internal const uint MaxVectorizedValue = 0x4A989680u;
+            internal const uint SignMask = 0x7FFFFFFFu;
+        }
+
+        private readonly struct CosOperatorDouble
+        {
+            internal const ulong SignMask = 0x7FFFFFFFFFFFFFFFul;
+            internal const ulong MaxVectorizedValue = 0x4160000000000000ul;
+        }
+#else
         /// <summary>float.Cos(x)</summary>
         private readonly struct CosOperatorSingle : IUnaryOperator<float, float>
         {
@@ -347,5 +399,6 @@ public static Vector512<double> Invoke(Vector512<double> x)
                 return (poly.AsUInt64() ^ odd).AsDouble();
             }
         }
+#endif
     }
 }
diff --git a/...ies/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.CosPi.cs b/...ies/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.CosPi.cs
@@ -33,13 +33,15 @@ public static void CosPi<T>(ReadOnlySpan<T> x, Span<T> destination)
         private readonly struct CosPiOperator<T> : IUnaryOperator<T, T>
             where T : ITrigonometricFunctions<T>
         {
-            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
+            public static bool Vectorizable => (typeof(T) == typeof(float))
+                                            || (typeof(T) == typeof(double));
 
             public static T Invoke(T x) => T.CosPi(x);
 
             public static Vector128<T> Invoke(Vector128<T> x)
             {
                 Vector128<T> xpi = x * Vector128.Create(T.Pi);
+
                 if (typeof(T) == typeof(float))
                 {
                     if (Vector128.GreaterThanAny(xpi.AsUInt32() & Vector128.Create(CosOperatorSingle.SignMask), Vector128.Create(CosOperatorSingle.MaxVectorizedValue)))
@@ -62,6 +64,7 @@ public static Vector128<T> Invoke(Vector128<T> x)
             public static Vector256<T> Invoke(Vector256<T> x)
             {
                 Vector256<T> xpi = x * Vector256.Create(T.Pi);
+
                 if (typeof(T) == typeof(float))
                 {
                     if (Vector256.GreaterThanAny(xpi.AsUInt32() & Vector256.Create(CosOperatorSingle.SignMask), Vector256.Create(CosOperatorSingle.MaxVectorizedValue)))
@@ -84,6 +87,7 @@ public static Vector256<T> Invoke(Vector256<T> x)
             public static Vector512<T> Invoke(Vector512<T> x)
             {
                 Vector512<T> xpi = x * Vector512.Create(T.Pi);
+
                 if (typeof(T) == typeof(float))
                 {
                     if (Vector512.GreaterThanAny(xpi.AsUInt32() & Vector512.Create(CosOperatorSingle.SignMask), Vector512.Create(CosOperatorSingle.MaxVectorizedValue)))

diff --git a/...tem.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.FloatHelpers.cs b/...tem.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.FloatHelpers.cs
@@ -24,5 +24,79 @@ private static Vector256<double> ApplyScalar<TOperator>(Vector256<double> double
 
         private static Vector512<double> ApplyScalar<TOperator>(Vector512<double> doubles) where TOperator : IUnaryOperator<double, double> =>
             Vector512.Create(ApplyScalar<TOperator>(doubles.GetLower()), ApplyScalar<TOperator>(doubles.GetUpper()));
+
+        private static (Vector128<float> First, Vector128<float> Second) Apply2xScalar<TOperator>(Vector128<float> floats)
+            where TOperator : IUnaryInputBinaryOutput<float>
+        {
+            (float firstRes0, float secondRes0) = TOperator.Invoke(floats[0]);
+            (float firstRes1, float secondRes1) = TOperator.Invoke(floats[1]);
+            (float firstRes2, float secondRes2) = TOperator.Invoke(floats[2]);
+            (float firstRes3, float secondRes3) = TOperator.Invoke(floats[3]);
+
+            return (
+                Vector128.Create(firstRes0, firstRes1, firstRes2, firstRes3),
+                Vector128.Create(secondRes0, secondRes1, secondRes2, secondRes3)
+            );
+        }
+
+        private static (Vector256<float> First, Vector256<float> Second) Apply2xScalar<TOperator>(Vector256<float> floats)
+            where TOperator : IUnaryInputBinaryOutput<float>
+        {
+            (Vector128<float> firstLower, Vector128<float> secondLower) = Apply2xScalar<TOperator>(floats.GetLower());
+            (Vector128<float> firstUpper, Vector128<float> secondUpper) = Apply2xScalar<TOperator>(floats.GetUpper());
+
+            return (
+                Vector256.Create(firstLower, firstUpper),
+                Vector256.Create(secondLower, secondUpper)
+            );
+        }
+
+        private static (Vector512<float> First, Vector512<float> Second) Apply2xScalar<TOperator>(Vector512<float> floats)
+            where TOperator : IUnaryInputBinaryOutput<float>
+        {
+            (Vector256<float> firstLower, Vector256<float> secondLower) = Apply2xScalar<TOperator>(floats.GetLower());
+            (Vector256<float> firstUpper, Vector256<float> secondUpper) = Apply2xScalar<TOperator>(floats.GetUpper());
+
+            return (
+                Vector512.Create(firstLower, firstUpper),
+                Vector512.Create(secondLower, secondUpper)
+            );
+        }
+
+        private static (Vector128<double> First, Vector128<double> Second) Apply2xScalar<TOperator>(Vector128<double> doubles)
+            where TOperator : IUnaryInputBinaryOutput<double>
+        {
+            (double firstRes0, double secondRes0) = TOperator.Invoke(doubles[0]);
+            (double firstRes1, double secondRes1) = TOperator.Invoke(doubles[1]);
+
+            return (
+                Vector128.Create(firstRes0, firstRes1),
+                Vector128.Create(secondRes0, secondRes1)
+            );
+        }
+
+        private static (Vector256<double> First, Vector256<double> Second) Apply2xScalar<TOperator>(Vector256<double> doubles)
+            where TOperator : IUnaryInputBinaryOutput<double>
+        {
+            (Vector128<double> firstLower, Vector128<double> secondLower) = Apply2xScalar<TOperator>(doubles.GetLower());
+            (Vector128<double> firstUpper, Vector128<double> secondUpper) = Apply2xScalar<TOperator>(doubles.GetUpper());
+
+            return (
+                Vector256.Create(firstLower, firstUpper),
+                Vector256.Create(secondLower, secondUpper)
+            );
+        }
+
+        private static (Vector512<double> First, Vector512<double> Second) Apply2xScalar<TOperator>(Vector512<double> doubles)
+            where TOperator : IUnaryInputBinaryOutput<double>
+        {
+            (Vector256<double> firstLower, Vector256<double> secondLower) = Apply2xScalar<TOperator>(doubles.GetLower());
+            (Vector256<double> firstUpper, Vector256<double> secondUpper) = Apply2xScalar<TOperator>(doubles.GetUpper());
+
+            return (
+                Vector512.Create(firstLower, firstUpper),
+                Vector512.Create(secondLower, secondUpper)
+            );
+        }
     }
 }
diff --git a/...aries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Sin.cs b/...aries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Sin.cs
@@ -50,12 +50,24 @@ public static void Sin<T>(ReadOnlySpan<T> x, Span<T> destination)
             //
             // The term sin(f) can be approximated by using a polynomial
 
-            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
+            public static bool Vectorizable => (typeof(T) == typeof(float))
+                                            || (typeof(T) == typeof(double));
 
             public static T Invoke(T x) => T.Sin(x);
 
             public static Vector128<T> Invoke(Vector128<T> x)
             {
+#if NET9_0_OR_GREATER
+                if (typeof(T) == typeof(double))
+                {
+                    return Vector128.Sin(x.AsDouble()).As<double, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(float));
+                    return Vector128.Sin(x.AsSingle()).As<float, T>();
+                }
+#else
                 if (typeof(T) == typeof(float))
                 {
                     return SinOperatorSingle.Invoke(x.AsSingle()).As<float, T>();
@@ -65,10 +77,22 @@ public static Vector128<T> Invoke(Vector128<T> x)
                     Debug.Assert(typeof(T) == typeof(double));
                     return SinOperatorDouble.Invoke(x.AsDouble()).As<double, T>();
                 }
+#endif
             }
 
             public static Vector256<T> Invoke(Vector256<T> x)
             {
+#if NET9_0_OR_GREATER
+                if (typeof(T) == typeof(double))
+                {
+                    return Vector256.Sin(x.AsDouble()).As<double, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(float));
+                    return Vector256.Sin(x.AsSingle()).As<float, T>();
+                }
+#else
                 if (typeof(T) == typeof(float))
                 {
                     return SinOperatorSingle.Invoke(x.AsSingle()).As<float, T>();
@@ -78,10 +102,22 @@ public static Vector256<T> Invoke(Vector256<T> x)
                     Debug.Assert(typeof(T) == typeof(double));
                     return SinOperatorDouble.Invoke(x.AsDouble()).As<double, T>();
                 }
+#endif
             }
 
             public static Vector512<T> Invoke(Vector512<T> x)
             {
+#if NET9_0_OR_GREATER
+                if (typeof(T) == typeof(double))
+                {
+                    return Vector512.Sin(x.AsDouble()).As<double, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(float));
+                    return Vector512.Sin(x.AsSingle()).As<float, T>();
+                }
+#else
                 if (typeof(T) == typeof(float))
                 {
                     return SinOperatorSingle.Invoke(x.AsSingle()).As<float, T>();
@@ -91,9 +127,25 @@ public static Vector512<T> Invoke(Vector512<T> x)
                     Debug.Assert(typeof(T) == typeof(double));
                     return SinOperatorDouble.Invoke(x.AsDouble()).As<double, T>();
                 }
+#endif
             }
         }
 
+#if NET9_0_OR_GREATER
+        // These are still used by SinPiOperator
+
+        private readonly struct SinOperatorSingle
+        {
+            internal const uint MaxVectorizedValue = 0x49800000u;
+            internal const uint SignMask = 0x7FFFFFFFu;
+        }
+
+        private readonly struct SinOperatorDouble
+        {
+            internal const ulong SignMask = 0x7FFFFFFFFFFFFFFFul;
+            internal const ulong MaxVectorizedValue = 0x4160000000000000ul;
+        }
+#else
         /// <summary>float.Sin(x)</summary>
         private readonly struct SinOperatorSingle : IUnaryOperator<float, float>
         {
@@ -334,5 +386,6 @@ public static Vector512<double> Invoke(Vector512<double> x)
                 return (poly.AsUInt64() ^ (x.AsUInt64() & Vector512.Create(~SignMask)) ^ odd).AsDouble();
             }
         }
+#endif
     }
 }
diff --git a/...ies/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.SinPi.cs b/...ies/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.SinPi.cs
@@ -33,13 +33,15 @@ public static void SinPi<T>(ReadOnlySpan<T> x, Span<T> destination)
         private readonly struct SinPiOperator<T> : IUnaryOperator<T, T>
             where T : ITrigonometricFunctions<T>
         {
-            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
+            public static bool Vectorizable => (typeof(T) == typeof(float))
+                                            || (typeof(T) == typeof(double));
 
             public static T Invoke(T x) => T.SinPi(x);
 
             public static Vector128<T> Invoke(Vector128<T> x)
             {
                 Vector128<T> xpi = x * Vector128.Create(T.Pi);
+
                 if (typeof(T) == typeof(float))
                 {
                     if (Vector128.GreaterThanAny(xpi.AsUInt32() & Vector128.Create(SinOperatorSingle.SignMask), Vector128.Create(SinOperatorSingle.MaxVectorizedValue)))
@@ -62,6 +64,7 @@ public static Vector128<T> Invoke(Vector128<T> x)
             public static Vector256<T> Invoke(Vector256<T> x)
             {
                 Vector256<T> xpi = x * Vector256.Create(T.Pi);
+
                 if (typeof(T) == typeof(float))
                 {
                     if (Vector256.GreaterThanAny(xpi.AsUInt32() & Vector256.Create(SinOperatorSingle.SignMask), Vector256.Create(SinOperatorSingle.MaxVectorizedValue)))
@@ -84,6 +87,7 @@ public static Vector256<T> Invoke(Vector256<T> x)
             public static Vector512<T> Invoke(Vector512<T> x)
             {
                 Vector512<T> xpi = x * Vector512.Create(T.Pi);
+
                 if (typeof(T) == typeof(float))
                 {
                     if (Vector512.GreaterThanAny(xpi.AsUInt32() & Vector512.Create(SinOperatorSingle.SignMask), Vector512.Create(SinOperatorSingle.MaxVectorizedValue)))