From 773a97d614723045479684dc3dc32c5b6584f0d9 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Thu, 18 Jan 2024 23:55:37 -0500 Subject: [PATCH 01/14] Add more TensorPrimitive operations All are functional and tested, some are vectorized, others still need to be. --- .../ref/System.Numerics.Tensors.netcore.cs | 82 +- .../Tensors/TensorPrimitives.Single.cs | 8 +- .../TensorPrimitives.Single.netcore.cs | 12 +- .../Tensors/netcore/TensorPrimitives.T.cs | 2361 +++++++++++++---- .../netcore/TensorPrimitives.netcore.cs | 2060 ++++++++++---- .../TensorPrimitives.Single.netstandard.cs | 4 +- .../src/System/ThrowHelper.cs | 6 +- .../tests/TensorPrimitives.Generic.cs | 1342 +++++++++- .../TensorPrimitives.NonGeneric.Single.cs | 208 +- .../tests/TensorPrimitivesTests.cs | 282 +- 10 files changed, 5149 insertions(+), 1216 deletions(-) diff --git a/src/libraries/System.Numerics.Tensors/ref/System.Numerics.Tensors.netcore.cs b/src/libraries/System.Numerics.Tensors/ref/System.Numerics.Tensors.netcore.cs index da348aca97390..93da492b69dde 100644 --- a/src/libraries/System.Numerics.Tensors/ref/System.Numerics.Tensors.netcore.cs +++ b/src/libraries/System.Numerics.Tensors/ref/System.Numerics.Tensors.netcore.cs @@ -9,22 +9,65 @@ namespace System.Numerics.Tensors public static partial class TensorPrimitives { public static void Abs(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.INumberBase { } + public static void Acosh(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.IHyperbolicFunctions { } + public static void AcosPi(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.ITrigonometricFunctions { } + public static void Acos(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.ITrigonometricFunctions { } public static void AddMultiply(System.ReadOnlySpan x, System.ReadOnlySpan y, System.ReadOnlySpan multiplier, System.Span destination) where T : System.Numerics.IAdditionOperators, System.Numerics.IMultiplyOperators { } public static void AddMultiply(System.ReadOnlySpan x, System.ReadOnlySpan y, T multiplier, System.Span destination) where T : System.Numerics.IAdditionOperators, System.Numerics.IMultiplyOperators { } public static void AddMultiply(System.ReadOnlySpan x, T y, System.ReadOnlySpan multiplier, System.Span destination) where T : System.Numerics.IAdditionOperators, System.Numerics.IMultiplyOperators { } public static void Add(System.ReadOnlySpan x, System.ReadOnlySpan y, System.Span destination) where T : System.Numerics.IAdditionOperators, System.Numerics.IAdditiveIdentity { } public static void Add(System.ReadOnlySpan x, T y, System.Span destination) where T : System.Numerics.IAdditionOperators, System.Numerics.IAdditiveIdentity { } - public static void ConvertToHalf(System.ReadOnlySpan source, System.Span destination) { throw null; } - public static void ConvertToSingle(System.ReadOnlySpan source, System.Span destination) { throw null; } + public static void Asinh(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.IHyperbolicFunctions { } + public static void AsinPi(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.ITrigonometricFunctions { } + public static void Asin(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.ITrigonometricFunctions { } + public static void Atan2Pi(System.ReadOnlySpan y, System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.IFloatingPointIeee754 { } + public static void 
Atan2(System.ReadOnlySpan y, System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.IFloatingPointIeee754 { } + public static void Atanh(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.IHyperbolicFunctions { } + public static void AtanPi(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.ITrigonometricFunctions { } + public static void Atan(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.ITrigonometricFunctions { } + public static void BitwiseAnd(System.ReadOnlySpan x, System.ReadOnlySpan y, System.Span destination) where T : System.Numerics.IBitwiseOperators { } + public static void BitwiseAnd(System.ReadOnlySpan x, T y, System.Span destination) where T : System.Numerics.IBitwiseOperators { } + public static void BitwiseOr(System.ReadOnlySpan x, System.ReadOnlySpan y, System.Span destination) where T : System.Numerics.IBitwiseOperators { } + public static void BitwiseOr(System.ReadOnlySpan x, T y, System.Span destination) where T : System.Numerics.IBitwiseOperators { } + public static void Cbrt(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.IRootFunctions { } + public static void Ceiling(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.IFloatingPoint { } + public static void ConvertToHalf(System.ReadOnlySpan source, System.Span destination) { } + public static void ConvertToSingle(System.ReadOnlySpan source, System.Span destination) { } + public static void CopySign(System.ReadOnlySpan x, System.ReadOnlySpan y, System.Span destination) where T : System.Numerics.INumber { } + public static void CosPi(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.ITrigonometricFunctions { } + public static void Cos(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.ITrigonometricFunctions { } public static void Cosh(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.IHyperbolicFunctions { } public static T CosineSimilarity(System.ReadOnlySpan x, System.ReadOnlySpan y) where T : System.Numerics.IRootFunctions { throw null; } + public static void DegreesToRadians(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.ITrigonometricFunctions { } public static T Distance(System.ReadOnlySpan x, System.ReadOnlySpan y) where T : System.Numerics.IRootFunctions { throw null; } public static void Divide(System.ReadOnlySpan x, System.ReadOnlySpan y, System.Span destination) where T : System.Numerics.IDivisionOperators { } public static void Divide(System.ReadOnlySpan x, T y, System.Span destination) where T : System.Numerics.IDivisionOperators { } + public static void Divide(T x, System.ReadOnlySpan y, System.Span destination) where T : System.Numerics.IDivisionOperators { } public static T Dot(System.ReadOnlySpan x, System.ReadOnlySpan y) where T : System.Numerics.IAdditionOperators, System.Numerics.IAdditiveIdentity, System.Numerics.IMultiplyOperators, System.Numerics.IMultiplicativeIdentity { throw null; } public static void Exp(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.IExponentialFunctions { } + public static void Exp10M1(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.IExponentialFunctions { } + public static void Exp10(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.IExponentialFunctions { } + public static void Exp2M1(System.ReadOnlySpan x, System.Span destination) where T : 
System.Numerics.IExponentialFunctions { } + public static void Exp2(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.IExponentialFunctions { } + public static void ExpM1(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.IExponentialFunctions { } + public static void Floor(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.IFloatingPoint { } + public static void Hypot(System.ReadOnlySpan x, System.ReadOnlySpan y, System.Span destination) where T : System.Numerics.IRootFunctions { } + public static void Ieee754Remainder(System.ReadOnlySpan x, System.ReadOnlySpan y, System.Span destination) where T : System.Numerics.IFloatingPointIeee754 { } + public static void ILogB(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.IFloatingPointIeee754 { } + public static int IndexOfMaxMagnitude(System.ReadOnlySpan x) where T : System.Numerics.INumber { throw null; } + public static int IndexOfMax(System.ReadOnlySpan x) where T : System.Numerics.INumber { throw null; } + public static int IndexOfMinMagnitude(System.ReadOnlySpan x) where T : System.Numerics.INumber { throw null; } + public static int IndexOfMin(System.ReadOnlySpan x) where T : System.Numerics.INumber { throw null; } + public static void LeadingZeroCount(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.IBinaryInteger { } + public static void Lerp(System.ReadOnlySpan x, System.ReadOnlySpan y, System.ReadOnlySpan amount, System.Span destination) where T : System.Numerics.IFloatingPointIeee754 { } public static void Log2(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.ILogarithmicFunctions { } + public static void Log2P1(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.ILogarithmicFunctions { } + public static void LogP1(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.ILogarithmicFunctions { } + public static void Log(System.ReadOnlySpan x, System.ReadOnlySpan y, System.Span destination) where T : System.Numerics.ILogarithmicFunctions { } + public static void Log(System.ReadOnlySpan x, T y, System.Span destination) where T : System.Numerics.ILogarithmicFunctions { } public static void Log(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.ILogarithmicFunctions { } + public static void Log10P1(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.ILogarithmicFunctions { } + public static void Log10(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.ILogarithmicFunctions { } public static T MaxMagnitude(System.ReadOnlySpan x) where T : System.Numerics.INumberBase { throw null; } public static void MaxMagnitude(System.ReadOnlySpan x, System.ReadOnlySpan y, System.Span destination) where T : System.Numerics.INumberBase { } public static T Max(System.ReadOnlySpan x) where T : System.Numerics.INumber { throw null; } @@ -36,21 +79,56 @@ public static void Min(System.ReadOnlySpan x, System.ReadOnlySpan y, Sy public static void MultiplyAdd(System.ReadOnlySpan x, System.ReadOnlySpan y, System.ReadOnlySpan addend, System.Span destination) where T : System.Numerics.IAdditionOperators, System.Numerics.IMultiplyOperators { } public static void MultiplyAdd(System.ReadOnlySpan x, System.ReadOnlySpan y, T addend, System.Span destination) where T : System.Numerics.IAdditionOperators, System.Numerics.IMultiplyOperators { } public static void MultiplyAdd(System.ReadOnlySpan x, T y, System.ReadOnlySpan 
addend, System.Span destination) where T : System.Numerics.IAdditionOperators, System.Numerics.IMultiplyOperators { } + public static void MultiplyAddEstimate(System.ReadOnlySpan x, System.ReadOnlySpan y, System.ReadOnlySpan addend, System.Span destination) where T : System.Numerics.IFloatingPointIeee754 { } + public static void MultiplyAddEstimate(System.ReadOnlySpan x, System.ReadOnlySpan y, T addend, System.Span destination) where T : System.Numerics.IFloatingPointIeee754 { } + public static void MultiplyAddEstimate(System.ReadOnlySpan x, T y, System.ReadOnlySpan addend, System.Span destination) where T : System.Numerics.IFloatingPointIeee754 { } public static void Multiply(System.ReadOnlySpan x, System.ReadOnlySpan y, System.Span destination) where T : System.Numerics.IMultiplyOperators, System.Numerics.IMultiplicativeIdentity { } public static void Multiply(System.ReadOnlySpan x, T y, System.Span destination) where T : System.Numerics.IMultiplyOperators, System.Numerics.IMultiplicativeIdentity { } public static void Negate(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.IUnaryNegationOperators { } public static T Norm(System.ReadOnlySpan x) where T : System.Numerics.IRootFunctions { throw null; } + public static void OnesComplement(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.IBitwiseOperators { } + public static void PopCount(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.IBinaryInteger { } + public static void Pow(System.ReadOnlySpan x, System.ReadOnlySpan y, System.Span destination) where T : System.Numerics.IPowerFunctions { } + public static void Pow(System.ReadOnlySpan x, T y, System.Span destination) where T : System.Numerics.IPowerFunctions { } public static T ProductOfDifferences(System.ReadOnlySpan x, System.ReadOnlySpan y) where T : System.Numerics.ISubtractionOperators, System.Numerics.IMultiplyOperators, System.Numerics.IMultiplicativeIdentity { throw null; } public static T ProductOfSums(System.ReadOnlySpan x, System.ReadOnlySpan y) where T : System.Numerics.IAdditionOperators, System.Numerics.IAdditiveIdentity, System.Numerics.IMultiplyOperators, System.Numerics.IMultiplicativeIdentity { throw null; } public static T Product(System.ReadOnlySpan x) where T : System.Numerics.IMultiplyOperators, System.Numerics.IMultiplicativeIdentity { throw null; } + public static void RadiansToDegrees(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.ITrigonometricFunctions { } + public static void ReciprocalEstimate(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.IFloatingPointIeee754 { } + public static void ReciprocalSqrtEstimate(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.IFloatingPointIeee754 { } + public static void ReciprocalSqrt(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.IFloatingPointIeee754 { } + public static void Reciprocal(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.IFloatingPoint { } + public static void RootN(System.ReadOnlySpan x, int n, System.Span destination) where T : System.Numerics.IRootFunctions { } + public static void RotateLeft(System.ReadOnlySpan x, int rotateAmount, System.Span destination) where T : System.Numerics.IBinaryInteger { } + public static void RotateRight(System.ReadOnlySpan x, int rotateAmount, System.Span destination) where T : System.Numerics.IBinaryInteger { } + public static void Round(System.ReadOnlySpan x, int digits, 
System.MidpointRounding mode, System.Span destination) where T : System.Numerics.IFloatingPoint { } + public static void Round(System.ReadOnlySpan x, int digits, System.Span destination) where T : System.Numerics.IFloatingPoint { } + public static void Round(System.ReadOnlySpan x, System.MidpointRounding mode, System.Span destination) where T : System.Numerics.IFloatingPoint { } + public static void Round(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.IFloatingPoint { } + public static void ScaleB(System.ReadOnlySpan x, int n, System.Span destination) where T : System.Numerics.IFloatingPointIeee754 { } + public static void ShiftLeft(System.ReadOnlySpan x, int shiftAmount, System.Span destination) where T : System.Numerics.IBinaryInteger { } + public static void ShiftRightArithmetic(System.ReadOnlySpan x, int shiftAmount, System.Span destination) where T : System.Numerics.IBinaryInteger { } + public static void ShiftRightLogical(System.ReadOnlySpan x, int shiftAmount, System.Span destination) where T : System.Numerics.IBinaryInteger { } public static void Sigmoid(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.IExponentialFunctions { } + public static void SinCosPi(System.ReadOnlySpan x, System.Span sinPiDestination, System.Span cosPiDestination) where T : System.Numerics.ITrigonometricFunctions { } + public static void SinCos(System.ReadOnlySpan x, System.Span sinDestination, System.Span cosDestination) where T : System.Numerics.ITrigonometricFunctions { } public static void Sinh(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.IHyperbolicFunctions { } + public static void SinPi(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.ITrigonometricFunctions { } + public static void Sin(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.ITrigonometricFunctions { } public static void SoftMax(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.IExponentialFunctions { } + public static void Sqrt(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.IRootFunctions { } public static void Subtract(System.ReadOnlySpan x, System.ReadOnlySpan y, System.Span destination) where T : System.Numerics.ISubtractionOperators { } public static void Subtract(System.ReadOnlySpan x, T y, System.Span destination) where T : System.Numerics.ISubtractionOperators { } + public static void Subtract(T x, System.ReadOnlySpan y, System.Span destination) where T : System.Numerics.ISubtractionOperators { } public static T SumOfMagnitudes(System.ReadOnlySpan x) where T : System.Numerics.INumberBase { throw null; } public static T SumOfSquares(System.ReadOnlySpan x) where T : System.Numerics.IAdditionOperators, System.Numerics.IAdditiveIdentity, System.Numerics.IMultiplyOperators { throw null; } public static T Sum(System.ReadOnlySpan x) where T : System.Numerics.IAdditionOperators, System.Numerics.IAdditiveIdentity { throw null; } public static void Tanh(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.IHyperbolicFunctions { } + public static void TanPi(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.ITrigonometricFunctions { } + public static void Tan(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.ITrigonometricFunctions { } + public static void TrailingZeroCount(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.IBinaryInteger { } + public static void 
Truncate(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.IFloatingPoint { } + public static void Xor(System.ReadOnlySpan x, System.ReadOnlySpan y, System.Span destination) where T : System.Numerics.IBitwiseOperators { } + public static void Xor(System.ReadOnlySpan x, T y, System.Span destination) where T : System.Numerics.IBitwiseOperators { } } } diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.Single.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.Single.cs index 0ad05d15286d9..7255f7a212bd1 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.Single.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.Single.cs @@ -303,7 +303,7 @@ public static void Exp(ReadOnlySpan x, Span destination) => /// /// public static int IndexOfMax(ReadOnlySpan x) => - IndexOfMinMaxCore(x); + IndexOfMinMaxCore(x); /// Searches for the index of the single-precision floating-point number with the largest magnitude in the specified tensor. /// The tensor, represented as a span. @@ -320,7 +320,7 @@ public static int IndexOfMax(ReadOnlySpan x) => /// /// public static int IndexOfMaxMagnitude(ReadOnlySpan x) => - IndexOfMinMaxCore(x); + IndexOfMinMaxCore(x); /// Searches for the index of the smallest single-precision floating-point number in the specified tensor. /// The tensor, represented as a span. @@ -336,7 +336,7 @@ public static int IndexOfMaxMagnitude(ReadOnlySpan x) => /// /// public static int IndexOfMin(ReadOnlySpan x) => - IndexOfMinMaxCore(x); + IndexOfMinMaxCore(x); /// Searches for the index of the single-precision floating-point number with the smallest magnitude in the specified tensor. /// The tensor, represented as a span. @@ -353,7 +353,7 @@ public static int IndexOfMin(ReadOnlySpan x) => /// /// public static int IndexOfMinMagnitude(ReadOnlySpan x) => - IndexOfMinMaxCore(x); + IndexOfMinMaxCore(x); /// Computes the element-wise natural (base e) logarithm of single-precision floating-point numbers in the specified tensor. /// The tensor, represented as a span. diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Single.netcore.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Single.netcore.cs index 3747906c5317e..1148639d48be0 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Single.netcore.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Single.netcore.cs @@ -1,7 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -// This file exists to enable TensorPrimitives.float.cs to be compiled for both +// This file exists to enable TensorPrimitives.Single.cs to be compiled for both // netstandard2.0 and net8.0+ targets. It uses the XX_Single names and the operation // methods tied to float, whereas the net8.0+ worker implementations use generic math. // This file provides float-bound types and type defs that route one to the other. 
@@ -14,6 +14,10 @@ global using DivideOperator_Single = System.Numerics.Tensors.TensorPrimitives.DivideOperator; global using MultiplyOperator_Single = System.Numerics.Tensors.TensorPrimitives.MultiplyOperator; global using ExpOperator_Single = System.Numerics.Tensors.TensorPrimitives.ExpOperator; +global using IndexOfMaxOperator_Single = System.Numerics.Tensors.TensorPrimitives.IndexOfMaxOperator; +global using IndexOfMaxMagnitudeOperator_Single = System.Numerics.Tensors.TensorPrimitives.IndexOfMaxMagnitudeOperator; +global using IndexOfMinOperator_Single = System.Numerics.Tensors.TensorPrimitives.IndexOfMinOperator; +global using IndexOfMinMagnitudeOperator_Single = System.Numerics.Tensors.TensorPrimitives.IndexOfMinMagnitudeOperator; global using LogOperator_Single = System.Numerics.Tensors.TensorPrimitives.LogOperator; global using Log2Operator_Single = System.Numerics.Tensors.TensorPrimitives.Log2Operator; global using MaxOperator_Single = System.Numerics.Tensors.TensorPrimitives.MaxOperator; @@ -33,12 +37,6 @@ global using SquaredOperator_Single = System.Numerics.Tensors.TensorPrimitives.SquaredOperator; global using TanhOperator_Single = System.Numerics.Tensors.TensorPrimitives.TanhOperator; -// TODO: These should be made generic. Their implementations are still currently bound to float. -global using IndexOfMaxOperator_Single = System.Numerics.Tensors.TensorPrimitives.IndexOfMaxOperator; -global using IndexOfMaxMagnitudeOperator_Single = System.Numerics.Tensors.TensorPrimitives.IndexOfMaxMagnitudeOperator; -global using IndexOfMinOperator_Single = System.Numerics.Tensors.TensorPrimitives.IndexOfMinOperator; -global using IndexOfMinMagnitudeOperator_Single = System.Numerics.Tensors.TensorPrimitives.IndexOfMinMagnitudeOperator; - namespace System.Numerics.Tensors { public static unsafe partial class TensorPrimitives diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.T.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.T.cs index f9a44e8680123..b97f57499a84d 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.T.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.T.cs @@ -1,8 +1,9 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -// TODO: -// - Provide generic overloads for the IndexOfMin/Max{Magnitude} methods +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + namespace System.Numerics.Tensors { /// Performs primitive tensor operations over spans of memory. @@ -16,7 +17,7 @@ public static partial class TensorPrimitives /// is a signed integer type and contained a value equal to 's minimum value. /// /// - /// This method effectively computes [i] = MathF.Abs([i]). + /// This method effectively computes [i] = .Abs([i]). /// /// /// The absolute value of a is its numeric value without its sign. For example, the absolute value of both 1.2e-03 and -1.2e03 is 1.2e03. @@ -30,192 +31,211 @@ public static void Abs(ReadOnlySpan x, Span destination) where T : INumberBase => InvokeSpanIntoSpan>(x, destination); - /// Computes the element-wise addition of numbers in the specified tensors. - /// The first tensor, represented as a span. - /// The second tensor, represented as a span. + /// Computes the element-wise angle in radians whose cosine is the specifed number. 
+ /// The tensor, represented as a span. /// The destination tensor, represented as a span. - /// Length of must be same as length of . /// Destination is too short. /// and reference overlapping memory locations and do not begin at the same location. - /// and reference overlapping memory locations and do not begin at the same location. /// /// - /// This method effectively computes [i] = [i] + [i]. + /// This method effectively computes [i] = .Acos([i]). /// /// - /// If either of the element-wise input values is equal to , the resulting element-wise value is also NaN. + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. /// /// - public static void Add(ReadOnlySpan x, ReadOnlySpan y, Span destination) - where T : IAdditionOperators, IAdditiveIdentity => - InvokeSpanSpanIntoSpan>(x, y, destination); + public static void Acos(ReadOnlySpan x, Span destination) + where T : ITrigonometricFunctions => + InvokeSpanIntoSpan>(x, destination); - /// Computes the element-wise addition of numbers in the specified tensors. - /// The first tensor, represented as a span. - /// The second tensor, represented as a scalar. + /// Computes the element-wise hyperbolic arc-cosine of the specifed number. + /// The tensor, represented as a span. /// The destination tensor, represented as a span. /// Destination is too short. /// and reference overlapping memory locations and do not begin at the same location. /// /// - /// This method effectively computes [i] = [i] + . + /// This method effectively computes [i] = .Acosh([i]). /// /// - /// If either of the element-wise input values is equal to , the resulting element-wise value is also NaN. + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. /// /// - public static void Add(ReadOnlySpan x, T y, Span destination) - where T : IAdditionOperators, IAdditiveIdentity => - InvokeSpanScalarIntoSpan>(x, y, destination); + public static void Acosh(ReadOnlySpan x, Span destination) + where T : IHyperbolicFunctions => + InvokeSpanIntoSpan>(x, destination); - /// Computes the element-wise result of ( + ) * for the specified tensors. - /// The first tensor, represented as a span. - /// The second tensor, represented as a span. - /// The third tensor, represented as a span. + /// Computes the element-wise angle in radians whose cosine is the specifed number and divides the result by Pi. + /// The tensor, represented as a span. /// The destination tensor, represented as a span. - /// Length of must be same as length of and the length of . /// Destination is too short. /// and reference overlapping memory locations and do not begin at the same location. - /// and reference overlapping memory locations and do not begin at the same location. - /// and reference overlapping memory locations and do not begin at the same location. /// /// - /// This method effectively computes [i] = ([i] + [i]) * [i]. + /// This method effectively computes [i] = .AcosPi([i]). /// /// - /// If any of the element-wise input values is equal to , the resulting element-wise value is also NaN. + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. 
/// /// - public static void AddMultiply(ReadOnlySpan x, ReadOnlySpan y, ReadOnlySpan multiplier, Span destination) - where T : IAdditionOperators, IMultiplyOperators => - InvokeSpanSpanSpanIntoSpan>(x, y, multiplier, destination); + public static void AcosPi(ReadOnlySpan x, Span destination) + where T : ITrigonometricFunctions => + InvokeSpanIntoSpan>(x, destination); - /// Computes the element-wise result of ( + ) * for the specified tensors. - /// The first tensor, represented as a span. - /// The second tensor, represented as a span. - /// The third tensor, represented as a scalar. + /// Computes the element-wise angle in radians whose sine is the specifed number. + /// The tensor, represented as a span. /// The destination tensor, represented as a span. - /// Length of must be same as length of . /// Destination is too short. /// and reference overlapping memory locations and do not begin at the same location. - /// and reference overlapping memory locations and do not begin at the same location. /// /// - /// This method effectively computes [i] = ([i] + [i]) * . + /// This method effectively computes [i] = .Asin([i]). /// /// - /// If any of the element-wise input values is equal to , the resulting element-wise value is also NaN. + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. /// /// - public static void AddMultiply(ReadOnlySpan x, ReadOnlySpan y, T multiplier, Span destination) - where T : IAdditionOperators, IMultiplyOperators => - InvokeSpanSpanScalarIntoSpan>(x, y, multiplier, destination); + public static void Asin(ReadOnlySpan x, Span destination) + where T : ITrigonometricFunctions => + InvokeSpanIntoSpan>(x, destination); - /// Computes the element-wise result of ( + ) * for the specified tensors. - /// The first tensor, represented as a span. - /// The second tensor, represented as a scalar. - /// The third tensor, represented as a span. + /// Computes the element-wise hyperbolic arc-sine of the specifed number. + /// The tensor, represented as a span. /// The destination tensor, represented as a span. - /// Length of must be same as length of . /// Destination is too short. /// and reference overlapping memory locations and do not begin at the same location. - /// and reference overlapping memory locations and do not begin at the same location. /// /// - /// This method effectively computes [i] = ([i] + ) * [i]. + /// This method effectively computes [i] = .Asinh([i]). /// /// - /// If any of the element-wise input values is equal to , the resulting element-wise value is also NaN. + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. /// /// - public static void AddMultiply(ReadOnlySpan x, T y, ReadOnlySpan multiplier, Span destination) - where T : IAdditionOperators, IMultiplyOperators => - InvokeSpanScalarSpanIntoSpan>(x, y, multiplier, destination); + public static void Asinh(ReadOnlySpan x, Span destination) + where T : IHyperbolicFunctions => + InvokeSpanIntoSpan>(x, destination); - /// Computes the element-wise hyperbolic cosine of each radian angle in the specified tensor. + /// Computes the element-wise angle in radians whose sine is the specifed number and divides the result by Pi. /// The tensor, represented as a span. 
/// The destination tensor, represented as a span. /// Destination is too short. /// and reference overlapping memory locations and do not begin at the same location. /// /// - /// This method effectively computes [i] = .Cosh([i]). + /// This method effectively computes [i] = .AsinPi([i]). /// /// - /// If a value is equal to or , the result stored into the corresponding destination location is set to . - /// If a value is equal to , the result stored into the corresponding destination location is also NaN. + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. /// + /// + public static void AsinPi(ReadOnlySpan x, Span destination) + where T : ITrigonometricFunctions => + InvokeSpanIntoSpan>(x, destination); + + /// Computes the element-wise angle in radians whose tangent is the specifed number. + /// The tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// /// - /// The angles in x must be in radians. Use or multiply by /180 to convert degrees to radians. + /// This method effectively computes [i] = .Atan([i]). /// /// /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different /// operating systems or architectures. /// /// - public static void Cosh(ReadOnlySpan x, Span destination) - where T : IHyperbolicFunctions => - InvokeSpanIntoSpan>(x, destination); + public static void Atan(ReadOnlySpan x, Span destination) + where T : ITrigonometricFunctions => + InvokeSpanIntoSpan>(x, destination); - /// Computes the cosine similarity between the two specified non-empty, equal-length tensors of numbers. - /// The first tensor, represented as a span. - /// The second tensor, represented as a span. - /// The cosine similarity of the two tensors. - /// Length of must be same as length of . - /// and must not be empty. + /// Computes the element-wise hyperbolic arc-tangent of the specifed number. + /// The tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. /// /// - /// This method effectively computes TensorPrimitives.Dot(x, y) / (MathF.Sqrt(TensorPrimitives.SumOfSquares(x)) * MathF.Sqrt(TensorPrimitives.SumOfSquares(y)). + /// This method effectively computes [i] = .Atanh([i]). /// /// - /// If any element in either input tensor is equal to , , or , - /// NaN is returned. + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. + /// + /// + public static void Atanh(ReadOnlySpan x, Span destination) + where T : IHyperbolicFunctions => + InvokeSpanIntoSpan>(x, destination); + + /// Computes the element-wise angle in radians whose tangent is the specifed number and divides the result by Pi. + /// The tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = .AtanPi([i]). 
/// /// /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different /// operating systems or architectures. /// /// - public static T CosineSimilarity(ReadOnlySpan x, ReadOnlySpan y) - where T : IRootFunctions => - CosineSimilarityCore(x, y); + public static void AtanPi(ReadOnlySpan x, Span destination) + where T : ITrigonometricFunctions => + InvokeSpanIntoSpan>(x, destination); - /// Computes the distance between two points, specified as non-empty, equal-length tensors of numbers, in Euclidean space. - /// The first tensor, represented as a span. - /// The second tensor, represented as a span. - /// The Euclidean distance. - /// Length of must be same as length of . - /// and must not be empty. + /// Computes the element-wise arc-tangent for the quotient of two values in the specified tensors. + /// The first tensor, represented as a span. + /// The second tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Length of must be same as length of . + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// and reference overlapping memory locations and do not begin at the same location. /// /// - /// This method effectively computes the equivalent of: - /// - /// Span<T> difference = ...; - /// TensorPrimitives.Subtract(x, y, difference); - /// T result = MathF.Sqrt(TensorPrimitives.SumOfSquares(difference)); - /// - /// but without requiring additional temporary storage for the intermediate differences. + /// This method effectively computes [i] = .Atan2([i], [i]). /// /// - /// If any element in either input tensor is equal to , NaN is returned. + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. + /// + /// + public static void Atan2(ReadOnlySpan y, ReadOnlySpan x, Span destination) + where T : IFloatingPointIeee754 => + InvokeSpanSpanIntoSpan>(y, x, destination); + + /// Computes the element-wise arc-tangent for the quotient of two values in the specified tensors and divides the result by Pi. + /// The first tensor, represented as a span. + /// The second tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Length of must be same as length of . + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = .Atan2([i], [i]). /// /// /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different /// operating systems or architectures. /// /// - public static T Distance(ReadOnlySpan x, ReadOnlySpan y) - where T : IRootFunctions - { - if (x.IsEmpty) - { - ThrowHelper.ThrowArgument_SpansMustBeNonEmpty(); - } - - return T.Sqrt(Aggregate, AddOperator>(x, y)); - } + public static void Atan2Pi(ReadOnlySpan y, ReadOnlySpan x, Span destination) + where T : IFloatingPointIeee754 => + InvokeSpanSpanIntoSpan>(y, x, destination); - /// Computes the element-wise division of numbers in the specified tensors. + /// Computes the element-wise addition of numbers in the specified tensors. 
/// The first tensor, represented as a span. /// The second tensor, represented as a span. /// The destination tensor, represented as a span. @@ -223,226 +243,133 @@ public static T Distance(ReadOnlySpan x, ReadOnlySpan y) /// Destination is too short. /// and reference overlapping memory locations and do not begin at the same location. /// and reference overlapping memory locations and do not begin at the same location. - /// is an integer type and an element in is equal to zero. /// /// - /// This method effectively computes [i] = [i] / [i]. + /// This method effectively computes [i] = [i] + [i]. /// /// /// If either of the element-wise input values is equal to , the resulting element-wise value is also NaN. /// /// - public static void Divide(ReadOnlySpan x, ReadOnlySpan y, Span destination) - where T : IDivisionOperators => - InvokeSpanSpanIntoSpan>(x, y, destination); + public static void Add(ReadOnlySpan x, ReadOnlySpan y, Span destination) + where T : IAdditionOperators, IAdditiveIdentity => + InvokeSpanSpanIntoSpan>(x, y, destination); - /// Computes the element-wise division of numbers in the specified tensors. + /// Computes the element-wise addition of numbers in the specified tensors. /// The first tensor, represented as a span. /// The second tensor, represented as a scalar. /// The destination tensor, represented as a span. /// Destination is too short. /// and reference overlapping memory locations and do not begin at the same location. - /// is an integer type and is equal to zero. /// /// - /// This method effectively computes [i] = [i] / . + /// This method effectively computes [i] = [i] + . /// /// /// If either of the element-wise input values is equal to , the resulting element-wise value is also NaN. /// /// - public static void Divide(ReadOnlySpan x, T y, Span destination) - where T : IDivisionOperators => - InvokeSpanScalarIntoSpan>(x, y, destination); + public static void Add(ReadOnlySpan x, T y, Span destination) + where T : IAdditionOperators, IAdditiveIdentity => + InvokeSpanScalarIntoSpan>(x, y, destination); - /// Computes the dot product of two tensors containing numbers. + /// Computes the element-wise result of ( + ) * for the specified tensors. /// The first tensor, represented as a span. /// The second tensor, represented as a span. - /// The dot product. - /// Length of must be same as length of . + /// The third tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Length of must be same as length of and the length of . + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// and reference overlapping memory locations and do not begin at the same location. + /// and reference overlapping memory locations and do not begin at the same location. /// /// - /// This method effectively computes the equivalent of: - /// - /// Span<T> products = ...; - /// TensorPrimitives.Multiply(x, y, products); - /// T result = TensorPrimitives.Sum(products); - /// - /// but without requiring additional temporary storage for the intermediate products. It corresponds to the dot method defined by BLAS1. - /// - /// - /// If any of the input elements is equal to , the resulting value is also NaN. + /// This method effectively computes [i] = ([i] + [i]) * [i]. /// /// - /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different - /// operating systems or architectures. 
+ /// If any of the element-wise input values is equal to , the resulting element-wise value is also NaN. /// /// - public static T Dot(ReadOnlySpan x, ReadOnlySpan y) - where T : IAdditionOperators, IAdditiveIdentity, IMultiplyOperators, IMultiplicativeIdentity => - Aggregate, AddOperator>(x, y); + public static void AddMultiply(ReadOnlySpan x, ReadOnlySpan y, ReadOnlySpan multiplier, Span destination) + where T : IAdditionOperators, IMultiplyOperators => + InvokeSpanSpanSpanIntoSpan>(x, y, multiplier, destination); - /// Computes the element-wise result of raising e to the number powers in the specified tensor. - /// The tensor, represented as a span. + /// Computes the element-wise result of ( + ) * for the specified tensors. + /// The first tensor, represented as a span. + /// The second tensor, represented as a span. + /// The third tensor, represented as a scalar. /// The destination tensor, represented as a span. + /// Length of must be same as length of . /// Destination is too short. /// and reference overlapping memory locations and do not begin at the same location. + /// and reference overlapping memory locations and do not begin at the same location. /// /// - /// This method effectively computes [i] = .Exp([i]). - /// - /// - /// If a value equals or , the result stored into the corresponding destination location is set to NaN. - /// If a value equals , the result stored into the corresponding destination location is set to 0. + /// This method effectively computes [i] = ([i] + [i]) * . /// /// - /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different - /// operating systems or architectures. + /// If any of the element-wise input values is equal to , the resulting element-wise value is also NaN. /// /// - public static void Exp(ReadOnlySpan x, Span destination) - where T : IExponentialFunctions => - InvokeSpanIntoSpan>(x, destination); - - // TODO: Make IndexOfXx methods generic - // - ///// Searches for the index of the largest number in the specified tensor. - ///// The tensor, represented as a span. - ///// The index of the maximum element in , or -1 if is empty. - ///// - ///// - ///// The determination of the maximum element matches the IEEE 754:2019 `maximum` function. If any value equal to - ///// is present, the index of the first is returned. Positive 0 is considered greater than negative 0. - ///// - ///// - ///// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different - ///// operating systems or architectures. - ///// - ///// - //public static int IndexOfMax(ReadOnlySpan x) - // where T : INumber => - // IndexOfMinMaxCore>(x); - // - ///// Searches for the index of the number with the largest magnitude in the specified tensor. - ///// The tensor, represented as a span. - ///// The index of the element in with the largest magnitude (absolute value), or -1 if is empty. - ///// - ///// - ///// The determination of the maximum magnitude matches the IEEE 754:2019 `maximumMagnitude` function. If any value equal to - ///// is present, the index of the first is returned. If two values have the same magnitude and one is positive and the other is negative, - ///// the positive value is considered to have the larger magnitude. - ///// - ///// - ///// This method may call into the underlying C runtime or employ instructions specific to the current architecture. 
Exact results may differ between different - ///// operating systems or architectures. - ///// - ///// - //public static int IndexOfMaxMagnitude(ReadOnlySpan x) - // where T : INumberBase => - // IndexOfMinMaxCore>(x); - // - ///// Searches for the index of the smallest number in the specified tensor. - ///// The tensor, represented as a span. - ///// The index of the minimum element in , or -1 if is empty. - ///// - ///// - ///// The determination of the minimum element matches the IEEE 754:2019 `minimum` function. If any value equal to - ///// is present, the index of the first is returned. Negative 0 is considered smaller than positive 0. - ///// - ///// - ///// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different - ///// operating systems or architectures. - ///// - ///// - //public static int IndexOfMin(ReadOnlySpan x) - // where T : INumber => - // IndexOfMinMaxCore>(x); - // - ///// Searches for the index of the number with the smallest magnitude in the specified tensor. - ///// The tensor, represented as a span. - ///// The index of the element in with the smallest magnitude (absolute value), or -1 if is empty. - ///// - ///// - ///// The determination of the minimum magnitude matches the IEEE 754:2019 `minimumMagnitude` function. If any value equal to - ///// is present, the index of the first is returned. If two values have the same magnitude and one is positive and the other is negative, - ///// the negative value is considered to have the smaller magnitude. - ///// - ///// - ///// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different - ///// operating systems or architectures. - ///// - ///// - //public static int IndexOfMinMagnitude(ReadOnlySpan x) - // where T : INumberBase => - // IndexOfMinMaxCore>(x); + public static void AddMultiply(ReadOnlySpan x, ReadOnlySpan y, T multiplier, Span destination) + where T : IAdditionOperators, IMultiplyOperators => + InvokeSpanSpanScalarIntoSpan>(x, y, multiplier, destination); - /// Computes the element-wise natural (base e) logarithm of numbers in the specified tensor. - /// The tensor, represented as a span. + /// Computes the element-wise result of ( + ) * for the specified tensors. + /// The first tensor, represented as a span. + /// The second tensor, represented as a scalar. + /// The third tensor, represented as a span. /// The destination tensor, represented as a span. + /// Length of must be same as length of . /// Destination is too short. /// and reference overlapping memory locations and do not begin at the same location. + /// and reference overlapping memory locations and do not begin at the same location. /// /// - /// This method effectively computes [i] = .Log([i]). - /// - /// - /// If a value equals 0, the result stored into the corresponding destination location is set to . - /// If a value is negative or equal to , the result stored into the corresponding destination location is set to NaN. - /// If a value is positive infinity, the result stored into the corresponding destination location is set to . - /// Otherwise, if a value is positive, its natural logarithm is stored into the corresponding destination location. + /// This method effectively computes [i] = ([i] + ) * [i]. /// /// - /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. 
Exact results may differ between different - /// operating systems or architectures. + /// If any of the element-wise input values is equal to , the resulting element-wise value is also NaN. /// /// - public static void Log(ReadOnlySpan x, Span destination) - where T : ILogarithmicFunctions => - InvokeSpanIntoSpan>(x, destination); + public static void AddMultiply(ReadOnlySpan x, T y, ReadOnlySpan multiplier, Span destination) + where T : IAdditionOperators, IMultiplyOperators => + InvokeSpanScalarSpanIntoSpan>(x, y, multiplier, destination); - /// Computes the element-wise base 2 logarithm of numbers in the specified tensor. - /// The tensor, represented as a span. + /// Computes the element-wise bitwise AND of numbers in the specified tensors. + /// The first tensor, represented as a span. + /// The second tensor, represented as a span. /// The destination tensor, represented as a span. + /// Length of must be same as length of . /// Destination is too short. /// and reference overlapping memory locations and do not begin at the same location. + /// and reference overlapping memory locations and do not begin at the same location. /// /// - /// This method effectively computes [i] = .Log2([i]). - /// - /// - /// If a value equals 0, the result stored into the corresponding destination location is set to . - /// If a value is negative or equal to , the result stored into the corresponding destination location is set to NaN. - /// If a value is positive infinity, the result stored into the corresponding destination location is set to . - /// Otherwise, if a value is positive, its natural logarithm is stored into the corresponding destination location. - /// - /// - /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different - /// operating systems or architectures. + /// This method effectively computes [i] = [i] & [i]. /// /// - public static void Log2(ReadOnlySpan x, Span destination) - where T : ILogarithmicFunctions => - InvokeSpanIntoSpan>(x, destination); + public static void BitwiseAnd(ReadOnlySpan x, ReadOnlySpan y, Span destination) + where T : IBitwiseOperators => + InvokeSpanSpanIntoSpan>(x, y, destination); - /// Searches for the largest number in the specified tensor. - /// The tensor, represented as a span. - /// The maximum element in . - /// Length of must be greater than zero. + /// Computes the element-wise bitwise AND of numbers in the specified tensors. + /// The first tensor, represented as a span. + /// The second tensor, represented as a scalar. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. /// /// - /// The determination of the maximum element matches the IEEE 754:2019 `maximum` function. If any value equal to - /// is present, the first is returned. Positive 0 is considered greater than negative 0. - /// - /// - /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different - /// operating systems or architectures. + /// This method effectively computes [i] = [i] & . 
/// /// - public static T Max(ReadOnlySpan x) - where T : INumber => - MinMaxCore>(x); + public static void BitwiseAnd(ReadOnlySpan x, T y, Span destination) + where T : IBitwiseOperators => + InvokeSpanScalarIntoSpan>(x, y, destination); - /// Computes the element-wise maximum of the numbers in the specified tensors. + /// Computes the element-wise bitwise OR of numbers in the specified tensors. /// The first tensor, represented as a span. /// The second tensor, represented as a span. /// The destination tensor, represented as a span. @@ -452,41 +379,43 @@ public static T Max(ReadOnlySpan x) /// and reference overlapping memory locations and do not begin at the same location. /// /// - /// This method effectively computes [i] = MathF.Max([i], [i]). - /// - /// - /// The determination of the maximum element matches the IEEE 754:2019 `maximum` function. If either value is equal to , - /// that value is stored as the result. Positive 0 is considered greater than negative 0. - /// - /// - /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different - /// operating systems or architectures. + /// This method effectively computes [i] = [i] | [i]. /// /// - public static void Max(ReadOnlySpan x, ReadOnlySpan y, Span destination) - where T : INumber => - InvokeSpanSpanIntoSpan>(x, y, destination); + public static void BitwiseOr(ReadOnlySpan x, ReadOnlySpan y, Span destination) + where T : IBitwiseOperators => + InvokeSpanSpanIntoSpan>(x, y, destination); - /// Searches for the number with the largest magnitude in the specified tensor. - /// The tensor, represented as a span. - /// The element in with the largest magnitude (absolute value). - /// Length of must be greater than zero. + /// Computes the element-wise bitwise OR of numbers in the specified tensors. + /// The first tensor, represented as a span. + /// The second tensor, represented as a scalar. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. /// /// - /// The determination of the maximum magnitude matches the IEEE 754:2019 `maximumMagnitude` function. If any value equal to - /// is present, the first is returned. If two values have the same magnitude and one is positive and the other is negative, - /// the positive value is considered to have the larger magnitude. + /// This method effectively computes [i] = [i] | . /// + /// + public static void BitwiseOr(ReadOnlySpan x, T y, Span destination) + where T : IBitwiseOperators => + InvokeSpanScalarIntoSpan>(x, y, destination); + + /// Computes the element-wise ceiling of numbers in the specified tensor. + /// The first tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// /// - /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different - /// operating systems or architectures. + /// This method effectively computes [i] = T.Ceiling([i]). 
/// /// - public static T MaxMagnitude(ReadOnlySpan x) - where T : INumberBase => - MinMaxCore>(x); + public static void Ceiling(ReadOnlySpan x, Span destination) + where T : IFloatingPoint => + InvokeSpanIntoSpan>(x, destination); - /// Computes the element-wise number with the largest magnitude in the specified tensors. + /// Computes the element-wise result of copying the sign from one number to another number in the specified tensors. /// The first tensor, represented as a span. /// The second tensor, represented as a span. /// The destination tensor, represented as a span. @@ -494,234 +423,1169 @@ public static T MaxMagnitude(ReadOnlySpan x) /// Destination is too short. /// and reference overlapping memory locations and do not begin at the same location. /// and reference overlapping memory locations and do not begin at the same location. - /// This method effectively computes [i] = MathF.MaxMagnitude([i], [i]). /// /// - /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different - /// operating systems or architectures. + /// This method effectively computes [i] = T.CopySign([i], [i]). /// /// - public static void MaxMagnitude(ReadOnlySpan x, ReadOnlySpan y, Span destination) - where T : INumberBase => - InvokeSpanSpanIntoSpan>(x, y, destination); + public static void CopySign(ReadOnlySpan x, ReadOnlySpan y, Span destination) + where T : INumber => + InvokeSpanSpanIntoSpan>(x, y, destination); - /// Searches for the smallest number in the specified tensor. + /// Computes the element-wise cosine of the value in the specified tensor. /// The tensor, represented as a span. - /// The minimum element in . - /// Length of must be greater than zero. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. /// /// - /// The determination of the minimum element matches the IEEE 754:2019 `minimum` function. If any value is equal to - /// is present, the first is returned. Negative 0 is considered smaller than positive 0. + /// This method effectively computes [i] = .Cos([i]). + /// + /// + /// The angles in x must be in radians. Use or multiply by .Pi/180 to convert degrees to radians. /// /// /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different /// operating systems or architectures. /// /// - public static T Min(ReadOnlySpan x) - where T : INumber => - MinMaxCore>(x); + public static void Cos(ReadOnlySpan x, Span destination) + where T : ITrigonometricFunctions => + InvokeSpanIntoSpan>(x, destination); - /// Computes the element-wise minimum of the numbers in the specified tensors. - /// The first tensor, represented as a span. - /// The second tensor, represented as a span. + /// Computes the element-wise cosine of the value in the specified tensor that has been multiplied by Pi. + /// The tensor, represented as a span. /// The destination tensor, represented as a span. - /// Length of must be same as length of . /// Destination is too short. /// and reference overlapping memory locations and do not begin at the same location. - /// and reference overlapping memory locations and do not begin at the same location. /// /// - /// This method effectively computes [i] = MathF.Max([i], [i]). + /// This method effectively computes [i] = .CosPi([i]). 
/// /// - /// The determination of the maximum element matches the IEEE 754:2019 `maximum` function. If either value is equal to , - /// that value is stored as the result. Positive 0 is considered greater than negative 0. + /// The angles in x must be in radians. Use or multiply by .Pi/180 to convert degrees to radians. /// /// /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different /// operating systems or architectures. /// /// - public static void Min(ReadOnlySpan x, ReadOnlySpan y, Span destination) - where T : INumber => - InvokeSpanSpanIntoSpan>(x, y, destination); + public static void CosPi(ReadOnlySpan x, Span destination) + where T : ITrigonometricFunctions => + InvokeSpanIntoSpan>(x, destination); - /// Searches for the number with the smallest magnitude in the specified tensor. + /// Computes the element-wise hyperbolic cosine of each radian angle in the specified tensor. /// The tensor, represented as a span. - /// The element in with the smallest magnitude (absolute value). - /// Length of must be greater than zero. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. /// /// - /// The determination of the minimum magnitude matches the IEEE 754:2019 `minimumMagnitude` function. If any value equal to - /// is present, the first is returned. If two values have the same magnitude and one is positive and the other is negative, - /// the negative value is considered to have the smaller magnitude. + /// This method effectively computes [i] = .Cosh([i]). + /// + /// + /// If a value is equal to or , the result stored into the corresponding destination location is set to . + /// If a value is equal to , the result stored into the corresponding destination location is also NaN. + /// + /// + /// The angles in x must be in radians. Use or multiply by .Pi/180 to convert degrees to radians. /// /// /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different /// operating systems or architectures. /// /// - public static T MinMagnitude(ReadOnlySpan x) - where T : INumberBase => - MinMaxCore>(x); + public static void Cosh(ReadOnlySpan x, Span destination) + where T : IHyperbolicFunctions => + InvokeSpanIntoSpan>(x, destination); - /// Computes the element-wise number with the smallest magnitude in the specified tensors. + /// Computes the cosine similarity between the two specified non-empty, equal-length tensors of numbers. /// The first tensor, represented as a span. /// The second tensor, represented as a span. - /// The destination tensor, represented as a span. + /// The cosine similarity of the two tensors. /// Length of must be same as length of . - /// Destination is too short. - /// and reference overlapping memory locations and do not begin at the same location. - /// and reference overlapping memory locations and do not begin at the same location. - /// This method effectively computes [i] = MathF.MinMagnitude([i], [i]). + /// and must not be empty. /// /// - /// The determination of the maximum magnitude matches the IEEE 754:2019 `minimumMagnitude` function. If either value is equal to , - /// that value is stored as the result. 
If the two values have the same magnitude and one is positive and the other is negative, - /// the negative value is considered to have the smaller magnitude. + /// This method effectively computes TensorPrimitives.Dot(x, y) / (.Sqrt(TensorPrimitives.SumOfSquares(x)) * .Sqrt(TensorPrimitives.SumOfSquares(y))). + /// + /// + /// If any element in either input tensor is equal to , , or , + /// NaN is returned. /// /// /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different /// operating systems or architectures. /// /// - public static void MinMagnitude(ReadOnlySpan x, ReadOnlySpan y, Span destination) - where T : INumberBase => - InvokeSpanSpanIntoSpan>(x, y, destination); + public static T CosineSimilarity(ReadOnlySpan x, ReadOnlySpan y) + where T : IRootFunctions => + CosineSimilarityCore(x, y); - /// Computes the element-wise product of numbers in the specified tensors. - /// The first tensor, represented as a span. - /// The second tensor, represented as a span. + /// Computes the element-wise cube root of numbers in the specified tensor. + /// The tensor, represented as a span. /// The destination tensor, represented as a span. - /// Length of must be same as length of . /// Destination is too short. /// and reference overlapping memory locations and do not begin at the same location. - /// and reference overlapping memory locations and do not begin at the same location. /// /// - /// This method effectively computes [i] = [i] * [i]. - /// - /// - /// If either of the element-wise input values is equal to , the resulting element-wise value is also NaN. + /// This method effectively computes [i] = T.Cbrt([i]). /// /// - public static void Multiply(ReadOnlySpan x, ReadOnlySpan y, Span destination) - where T : IMultiplyOperators, IMultiplicativeIdentity => - InvokeSpanSpanIntoSpan>(x, y, destination); + public static void Cbrt(ReadOnlySpan x, Span destination) + where T : IRootFunctions => + InvokeSpanIntoSpan>(x, destination); - /// Computes the element-wise product of numbers in the specified tensors. - /// The first tensor, represented as a span. - /// The second tensor, represented as a scalar. + /// Computes the element-wise conversion of each number of degrees in the specified tensor to radians. + /// The tensor, represented as a span. /// The destination tensor, represented as a span. /// Destination is too short. /// and reference overlapping memory locations and do not begin at the same location. /// /// - /// This method effectively computes [i] = [i] * . - /// It corresponds to the scal method defined by BLAS1. - /// - /// - /// If either of the element-wise input values is equal to , the resulting element-wise value is also NaN. + /// This method effectively computes [i] = .DegreesToRadians([i]). /// /// - public static void Multiply(ReadOnlySpan x, T y, Span destination) - where T : IMultiplyOperators, IMultiplicativeIdentity => - InvokeSpanScalarIntoSpan>(x, y, destination); + public static void DegreesToRadians(ReadOnlySpan x, Span destination) + where T : ITrigonometricFunctions => + InvokeSpanIntoSpan>(x, destination); - /// Computes the element-wise result of ( * ) * for the specified tensors of numbers. + /// Computes the distance between two points, specified as non-empty, equal-length tensors of numbers, in Euclidean space. /// The first tensor, represented as a span. /// The second tensor, represented as a span. - /// The third tensor, represented as a span.
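[Illustrative note, not part of the patch: a sketch of how the CosineSimilarity result relates to the Dot and Norm building blocks mentioned in the remarks above; the input values are invented.]

using System;
using System.Numerics.Tensors;

ReadOnlySpan<float> x = stackalloc float[] { 1f, 2f, 3f };
ReadOnlySpan<float> y = stackalloc float[] { 4f, 5f, 6f };

// Single-call form.
float similarity = TensorPrimitives.CosineSimilarity(x, y);

// Equivalent composition: Dot(x, y) / (sqrt(SumOfSquares(x)) * sqrt(SumOfSquares(y))),
// where Norm is exactly that sqrt-of-sum-of-squares.
float composed = TensorPrimitives.Dot(x, y) / (TensorPrimitives.Norm(x) * TensorPrimitives.Norm(y));

Console.WriteLine($"{similarity} ~= {composed}");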
- /// The destination tensor, represented as a span. - /// Length of must be same as length of and length of . - /// Destination is too short. - /// and reference overlapping memory locations and do not begin at the same location. - /// and reference overlapping memory locations and do not begin at the same location. - /// and reference overlapping memory locations and do not begin at the same location. + /// The Euclidean distance. + /// Length of must be same as length of . + /// and must not be empty. /// /// - /// This method effectively computes [i] = ([i] * [i]) + [i]. + /// This method effectively computes the equivalent of: + /// + /// Span<T> difference = ...; + /// TensorPrimitives.Subtract(x, y, difference); + /// T result = .Sqrt(TensorPrimitives.SumOfSquares(difference)); + /// + /// but without requiring additional temporary storage for the intermediate differences. /// /// - /// If either of the element-wise input values is equal to , the resulting element-wise value is also NaN. + /// If any element in either input tensor is equal to , NaN is returned. + /// + /// + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. /// /// - public static void MultiplyAdd(ReadOnlySpan x, ReadOnlySpan y, ReadOnlySpan addend, Span destination) - where T : IAdditionOperators, IMultiplyOperators => - InvokeSpanSpanSpanIntoSpan>(x, y, addend, destination); + public static T Distance(ReadOnlySpan x, ReadOnlySpan y) + where T : IRootFunctions + { + if (x.IsEmpty) + { + ThrowHelper.ThrowArgument_SpansMustBeNonEmpty(); + } - /// Computes the element-wise result of ( * ) * for the specified tensors of numbers. + return T.Sqrt(Aggregate, AddOperator>(x, y)); + } + + /// Computes the element-wise division of numbers in the specified tensors. /// The first tensor, represented as a span. /// The second tensor, represented as a span. - /// The third tensor, represented as a scalar. /// The destination tensor, represented as a span. /// Length of must be same as length of . /// Destination is too short. /// and reference overlapping memory locations and do not begin at the same location. /// and reference overlapping memory locations and do not begin at the same location. + /// is an integer type and an element in is equal to zero. /// /// - /// This method effectively computes [i] = ([i] * [i]) + . - /// It corresponds to the axpy method defined by BLAS1. + /// This method effectively computes [i] = [i] / [i]. /// /// /// If either of the element-wise input values is equal to , the resulting element-wise value is also NaN. /// /// - public static void MultiplyAdd(ReadOnlySpan x, ReadOnlySpan y, T addend, Span destination) - where T : IAdditionOperators, IMultiplyOperators => - InvokeSpanSpanScalarIntoSpan>(x, y, addend, destination); + public static void Divide(ReadOnlySpan x, ReadOnlySpan y, Span destination) + where T : IDivisionOperators => + InvokeSpanSpanIntoSpan>(x, y, destination); - /// Computes the element-wise result of ( * ) * for the specified tensors of numbers. + /// Computes the element-wise division of numbers in the specified tensors. /// The first tensor, represented as a span. /// The second tensor, represented as a scalar. - /// The third tensor, represented as a span. /// The destination tensor, represented as a span. - /// Length of must be same as length of . /// Destination is too short. 
/// and reference overlapping memory locations and do not begin at the same location. - /// and reference overlapping memory locations and do not begin at the same location. + /// is an integer type and is equal to zero. /// /// - /// This method effectively computes [i] = ([i] * ) + [i]. + /// This method effectively computes [i] = [i] / . /// /// /// If either of the element-wise input values is equal to , the resulting element-wise value is also NaN. /// /// - public static void MultiplyAdd(ReadOnlySpan x, T y, ReadOnlySpan addend, Span destination) - where T : IAdditionOperators, IMultiplyOperators => - InvokeSpanScalarSpanIntoSpan>(x, y, addend, destination); + public static void Divide(ReadOnlySpan x, T y, Span destination) + where T : IDivisionOperators => + InvokeSpanScalarIntoSpan>(x, y, destination); - /// Computes the element-wise negation of each number in the specified tensor. - /// The tensor, represented as a span. + /// Computes the element-wise division of numbers in the specified tensors. + /// The first tensor, represented as a scalar. + /// The second tensor, represented as a span. /// The destination tensor, represented as a span. /// Destination is too short. - /// and reference overlapping memory locations and do not begin at the same location. + /// and reference overlapping memory locations and do not begin at the same location. + /// is an integer type and an element in is equal to zero. /// /// - /// This method effectively computes [i] = -[i]. + /// This method effectively computes [i] = / [i]. /// /// - /// If any of the element-wise input values is equal to , the resulting element-wise value is also NaN. + /// If either of the element-wise input values is equal to , the resulting element-wise value is also NaN. /// /// - public static void Negate(ReadOnlySpan x, Span destination) - where T : IUnaryNegationOperators => - InvokeSpanIntoSpan>(x, destination); + public static void Divide(T x, ReadOnlySpan y, Span destination) + where T : IDivisionOperators => + InvokeSpanScalarIntoSpan>(y, x, destination); - /// Computes the Euclidean norm of the specified tensor of numbers. + /// Computes the dot product of two tensors containing numbers. /// The first tensor, represented as a span. - /// The norm. + /// The second tensor, represented as a span. + /// The dot product. + /// Length of must be same as length of . + /// + /// + /// This method effectively computes the equivalent of: + /// + /// Span<T> products = ...; + /// TensorPrimitives.Multiply(x, y, products); + /// T result = TensorPrimitives.Sum(products); + /// + /// but without requiring additional temporary storage for the intermediate products. It corresponds to the dot method defined by BLAS1. + /// + /// + /// If any of the input elements is equal to , the resulting value is also NaN. + /// + /// + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. + /// + /// + public static T Dot(ReadOnlySpan x, ReadOnlySpan y) + where T : IAdditionOperators, IAdditiveIdentity, IMultiplyOperators, IMultiplicativeIdentity => + Aggregate, AddOperator>(x, y); + + /// Computes the element-wise result of raising e to the number powers in the specified tensor. + /// The tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. 
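[Illustrative note, not part of the patch: a rough sketch of the Dot equivalence spelled out in the remarks above, using an explicit temporary buffer; values are invented.]

using System;
using System.Numerics.Tensors;

ReadOnlySpan<float> x = stackalloc float[] { 1f, 2f, 3f };
ReadOnlySpan<float> y = stackalloc float[] { 4f, 5f, 6f };

// One call, no temporary storage.
float dot = TensorPrimitives.Dot(x, y);

// The equivalent two-step form from the remarks.
Span<float> products = stackalloc float[3];
TensorPrimitives.Multiply(x, y, products);
float viaSum = TensorPrimitives.Sum(products);   // same value as 'dot'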
+ /// + /// + /// This method effectively computes [i] = .Exp([i]). + /// + /// + /// If a value equals or , the result stored into the corresponding destination location is set to NaN. + /// If a value equals , the result stored into the corresponding destination location is set to 0. + /// + /// + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. + /// + /// + public static void Exp(ReadOnlySpan x, Span destination) + where T : IExponentialFunctions => + InvokeSpanIntoSpan>(x, destination); + + /// Computes the element-wise result of raising e to the number powers in the specified tensor, minus 1. + /// The tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = .ExpM1([i]). + /// + /// + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. + /// + /// + public static void ExpM1(ReadOnlySpan x, Span destination) + where T : IExponentialFunctions => + InvokeSpanIntoSpan>(x, destination); + + /// Computes the element-wise result of raising 2 to the number powers in the specified tensor. + /// The tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = .Exp2([i]). + /// + /// + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. + /// + /// + public static void Exp2(ReadOnlySpan x, Span destination) + where T : IExponentialFunctions => + InvokeSpanIntoSpan>(x, destination); + + /// Computes the element-wise result of raising 2 to the number powers in the specified tensor, minus one. + /// The tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = .Exp2M1([i]). + /// + /// + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. + /// + /// + public static void Exp2M1(ReadOnlySpan x, Span destination) + where T : IExponentialFunctions => + InvokeSpanIntoSpan>(x, destination); + + /// Computes the element-wise result of raising 10 to the number powers in the specified tensor. + /// The tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = .Exp10([i]). + /// + /// + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. 
+ /// + /// + public static void Exp10(ReadOnlySpan x, Span destination) + where T : IExponentialFunctions => + InvokeSpanIntoSpan>(x, destination); + + /// Computes the element-wise result of raising 10 to the number powers in the specified tensor, minus one. + /// The tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = .Exp10M1([i]). + /// + /// + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. + /// + /// + public static void Exp10M1(ReadOnlySpan x, Span destination) + where T : IExponentialFunctions => + InvokeSpanIntoSpan>(x, destination); + + /// Computes the element-wise floor of numbers in the specified tensor. + /// The first tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = T.Floor([i]). + /// + /// + public static void Floor(ReadOnlySpan x, Span destination) + where T : IFloatingPoint => + InvokeSpanIntoSpan>(x, destination); + + /// Computes the element-wise hypotensue given values from two tensors representing the lengths of the shorter sides in a right-angled triangle. + /// The first tensor, represented as a span. + /// The second tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Length of must be same as length of . + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = T.Hypot([i], [i]). + /// + /// + public static void Hypot(ReadOnlySpan x, ReadOnlySpan y, Span destination) + where T : IRootFunctions => + InvokeSpanSpanIntoSpan>(x, y, destination); + + /// Computes the element-wise remainder of the numbers in the specified tensors. + /// The first tensor, represented as a span. + /// The second tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Length of must be same as length of . + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = T.Ieee754Remainder([i], [i]). + /// + /// + public static void Ieee754Remainder(ReadOnlySpan x, ReadOnlySpan y, Span destination) + where T : IFloatingPointIeee754 => + InvokeSpanSpanIntoSpan>(x, y, destination); + + /// Computes the element-wise integer logarithm of numbers in the specified tensor. + /// The tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = T.ILogB([i]). 
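[Illustrative note, not part of the patch: a small, hypothetical example of the Hypot overload described above, using classic right-triangle side lengths.]

using System;
using System.Numerics.Tensors;

ReadOnlySpan<double> a = stackalloc double[] { 3.0, 5.0 };
ReadOnlySpan<double> b = stackalloc double[] { 4.0, 12.0 };
Span<double> c = stackalloc double[2];

// c[i] = sqrt(a[i]^2 + b[i]^2), i.e. T.Hypot(a[i], b[i]) => { 5, 13 }.
TensorPrimitives.Hypot(a, b, c);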
+ /// + /// + public static void ILogB(ReadOnlySpan x, Span destination) + where T : IFloatingPointIeee754 + { + if (x.Length > destination.Length) + { + ThrowHelper.ThrowArgument_DestinationTooShort(); + } + + // TODO: Vectorize + for (int i = 0; i < x.Length; i++) + { + destination[i] = T.ILogB(x[i]); + } + } + + /// Searches for the index of the largest number in the specified tensor. + /// The tensor, represented as a span. + /// The index of the maximum element in , or -1 if is empty. + /// + /// + /// The determination of the maximum element matches the IEEE 754:2019 `maximum` function. If any value equal to NaN + /// is present, the index of the first is returned. Positive 0 is considered greater than negative 0. + /// + /// + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. + /// + /// + public static int IndexOfMax(ReadOnlySpan x) + where T : INumber => + IndexOfMinMaxCore>(x); + + /// Searches for the index of the number with the largest magnitude in the specified tensor. + /// The tensor, represented as a span. + /// The index of the element in with the largest magnitude (absolute value), or -1 if is empty. + /// + /// + /// The determination of the maximum magnitude matches the IEEE 754:2019 `maximumMagnitude` function. If any value equal to NaN + /// is present, the index of the first is returned. If two values have the same magnitude and one is positive and the other is negative, + /// the positive value is considered to have the larger magnitude. + /// + /// + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. + /// + /// + public static int IndexOfMaxMagnitude(ReadOnlySpan x) + where T : INumber => + IndexOfMinMaxCore>(x); + + /// Searches for the index of the smallest number in the specified tensor. + /// The tensor, represented as a span. + /// The index of the minimum element in , or -1 if is empty. + /// + /// + /// The determination of the minimum element matches the IEEE 754:2019 `minimum` function. If any value equal to NaN + /// is present, the index of the first is returned. Negative 0 is considered smaller than positive 0. + /// + /// + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. + /// + /// + public static int IndexOfMin(ReadOnlySpan x) + where T : INumber => + IndexOfMinMaxCore>(x); + + /// Searches for the index of the number with the smallest magnitude in the specified tensor. + /// The tensor, represented as a span. + /// The index of the element in with the smallest magnitude (absolute value), or -1 if is empty. + /// + /// + /// The determination of the minimum magnitude matches the IEEE 754:2019 `minimumMagnitude` function. If any value equal to NaN + /// is present, the index of the first is returned. If two values have the same magnitude and one is positive and the other is negative, + /// the negative value is considered to have the smaller magnitude. + /// + /// + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. 
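[Illustrative note, not part of the patch: a brief usage sketch of the IndexOf* searches documented above; the inputs are invented.]

using System;
using System.Numerics.Tensors;

ReadOnlySpan<float> values = stackalloc float[] { -2f, 7f, 7f, -9f };

int maxIndex = TensorPrimitives.IndexOfMax(values);             // 1 (first occurrence of the maximum)
int maxMagIndex = TensorPrimitives.IndexOfMaxMagnitude(values); // 3 (|-9| is the largest magnitude)
int emptyIndex = TensorPrimitives.IndexOfMin(ReadOnlySpan<float>.Empty); // -1 for an empty span, per the docs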
+ /// + /// + public static int IndexOfMinMagnitude(ReadOnlySpan x) + where T : INumber => + IndexOfMinMaxCore>(x); + + /// Computes the element-wise leading zero count of numbers in the specified tensor. + /// The tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = T.LeadingZeroCount([i]). + /// + /// + public static void LeadingZeroCount(ReadOnlySpan x, Span destination) + where T : IBinaryInteger => + InvokeSpanIntoSpan>(x, destination); + + /// Computes the element-wise linear interpolation between two values based on the given weight in the specified tensors of numbers. + /// The first tensor, represented as a span. + /// The second tensor, represented as a span. + /// The third tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Length of must be same as length of and length of . + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// and reference overlapping memory locations and do not begin at the same location. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = T.Lerp([i], [i], [i]). + /// + /// + /// If either of the element-wise input values is equal to , the resulting element-wise value is also NaN. + /// + /// + public static void Lerp(ReadOnlySpan x, ReadOnlySpan y, ReadOnlySpan amount, Span destination) + where T : IFloatingPointIeee754 => + InvokeSpanSpanSpanIntoSpan>(x, y, amount, destination); + + /// Computes the element-wise natural (base e) logarithm of numbers in the specified tensor. + /// The tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = .Log([i]). + /// + /// + /// If a value equals 0, the result stored into the corresponding destination location is set to . + /// If a value is negative or equal to , the result stored into the corresponding destination location is set to NaN. + /// If a value is positive infinity, the result stored into the corresponding destination location is set to . + /// Otherwise, if a value is positive, its natural logarithm is stored into the corresponding destination location. + /// + /// + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. + /// + /// + public static void Log(ReadOnlySpan x, Span destination) + where T : ILogarithmicFunctions => + InvokeSpanIntoSpan>(x, destination); + + /// Computes the element-wise base 2 logarithm of numbers in the specified tensor. + /// The tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = .Log2([i]). + /// + /// + /// If a value equals 0, the result stored into the corresponding destination location is set to . + /// If a value is negative or equal to , the result stored into the corresponding destination location is set to NaN. 
+ /// If a value is positive infinity, the result stored into the corresponding destination location is set to . + /// Otherwise, if a value is positive, its base 2 logarithm is stored into the corresponding destination location. + /// + /// + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. + /// + /// + public static void Log2(ReadOnlySpan x, Span destination) + where T : ILogarithmicFunctions => + InvokeSpanIntoSpan>(x, destination); + + /// Computes the element-wise base 10 logarithm of numbers in the specified tensor. + /// The tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = .Log10([i]). + /// + /// + /// If a value equals 0, the result stored into the corresponding destination location is set to . + /// If a value is negative or equal to , the result stored into the corresponding destination location is set to NaN. + /// If a value is positive infinity, the result stored into the corresponding destination location is set to . + /// Otherwise, if a value is positive, its base 10 logarithm is stored into the corresponding destination location. + /// + /// + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. + /// + /// + public static void Log10(ReadOnlySpan x, Span destination) + where T : ILogarithmicFunctions => + InvokeSpanIntoSpan>(x, destination); + + /// Computes the element-wise natural (base e) logarithm of numbers in the specified tensor plus 1. + /// The tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = .LogP1([i]). + /// + /// + /// If a value equals 0, the result stored into the corresponding destination location is set to . + /// If a value is negative or equal to , the result stored into the corresponding destination location is set to NaN. + /// If a value is positive infinity, the result stored into the corresponding destination location is set to . + /// Otherwise, if a value is positive, its natural logarithm plus 1 is stored into the corresponding destination location. + /// + /// + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. + /// + /// + public static void LogP1(ReadOnlySpan x, Span destination) + where T : ILogarithmicFunctions => + InvokeSpanIntoSpan>(x, destination); + + /// Computes the element-wise base 2 logarithm of numbers in the specified tensor plus 1. + /// The tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = .Log2P1([i]). + /// + /// + /// If a value equals 0, the result stored into the corresponding destination location is set to . 
+ /// If a value is negative or equal to , the result stored into the corresponding destination location is set to NaN. + /// If a value is positive infinity, the result stored into the corresponding destination location is set to . + /// Otherwise, if a value is positive, its base 2 logarithm plus 1 is stored into the corresponding destination location. + /// + /// + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. + /// + /// + public static void Log2P1(ReadOnlySpan x, Span destination) + where T : ILogarithmicFunctions => + InvokeSpanIntoSpan>(x, destination); + + /// Computes the element-wise base 10 logarithm of numbers in the specified tensor plus 1. + /// The tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = .Log10P1([i]). + /// + /// + /// If a value equals 0, the result stored into the corresponding destination location is set to . + /// If a value is negative or equal to , the result stored into the corresponding destination location is set to NaN. + /// If a value is positive infinity, the result stored into the corresponding destination location is set to . + /// Otherwise, if a value is positive, its base 10 logarithm plus 1 is stored into the corresponding destination location. + /// + /// + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. + /// + /// + public static void Log10P1(ReadOnlySpan x, Span destination) + where T : ILogarithmicFunctions => + InvokeSpanIntoSpan>(x, destination); + + /// Computes the element-wise logarithm of the numbers in a specified tensor to the specified base in another specified tensor. + /// The first tensor, represented as a span. + /// The second tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Length of must be same as length of . + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = .Log([i], [i]). + /// + /// + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. + /// + /// + public static void Log(ReadOnlySpan x, ReadOnlySpan y, Span destination) + where T : ILogarithmicFunctions => + InvokeSpanSpanIntoSpan>(x, y, destination); + + /// Computes the element-wise logarithm of the numbers in a specified tensor to the specified base in another specified tensor. + /// The first tensor, represented as a span. + /// The second tensor, represented as a scalar. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = .Log([i], ). + /// + /// + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. 
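[Illustrative note, not part of the patch: a sketch of the Log overload that takes a scalar base, described above; values are arbitrary.]

using System;
using System.Numerics.Tensors;

ReadOnlySpan<double> x = stackalloc double[] { 1.0, 8.0, 64.0 };
Span<double> logs = stackalloc double[3];

// logs[i] = log base 2 of x[i] => { 0, 3, 6 }.
TensorPrimitives.Log(x, 2.0, logs);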
Exact results may differ between different + /// operating systems or architectures. + /// + /// + public static void Log(ReadOnlySpan x, T y, Span destination) + where T : ILogarithmicFunctions => + InvokeSpanScalarIntoSpan>(x, y, destination); + + /// Searches for the largest number in the specified tensor. + /// The tensor, represented as a span. + /// The maximum element in . + /// Length of must be greater than zero. + /// + /// + /// The determination of the maximum element matches the IEEE 754:2019 `maximum` function. If any value equal to + /// is present, the first is returned. Positive 0 is considered greater than negative 0. + /// + /// + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. + /// + /// + public static T Max(ReadOnlySpan x) + where T : INumber => + MinMaxCore>(x); + + /// Computes the element-wise maximum of the numbers in the specified tensors. + /// The first tensor, represented as a span. + /// The second tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Length of must be same as length of . + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = .Max([i], [i]). + /// + /// + /// The determination of the maximum element matches the IEEE 754:2019 `maximum` function. If either value is equal to , + /// that value is stored as the result. Positive 0 is considered greater than negative 0. + /// + /// + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. + /// + /// + public static void Max(ReadOnlySpan x, ReadOnlySpan y, Span destination) + where T : INumber => + InvokeSpanSpanIntoSpan>(x, y, destination); + + /// Searches for the number with the largest magnitude in the specified tensor. + /// The tensor, represented as a span. + /// The element in with the largest magnitude (absolute value). + /// Length of must be greater than zero. + /// + /// + /// The determination of the maximum magnitude matches the IEEE 754:2019 `maximumMagnitude` function. If any value equal to + /// is present, the first is returned. If two values have the same magnitude and one is positive and the other is negative, + /// the positive value is considered to have the larger magnitude. + /// + /// + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. + /// + /// + public static T MaxMagnitude(ReadOnlySpan x) + where T : INumberBase => + MinMaxCore>(x); + + /// Computes the element-wise number with the largest magnitude in the specified tensors. + /// The first tensor, represented as a span. + /// The second tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Length of must be same as length of . + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// and reference overlapping memory locations and do not begin at the same location. + /// This method effectively computes [i] = .MaxMagnitude([i], [i]). 
+ /// + /// + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. + /// + /// + public static void MaxMagnitude(ReadOnlySpan x, ReadOnlySpan y, Span destination) + where T : INumberBase => + InvokeSpanSpanIntoSpan>(x, y, destination); + + /// Searches for the smallest number in the specified tensor. + /// The tensor, represented as a span. + /// The minimum element in . + /// Length of must be greater than zero. + /// + /// + /// The determination of the minimum element matches the IEEE 754:2019 `minimum` function. If any value equal to + /// is present, the first is returned. Negative 0 is considered smaller than positive 0. + /// + /// + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. + /// + /// + public static T Min(ReadOnlySpan x) + where T : INumber => + MinMaxCore>(x); + + /// Computes the element-wise minimum of the numbers in the specified tensors. + /// The first tensor, represented as a span. + /// The second tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Length of must be same as length of . + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = .Min([i], [i]). + /// + /// + /// The determination of the minimum element matches the IEEE 754:2019 `minimum` function. If either value is equal to , + /// that value is stored as the result. Negative 0 is considered smaller than positive 0. + /// + /// + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. + /// + /// + public static void Min(ReadOnlySpan x, ReadOnlySpan y, Span destination) + where T : INumber => + InvokeSpanSpanIntoSpan>(x, y, destination); + + /// Searches for the number with the smallest magnitude in the specified tensor. + /// The tensor, represented as a span. + /// The element in with the smallest magnitude (absolute value). + /// Length of must be greater than zero. + /// + /// + /// The determination of the minimum magnitude matches the IEEE 754:2019 `minimumMagnitude` function. If any value equal to + /// is present, the first is returned. If two values have the same magnitude and one is positive and the other is negative, + /// the negative value is considered to have the smaller magnitude. + /// + /// + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. + /// + /// + public static T MinMagnitude(ReadOnlySpan x) + where T : INumberBase => + MinMaxCore>(x); + + /// Computes the element-wise number with the smallest magnitude in the specified tensors. + /// The first tensor, represented as a span. + /// The second tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Length of must be same as length of . + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location.
+ /// and reference overlapping memory locations and do not begin at the same location. + /// This method effectively computes [i] = .MinMagnitude([i], [i]). + /// + /// + /// The determination of the maximum magnitude matches the IEEE 754:2019 `minimumMagnitude` function. If either value is equal to , + /// that value is stored as the result. If the two values have the same magnitude and one is positive and the other is negative, + /// the negative value is considered to have the smaller magnitude. + /// + /// + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. + /// + /// + public static void MinMagnitude(ReadOnlySpan x, ReadOnlySpan y, Span destination) + where T : INumberBase => + InvokeSpanSpanIntoSpan>(x, y, destination); + + /// Computes the element-wise product of numbers in the specified tensors. + /// The first tensor, represented as a span. + /// The second tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Length of must be same as length of . + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = [i] * [i]. + /// + /// + /// If either of the element-wise input values is equal to , the resulting element-wise value is also NaN. + /// + /// + public static void Multiply(ReadOnlySpan x, ReadOnlySpan y, Span destination) + where T : IMultiplyOperators, IMultiplicativeIdentity => + InvokeSpanSpanIntoSpan>(x, y, destination); + + /// Computes the element-wise product of numbers in the specified tensors. + /// The first tensor, represented as a span. + /// The second tensor, represented as a scalar. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = [i] * . + /// It corresponds to the scal method defined by BLAS1. + /// + /// + /// If either of the element-wise input values is equal to , the resulting element-wise value is also NaN. + /// + /// + public static void Multiply(ReadOnlySpan x, T y, Span destination) + where T : IMultiplyOperators, IMultiplicativeIdentity => + InvokeSpanScalarIntoSpan>(x, y, destination); + + /// Computes the element-wise result of ( * ) * for the specified tensors of numbers. + /// The first tensor, represented as a span. + /// The second tensor, represented as a span. + /// The third tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Length of must be same as length of and length of . + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// and reference overlapping memory locations and do not begin at the same location. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = ([i] * [i]) + [i]. + /// + /// + /// If either of the element-wise input values is equal to , the resulting element-wise value is also NaN. 
+ /// + /// + public static void MultiplyAdd(ReadOnlySpan x, ReadOnlySpan y, ReadOnlySpan addend, Span destination) + where T : IAdditionOperators, IMultiplyOperators => + InvokeSpanSpanSpanIntoSpan>(x, y, addend, destination); + + /// Computes the element-wise result of ( * ) * for the specified tensors of numbers. + /// The first tensor, represented as a span. + /// The second tensor, represented as a span. + /// The third tensor, represented as a scalar. + /// The destination tensor, represented as a span. + /// Length of must be same as length of . + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = ([i] * [i]) + . + /// It corresponds to the axpy method defined by BLAS1. + /// + /// + /// If either of the element-wise input values is equal to , the resulting element-wise value is also NaN. + /// + /// + public static void MultiplyAdd(ReadOnlySpan x, ReadOnlySpan y, T addend, Span destination) + where T : IAdditionOperators, IMultiplyOperators => + InvokeSpanSpanScalarIntoSpan>(x, y, addend, destination); + + /// Computes the element-wise result of ( * ) * for the specified tensors of numbers. + /// The first tensor, represented as a span. + /// The second tensor, represented as a scalar. + /// The third tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Length of must be same as length of . + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = ([i] * ) + [i]. + /// + /// + /// If either of the element-wise input values is equal to , the resulting element-wise value is also NaN. + /// + /// + public static void MultiplyAdd(ReadOnlySpan x, T y, ReadOnlySpan addend, Span destination) + where T : IAdditionOperators, IMultiplyOperators => + InvokeSpanScalarSpanIntoSpan>(x, y, addend, destination); + + /// Computes the element-wise result of ( * ) * for the specified tensors of numbers. + /// The first tensor, represented as a span. + /// The second tensor, represented as a span. + /// The third tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Length of must be same as length of and length of . + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// and reference overlapping memory locations and do not begin at the same location. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = ([i] * [i]) + [i]. + /// + /// + /// If either of the element-wise input values is equal to , the resulting element-wise value is also NaN. + /// + /// + public static void MultiplyAddEstimate(ReadOnlySpan x, ReadOnlySpan y, ReadOnlySpan addend, Span destination) + where T : IFloatingPointIeee754 => + InvokeSpanSpanSpanIntoSpan>(x, y, addend, destination); + + /// Computes the element-wise result of ( * ) * for the specified tensors of numbers. + /// The first tensor, represented as a span. + /// The second tensor, represented as a span. + /// The third tensor, represented as a scalar. + /// The destination tensor, represented as a span. 
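[Illustrative note, not part of the patch: a hypothetical usage sketch of the MultiplyAdd overloads above, here the scalar-addend (axpy-style) form; values are invented.]

using System;
using System.Numerics.Tensors;

ReadOnlySpan<float> x = stackalloc float[] { 1f, 2f, 3f };
ReadOnlySpan<float> y = stackalloc float[] { 10f, 20f, 30f };
Span<float> destination = stackalloc float[3];

// destination[i] = (x[i] * y[i]) + 0.5f => { 10.5, 40.5, 90.5 }.
TensorPrimitives.MultiplyAdd(x, y, 0.5f, destination);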
+ /// Length of must be same as length of . + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = ([i] * [i]) + . + /// It corresponds to the axpy method defined by BLAS1. + /// + /// + /// If either of the element-wise input values is equal to , the resulting element-wise value is also NaN. + /// + /// + public static void MultiplyAddEstimate(ReadOnlySpan x, ReadOnlySpan y, T addend, Span destination) + where T : IFloatingPointIeee754 => + InvokeSpanSpanScalarIntoSpan>(x, y, addend, destination); + + /// Computes the element-wise result of ( * ) * for the specified tensors of numbers. + /// The first tensor, represented as a span. + /// The second tensor, represented as a scalar. + /// The third tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Length of must be same as length of . + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = ([i] * ) + [i]. + /// + /// + /// If either of the element-wise input values is equal to , the resulting element-wise value is also NaN. + /// + /// + public static void MultiplyAddEstimate(ReadOnlySpan x, T y, ReadOnlySpan addend, Span destination) + where T : IFloatingPointIeee754 => + InvokeSpanScalarSpanIntoSpan>(x, y, addend, destination); + + /// Computes the element-wise negation of each number in the specified tensor. + /// The tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = -[i]. + /// + /// + /// If any of the element-wise input values is equal to , the resulting element-wise value is also NaN. + /// + /// + public static void Negate(ReadOnlySpan x, Span destination) + where T : IUnaryNegationOperators => + InvokeSpanIntoSpan>(x, destination); + + /// Computes the Euclidean norm of the specified tensor of numbers. + /// The first tensor, represented as a span. + /// The norm. + /// + /// + /// This method effectively computes .Sqrt(TensorPrimitives.SumOfSquares(x)). + /// This is often referred to as the Euclidean norm or L2 norm. + /// It corresponds to the nrm2 method defined by BLAS1. + /// + /// + /// If any of the input values is equal to , the result value is also NaN. + /// + /// + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. + /// + /// + public static T Norm(ReadOnlySpan x) + where T : IRootFunctions => + T.Sqrt(SumOfSquares(x)); + + /// Computes the element-wise one's complement of numbers in the specified tensor. + /// The first tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = ~[i]. 
+ /// + /// + public static void OnesComplement(ReadOnlySpan x, Span destination) + where T : IBitwiseOperators => + InvokeSpanIntoSpan>(x, destination); + + /// Computes the element-wise population count of numbers in the specified tensor. + /// The tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = T.PopCount([i]). + /// + /// + public static void PopCount(ReadOnlySpan x, Span destination) + where T : IBinaryInteger => + InvokeSpanIntoSpan>(x, destination); + + /// Computes the element-wise power of a number in a specified tensor raised to a number in another specified tensors. + /// The first tensor, represented as a span. + /// The second tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Length of must be same as length of . + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = T.Pow([i], [i]). + /// + /// + public static void Pow(ReadOnlySpan x, ReadOnlySpan y, Span destination) + where T : IPowerFunctions => + InvokeSpanSpanIntoSpan>(x, y, destination); + + /// Computes the element-wise power of a number in a specified tensor raised to a number in another specified tensors. + /// The first tensor, represented as a span. + /// The second tensor, represented as a scalar. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = T.Pow([i], ). + /// + /// + public static void Pow(ReadOnlySpan x, T y, Span destination) + where T : IPowerFunctions => + InvokeSpanScalarIntoSpan>(x, y, destination); + + /// Computes the product of all elements in the specified non-empty tensor of numbers. + /// The tensor, represented as a span. + /// The result of multiplying all elements in . + /// Length of must be greater than zero. /// /// - /// This method effectively computes MathF.Sqrt(TensorPrimitives.SumOfSquares(x)). - /// This is often referred to as the Euclidean norm or L2 norm. - /// It corresponds to the nrm2 method defined by BLAS1. - /// - /// /// If any of the input values is equal to , the result value is also NaN. /// /// @@ -729,96 +1593,421 @@ public static void Negate(ReadOnlySpan x, Span destination) /// operating systems or architectures. /// /// - public static T Norm(ReadOnlySpan x) - where T : IRootFunctions => - T.Sqrt(SumOfSquares(x)); + public static T Product(ReadOnlySpan x) + where T : IMultiplyOperators, IMultiplicativeIdentity + { + if (x.IsEmpty) + { + ThrowHelper.ThrowArgument_SpansMustBeNonEmpty(); + } + + return Aggregate, MultiplyOperator>(x); + } + + /// Computes the product of the element-wise differences of the numbers in the specified non-empty tensors. + /// The first tensor, represented as a span. + /// The second tensor, represented as a span. + /// The result of multiplying the element-wise subtraction of the elements in the second tensor from the first tensor. + /// Length of both input spans must be greater than zero. + /// and must have the same length. 
+ /// + /// + /// This method effectively computes: + /// + /// Span<T> differences = ...; + /// TensorPrimitives.Subtract(x, y, differences); + /// T result = TensorPrimitives.Product(differences); + /// + /// but without requiring additional temporary storage for the intermediate differences. + /// + /// + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. + /// + /// + public static T ProductOfDifferences(ReadOnlySpan x, ReadOnlySpan y) + where T : ISubtractionOperators, IMultiplyOperators, IMultiplicativeIdentity + { + if (x.IsEmpty) + { + ThrowHelper.ThrowArgument_SpansMustBeNonEmpty(); + } + + return Aggregate, MultiplyOperator>(x, y); + } + + /// Computes the product of the element-wise sums of the numbers in the specified non-empty tensors. + /// The first tensor, represented as a span. + /// The second tensor, represented as a span. + /// The result of multiplying the element-wise additions of the elements in each tensor. + /// Length of both input spans must be greater than zero. + /// and must have the same length. + /// + /// + /// This method effectively computes: + /// + /// Span<T> sums = ...; + /// TensorPrimitives.Add(x, y, sums); + /// T result = TensorPrimitives.Product(sums); + /// + /// but without requiring additional temporary storage for the intermediate sums. + /// + /// + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. + /// + /// + public static T ProductOfSums(ReadOnlySpan x, ReadOnlySpan y) + where T : IAdditionOperators, IAdditiveIdentity, IMultiplyOperators, IMultiplicativeIdentity + { + if (x.IsEmpty) + { + ThrowHelper.ThrowArgument_SpansMustBeNonEmpty(); + } + + return Aggregate, MultiplyOperator>(x, y); + } + + /// Computes the element-wise conversion of each number of radians in the specified tensor to degrees. + /// The tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = .RadiansToDegrees([i]). + /// + /// + public static void RadiansToDegrees(ReadOnlySpan x, Span destination) + where T : ITrigonometricFunctions => + InvokeSpanIntoSpan>(x, destination); + + /// Computes the element-wise reciprocal of numbers in the specified tensor. + /// The tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// is an integer type and an element in is equal to zero. + /// + /// + /// This method effectively computes [i] = 1 / [i]. + /// + /// + public static void Reciprocal(ReadOnlySpan x, Span destination) + where T : IFloatingPoint => + InvokeSpanIntoSpan>(x, destination); + + /// Computes the element-wise reciprocal of numbers in the specified tensor. + /// The tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// is an integer type and an element in is equal to zero. + /// + /// + /// This method effectively computes [i] = 1 / [i]. 
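[Illustrative note, not part of the patch: a sketch of the ProductOfSums equivalence given in the remarks above, with invented inputs.]

using System;
using System.Numerics.Tensors;

ReadOnlySpan<float> x = stackalloc float[] { 1f, 2f, 3f };
ReadOnlySpan<float> y = stackalloc float[] { 4f, 5f, 6f };

// Fused form: multiplies the element-wise sums without a temporary buffer.
float fused = TensorPrimitives.ProductOfSums(x, y);   // (1+4)*(2+5)*(3+6) = 315

// Equivalent two-step form from the remarks.
Span<float> sums = stackalloc float[3];
TensorPrimitives.Add(x, y, sums);
float twoStep = TensorPrimitives.Product(sums);       // also 315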
+ /// + /// + public static void ReciprocalEstimate(ReadOnlySpan x, Span destination) + where T : IFloatingPointIeee754 => + InvokeSpanIntoSpan>(x, destination); + + /// Computes the element-wise reciprocal of the square root of numbers in the specified tensor. + /// The tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// is an integer type and an element in is equal to zero. + /// + /// + /// This method effectively computes [i] = 1 / [i]. + /// + /// + public static void ReciprocalSqrt(ReadOnlySpan x, Span destination) + where T : IFloatingPointIeee754 => + InvokeSpanIntoSpan>(x, destination); + + /// Computes the element-wise reciprocal of the square root of numbers in the specified tensor. + /// The tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// is an integer type and an element in is equal to zero. + /// + /// + /// This method effectively computes [i] = 1 / [i]. + /// + /// + public static void ReciprocalSqrtEstimate(ReadOnlySpan x, Span destination) + where T : IFloatingPointIeee754 => + InvokeSpanIntoSpan>(x, destination); + + /// Computes the element-wise rotation right of numbers in the specified tensor by the specified rotation amount. + /// The tensor, represented as a span. + /// The destination tensor, represented as a span. + /// The degree of the root to be computed, represented as a scalar. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = T.RootN([i], ). + /// + /// + public static void RootN(ReadOnlySpan x, int n, Span destination) + where T : IRootFunctions + { + if (x.Length > destination.Length) + { + ThrowHelper.ThrowArgument_DestinationTooShort(); + } + + ValidateInputOutputSpanNonOverlapping(x, destination); + + // TODO: Vectorize + for (int i = 0; i < x.Length; i++) + { + destination[i] = T.RootN(x[i], n); + } + } + + /// Computes the element-wise rotation left of numbers in the specified tensor by the specified rotation amount. + /// The tensor, represented as a span. + /// The destination tensor, represented as a span. + /// The number of bits to rotate, represented as a scalar. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = T.RotateLeft([i], ). + /// + /// + public static void RotateLeft(ReadOnlySpan x, int rotateAmount, Span destination) + where T : IBinaryInteger + { + if (x.Length > destination.Length) + { + ThrowHelper.ThrowArgument_DestinationTooShort(); + } + + ValidateInputOutputSpanNonOverlapping(x, destination); + + // TODO: Vectorize + for (int i = 0; i < x.Length; i++) + { + destination[i] = T.RotateLeft(x[i], rotateAmount); + } + } + + /// Computes the element-wise rotation right of numbers in the specified tensor by the specified rotation amount. + /// The tensor, represented as a span. + /// The destination tensor, represented as a span. + /// The number of bits to rotate, represented as a scalar. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. 
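A quick illustration of the RootN overload above (a sketch, assuming the API shape in this patch): the n argument selects the degree of the root.

    ReadOnlySpan<double> x = stackalloc double[] { 8.0, 27.0, 64.0 };
    Span<double> roots = stackalloc double[3];
    TensorPrimitives.RootN(x, 3, roots);  // roots[i] = double.RootN(x[i], 3) => { 2, 3, 4 }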
+ /// + /// + /// This method effectively computes [i] = T.RotateRight([i], ). + /// + /// + public static void RotateRight(ReadOnlySpan x, int rotateAmount, Span destination) + where T : IBinaryInteger + { + if (x.Length > destination.Length) + { + ThrowHelper.ThrowArgument_DestinationTooShort(); + } + + ValidateInputOutputSpanNonOverlapping(x, destination); + + // TODO: Vectorize + for (int i = 0; i < x.Length; i++) + { + destination[i] = T.RotateRight(x[i], rotateAmount); + } + } + + /// Computes the element-wise rounding of the numbers in the specified tensor + /// The tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = T.Round([i]). + /// + /// + public static void Round(ReadOnlySpan x, Span destination) + where T : IFloatingPoint => + Round(x, digits: 0, MidpointRounding.ToEven, destination); + + /// Computes the element-wise rounding of the numbers in the specified tensor + /// The tensor, represented as a span. + /// The mode under which should be rounded. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = T.Round([i], ). + /// + /// + public static void Round(ReadOnlySpan x, MidpointRounding mode, Span destination) + where T : IFloatingPoint => + Round(x, digits: 0, mode, destination); + + /// Computes the element-wise rounding of the numbers in the specified tensor + /// The tensor, represented as a span. + /// The number of fractional digits to which the numbers in should be rounded. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = T.Round([i], ). + /// + /// + public static void Round(ReadOnlySpan x, int digits, Span destination) where T : IFloatingPoint => + Round(x, digits, MidpointRounding.ToEven, destination); + + /// Computes the element-wise rounding of the numbers in the specified tensor + /// The tensor, represented as a span. + /// The number of fractional digits to which the numbers in should be rounded. + /// The mode under which should be rounded. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = T.Round([i], , ). + /// + /// + public static void Round(ReadOnlySpan x, int digits, MidpointRounding mode, Span destination) + where T : IFloatingPoint + { + if (x.Length > destination.Length) + { + ThrowHelper.ThrowArgument_DestinationTooShort(); + } + + ValidateInputOutputSpanNonOverlapping(x, destination); + + // TODO: Vectorize + for (int i = 0; i < x.Length; i++) + { + destination[i] = T.Round(x[i], digits, mode); + } + } + + /// Computes the element-wise product of numbers in the specified tensor and their base-radix raised to the specified power. + /// The tensor, represented as a span. + /// The value to which base-radix is raised before multipliying x, represented as a scalar. + /// The destination tensor, represented as a span. + /// Destination is too short. 
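The Round overloads above all funnel into Round(x, digits, mode, destination); an illustrative sketch of how they differ:

    ReadOnlySpan<double> x = stackalloc double[] { 2.5, 3.14159 };
    Span<double> dest = stackalloc double[2];

    TensorPrimitives.Round(x, dest);                                // ToEven midpoints: { 2, 3 }
    TensorPrimitives.Round(x, MidpointRounding.AwayFromZero, dest); // { 3, 3 }
    TensorPrimitives.Round(x, 2, dest);                             // two fractional digits: { 2.5, 3.14 }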
+ /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = T.ILogB([i]). + /// + /// + public static void ScaleB(ReadOnlySpan x, int n, Span destination) + where T : IFloatingPointIeee754 + { + if (x.Length > destination.Length) + { + ThrowHelper.ThrowArgument_DestinationTooShort(); + } + + ValidateInputOutputSpanNonOverlapping(x, destination); - /// Computes the product of all elements in the specified non-empty tensor of numbers. + // TODO: Vectorize + for (int i = 0; i < x.Length; i++) + { + destination[i] = T.ScaleB(x[i], n); + } + } + + /// Computes the element-wise shifting left of numbers in the specified tensor by the specified shift amount. /// The tensor, represented as a span. - /// The result of multiplying all elements in . - /// Length of must be greater than zero. + /// The destination tensor, represented as a span. + /// The number of bits to shift, represented as a scalar. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. /// /// - /// If any of the input values is equal to , the result value is also NaN. - /// - /// - /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different - /// operating systems or architectures. + /// This method effectively computes [i] = [i] << . /// /// - public static T Product(ReadOnlySpan x) - where T : IMultiplyOperators, IMultiplicativeIdentity + public static void ShiftLeft(ReadOnlySpan x, int shiftAmount, Span destination) + where T : IBinaryInteger { - if (x.IsEmpty) + if (x.Length > destination.Length) { - ThrowHelper.ThrowArgument_SpansMustBeNonEmpty(); + ThrowHelper.ThrowArgument_DestinationTooShort(); } - return Aggregate, MultiplyOperator>(x); + ValidateInputOutputSpanNonOverlapping(x, destination); + + // TODO: Vectorize + for (int i = 0; i < x.Length; i++) + { + destination[i] = x[i] << shiftAmount; + } } - /// Computes the product of the element-wise differences of the numbers in the specified non-empty tensors. - /// The first tensor, represented as a span. - /// The second tensor, represented as a span. - /// The result of multiplying the element-wise subtraction of the elements in the second tensor from the first tensor. - /// Length of both input spans must be greater than zero. - /// and must have the same length. + /// Computes the element-wise arithmetic (signed) shifting right of numbers in the specified tensor by the specified shift amount. + /// The tensor, represented as a span. + /// The destination tensor, represented as a span. + /// The number of bits to shift, represented as a scalar. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. /// /// - /// This method effectively computes: - /// - /// Span<T> differences = ...; - /// TensorPrimitives.Subtract(x, y, differences); - /// T result = TensorPrimitives.Product(differences); - /// - /// but without requiring additional temporary storage for the intermediate differences. - /// - /// - /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different - /// operating systems or architectures. + /// This method effectively computes [i] = [i] >> . 
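For ScaleB (sketch only, assuming the overload above), each element is multiplied by the type's radix raised to n; for double the radix is 2:

    ReadOnlySpan<double> x = stackalloc double[] { 1.5, -3.0 };
    Span<double> scaled = stackalloc double[2];
    TensorPrimitives.ScaleB(x, 3, scaled);  // scaled[i] = x[i] * 2^3 => { 12, -24 }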
/// /// - public static T ProductOfDifferences(ReadOnlySpan x, ReadOnlySpan y) - where T : ISubtractionOperators, IMultiplyOperators, IMultiplicativeIdentity + public static void ShiftRightArithmetic(ReadOnlySpan x, int shiftAmount, Span destination) + where T : IBinaryInteger { - if (x.IsEmpty) + if (x.Length > destination.Length) { - ThrowHelper.ThrowArgument_SpansMustBeNonEmpty(); + ThrowHelper.ThrowArgument_DestinationTooShort(); } - return Aggregate, MultiplyOperator>(x, y); + ValidateInputOutputSpanNonOverlapping(x, destination); + + // TODO: Vectorize + for (int i = 0; i < x.Length; i++) + { + destination[i] = x[i] >> shiftAmount; + } } - /// Computes the product of the element-wise sums of the numbers in the specified non-empty tensors. - /// The first tensor, represented as a span. - /// The second tensor, represented as a span. - /// The result of multiplying the element-wise additions of the elements in each tensor. - /// Length of both input spans must be greater than zero. - /// and must have the same length. + /// Computes the element-wise logical (unsigned) shifting right of numbers in the specified tensor by the specified shift amount. + /// The tensor, represented as a span. + /// The destination tensor, represented as a span. + /// The number of bits to shift, represented as a scalar. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. /// /// - /// This method effectively computes: - /// - /// Span<T> sums = ...; - /// TensorPrimitives.Add(x, y, sums); - /// T result = TensorPrimitives.Product(sums); - /// - /// but without requiring additional temporary storage for the intermediate sums. - /// - /// - /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different - /// operating systems or architectures. + /// This method effectively computes [i] = [i] >>> . /// /// - public static T ProductOfSums(ReadOnlySpan x, ReadOnlySpan y) - where T : IAdditionOperators, IAdditiveIdentity, IMultiplyOperators, IMultiplicativeIdentity + public static void ShiftRightLogical(ReadOnlySpan x, int shiftAmount, Span destination) + where T : IBinaryInteger { - if (x.IsEmpty) + if (x.Length > destination.Length) { - ThrowHelper.ThrowArgument_SpansMustBeNonEmpty(); + ThrowHelper.ThrowArgument_DestinationTooShort(); } - return Aggregate, MultiplyOperator>(x, y); + ValidateInputOutputSpanNonOverlapping(x, destination); + + // TODO: Vectorize + for (int i = 0; i < x.Length; i++) + { + destination[i] = x[i] >>> shiftAmount; + } } /// Computes the element-wise sigmoid function on the specified non-empty tensor of numbers. @@ -829,7 +2018,7 @@ public static T ProductOfSums(ReadOnlySpan x, ReadOnlySpan y) /// and reference overlapping memory locations and do not begin at the same location. /// /// - /// This method effectively computes [i] = 1f / (1f + .Exp(-[i])). + /// This method effectively computes [i] = 1f / (1f + .Exp(-[i])). /// /// /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different @@ -847,6 +2036,48 @@ public static void Sigmoid(ReadOnlySpan x, Span destination) InvokeSpanIntoSpan>(x, destination); } + /// Computes the element-wise sine of the value in the specified tensor. + /// The tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Destination is too short. 
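A short sketch of the difference between the two right-shift overloads above (illustrative usage only):

    ReadOnlySpan<int> x = stackalloc int[] { -8 };
    Span<int> dest = stackalloc int[1];

    TensorPrimitives.ShiftRightArithmetic(x, 1, dest); // sign-extending >>  : dest[0] == -4
    TensorPrimitives.ShiftRightLogical(x, 1, dest);    // zero-filling  >>> : dest[0] == 0x7FFFFFFC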
+ /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = .Sin([i]). + /// + /// + /// The angles in x must be in radians. Use or multiply by .Pi/180 to convert degrees to radians. + /// + /// + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. + /// + /// + public static void Sin(ReadOnlySpan x, Span destination) + where T : ITrigonometricFunctions => + InvokeSpanIntoSpan>(x, destination); + + /// Computes the element-wise sine of the value in the specified tensor that has been multiplied by Pi. + /// The tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = .SinPi([i]). + /// + /// + /// The angles in x must be in radians. Use or multiply by .Pi/180 to convert degrees to radians. + /// + /// + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. + /// + /// + public static void SinPi(ReadOnlySpan x, Span destination) + where T : ITrigonometricFunctions => + InvokeSpanIntoSpan>(x, destination); + /// Computes the element-wise hyperbolic sine of each radian angle in the specified tensor. /// The tensor, represented as a span. /// The destination tensor, represented as a span. @@ -854,14 +2085,14 @@ public static void Sigmoid(ReadOnlySpan x, Span destination) /// and reference overlapping memory locations and do not begin at the same location. /// /// - /// This method effectively computes [i] = .Sinh([i]). + /// This method effectively computes [i] = .Sinh([i]). /// /// /// If a value is equal to , , or , /// the corresponding destination location is set to that value. /// /// - /// The angles in x must be in radians. Use or multiply by /180 to convert degrees to radians. + /// The angles in x must be in radians. Use or multiply by .Pi / 180 to convert degrees to radians. /// /// /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different @@ -872,6 +2103,80 @@ public static void Sinh(ReadOnlySpan x, Span destination) where T : IHyperbolicFunctions => InvokeSpanIntoSpan>(x, destination); + /// Computes the element-wise sine and cosine of the value in the specified tensor. + /// The tensor, represented as a span. + /// The destination tensor for the element-wise sine result, represented as a span. + /// The destination tensor for the element-wise cosine result, represented as a span. + /// Destination is too short. + /// and or reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes ([i], [i]) = .SinCos([i]). + /// + /// + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. 
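Since Sin expects radian inputs per the remarks above, a typical call converts degrees first (illustrative sketch):

    ReadOnlySpan<double> degrees = stackalloc double[] { 0.0, 90.0, 180.0 };
    Span<double> radians = stackalloc double[3];
    Span<double> sines = stackalloc double[3];

    TensorPrimitives.DegreesToRadians(degrees, radians);
    TensorPrimitives.Sin(radians, sines);  // approximately { 0, 1, 0 }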
+ /// + /// + public static void SinCos(ReadOnlySpan x, Span sinDestination, Span cosDestination) + where T : ITrigonometricFunctions + { + if (x.Length > sinDestination.Length) + { + ThrowHelper.ThrowArgument_DestinationTooShort(nameof(sinDestination)); + } + if (x.Length > cosDestination.Length) + { + ThrowHelper.ThrowArgument_DestinationTooShort(nameof(cosDestination)); + } + + ValidateInputOutputSpanNonOverlapping(x, sinDestination); + ValidateInputOutputSpanNonOverlapping(x, cosDestination); + + // TODO: Vectorize + for (int i = 0; i < x.Length; i++) + { + (sinDestination[i], cosDestination[i]) = T.SinCos(x[i]); + } + } + + /// Computes the element-wise sine and cosine of the value in the specified tensor that has been multiplied by Pi. + /// The tensor, represented as a span. + /// The destination tensor for the element-wise sine result, represented as a span. + /// The destination tensor for the element-wise cosine result, represented as a span. + /// Destination is too short. + /// and or reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes ([i], [i]) = .SinCos([i]). + /// + /// + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. + /// + /// + public static void SinCosPi(ReadOnlySpan x, Span sinPiDestination, Span cosPiDestination) + where T : ITrigonometricFunctions + { + if (x.Length > sinPiDestination.Length) + { + ThrowHelper.ThrowArgument_DestinationTooShort(nameof(sinPiDestination)); + } + if (x.Length > cosPiDestination.Length) + { + ThrowHelper.ThrowArgument_DestinationTooShort(nameof(cosPiDestination)); + } + + ValidateInputOutputSpanNonOverlapping(x, sinPiDestination); + ValidateInputOutputSpanNonOverlapping(x, cosPiDestination); + + // TODO: Vectorize + for (int i = 0; i < x.Length; i++) + { + (sinPiDestination[i], cosPiDestination[i]) = T.SinCosPi(x[i]); + } + } + /// Computes the softmax function over the specified non-empty tensor of numbers. /// The tensor, represented as a span. /// The destination tensor. @@ -880,8 +2185,8 @@ public static void Sinh(ReadOnlySpan x, Span destination) /// and reference overlapping memory locations and do not begin at the same location. /// /// - /// This method effectively computes a sum of MathF.Exp(x[i]) for all elements in . - /// It then effectively computes [i] = MathF.Exp([i]) / sum. + /// This method effectively computes a sum of .Exp(x[i]) for all elements in . + /// It then effectively computes [i] = .Exp([i]) / sum. /// /// /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different @@ -908,6 +2213,20 @@ public static void SoftMax(ReadOnlySpan x, Span destination) InvokeSpanScalarIntoSpan, DivideOperator>(x, expSum, destination); } + /// Computes the element-wise square root of numbers in the specified tensor. + /// The tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = T.Sqrt([i]). 
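The SoftMax remarks above describe a two-pass definition (sum the exponentials, then normalize); a minimal scalar reference of that definition, useful when sanity-checking the vectorized path (hypothetical helper, not part of the patch):

    static void SoftMaxReference(ReadOnlySpan<float> x, Span<float> destination)
    {
        // First pass: accumulate the sum of exp(x[i]).
        float sum = 0f;
        for (int i = 0; i < x.Length; i++)
        {
            sum += MathF.Exp(x[i]);
        }

        // Second pass: normalize each exponential by the sum.
        for (int i = 0; i < x.Length; i++)
        {
            destination[i] = MathF.Exp(x[i]) / sum;
        }
    }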
+ /// + /// + public static void Sqrt(ReadOnlySpan x, Span destination) + where T : IRootFunctions => + InvokeSpanIntoSpan>(x, destination); + /// Computes the element-wise difference between numbers in the specified tensors. /// The first tensor, represented as a span. /// The second tensor, represented as a scalar. @@ -946,6 +2265,24 @@ public static void Subtract(ReadOnlySpan x, T y, Span destination) where T : ISubtractionOperators => InvokeSpanScalarIntoSpan>(x, y, destination); + /// Computes the element-wise difference between numbers in the specified tensors. + /// The first tensor, represented as a scalar. + /// The second tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = - [i]. + /// + /// + /// If either of the element-wise input values is equal to , the resulting element-wise value is also NaN. + /// + /// + public static void Subtract(T x, ReadOnlySpan y, Span destination) + where T : ISubtractionOperators => + InvokeSpanScalarIntoSpan>(y, x, destination); + /// Computes the sum of all elements in the specified tensor of numbers. /// The tensor, represented as a span. /// The result of adding all elements in , or zero if is empty. @@ -1007,6 +2344,48 @@ public static T SumOfSquares(ReadOnlySpan x) where T : IAdditionOperators, IAdditiveIdentity, IMultiplyOperators => Aggregate, AddOperator>(x); + /// Computes the element-wise tangent of the value in the specified tensor. + /// The tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = .Tan([i]). + /// + /// + /// The angles in x must be in radians. Use or multiply by .Pi/180 to convert degrees to radians. + /// + /// + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. + /// + /// + public static void Tan(ReadOnlySpan x, Span destination) + where T : ITrigonometricFunctions => + InvokeSpanIntoSpan>(x, destination); + + /// Computes the element-wise tangent of the value in the specified tensor that has been multiplied by Pi. + /// The tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = .TanPi([i]). + /// + /// + /// The angles in x must be in radians. Use or multiply by .Pi/180 to convert degrees to radians. + /// + /// + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. + /// + /// + public static void TanPi(ReadOnlySpan x, Span destination) + where T : ITrigonometricFunctions => + InvokeSpanIntoSpan>(x, destination); + /// Computes the element-wise hyperbolic tangent of each radian angle in the specified tensor. /// The tensor, represented as a span. /// The destination tensor, represented as a span. 
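The scalar-first Subtract overload above reuses the span/scalar code path by swapping the operands inside the operator; its observable behavior is destination[i] = x - y[i], as in this illustrative sketch:

    ReadOnlySpan<float> y = stackalloc float[] { 1f, 2f, 3f };
    Span<float> dest = stackalloc float[3];
    TensorPrimitives.Subtract(10f, y, dest);  // { 9, 8, 7 }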
@@ -1014,7 +2393,7 @@ public static T SumOfSquares(ReadOnlySpan x) /// and reference overlapping memory locations and do not begin at the same location. /// /// - /// This method effectively computes [i] = .Tanh([i]). + /// This method effectively computes [i] = .Tanh([i]). /// /// /// If a value is equal to , the corresponding destination location is set to -1. @@ -1022,7 +2401,7 @@ public static T SumOfSquares(ReadOnlySpan x) /// If a value is equal to , the corresponding destination location is set to NaN. /// /// - /// The angles in x must be in radians. Use or multiply by /180 to convert degrees to radians. + /// The angles in x must be in radians. Use or multiply by .Pi / 180 to convert degrees to radians. /// /// /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different @@ -1032,5 +2411,65 @@ public static T SumOfSquares(ReadOnlySpan x) public static void Tanh(ReadOnlySpan x, Span destination) where T : IHyperbolicFunctions => InvokeSpanIntoSpan>(x, destination); + + /// Computes the element-wise trailing zero count of numbers in the specified tensor. + /// The tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = T.TrailingZeroCount([i]). + /// + /// + public static void TrailingZeroCount(ReadOnlySpan x, Span destination) + where T : IBinaryInteger => + InvokeSpanIntoSpan>(x, destination); + + /// Computes the element-wise truncation of numbers in the specified tensor. + /// The first tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = T.Truncate([i]). + /// + /// + public static void Truncate(ReadOnlySpan x, Span destination) + where T : IFloatingPoint => + InvokeSpanIntoSpan>(x, destination); + + /// Computes the element-wise XOR of numbers in the specified tensors. + /// The first tensor, represented as a span. + /// The second tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Length of must be same as length of . + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = [i] ^ [i]. + /// + /// + public static void Xor(ReadOnlySpan x, ReadOnlySpan y, Span destination) + where T : IBitwiseOperators => + InvokeSpanSpanIntoSpan>(x, y, destination); + + /// Computes the element-wise XOR of numbers in the specified tensors. + /// The first tensor, represented as a span. + /// The second tensor, represented as a scalar. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = [i] ^ . 
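A brief sketch of the span-by-span Xor above, toggling bits element-wise (illustrative only):

    ReadOnlySpan<byte> x = stackalloc byte[] { 0b0000_1111, 0b1111_0000 };
    ReadOnlySpan<byte> mask = stackalloc byte[] { 0b1111_1111, 0b1111_1111 };
    Span<byte> dest = stackalloc byte[2];
    TensorPrimitives.Xor(x, mask, dest);  // { 0b1111_0000, 0b0000_1111 }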
+ /// + /// + public static void Xor(ReadOnlySpan x, T y, Span destination) + where T : IBitwiseOperators => + InvokeSpanScalarIntoSpan>(x, y, destination); } } diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs index 41560fbb8b284..7d3f752c483c3 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs @@ -11,9 +11,8 @@ #pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type // TODO: -// - Vectorize the trig-related functions for Ts other than floats +// - Vectorize remaining trig-related functions (some aren't vectorized at all, some are only vectorized for float). // - Vectorize integer operations when sizeof(T) == 1 or 2 (currently only vectorized in most operations for sizeof(T) == 4 or 8). -// - Implement generic version of IndexOfMinMaxCore and corresponding IndexOf methods. namespace System.Numerics.Tensors { @@ -1624,7 +1623,7 @@ private static T Aggregate( nuint remainder = (uint)x.Length; - if (Vector512.IsHardwareAccelerated && Vector512.IsSupported && Unsafe.SizeOf() >= 4) + if (Vector512.IsHardwareAccelerated && Vector512.IsSupported && TBinaryOperator.Vectorizable && Unsafe.SizeOf() >= 4) { T result; @@ -1644,7 +1643,7 @@ private static T Aggregate( return result; } - if (Vector256.IsHardwareAccelerated && Vector256.IsSupported && Unsafe.SizeOf() >= 4) + if (Vector256.IsHardwareAccelerated && Vector256.IsSupported && TBinaryOperator.Vectorizable && Unsafe.SizeOf() >= 4) { T result; @@ -1664,7 +1663,7 @@ private static T Aggregate( return result; } - if (Vector128.IsHardwareAccelerated && Vector128.IsSupported && Unsafe.SizeOf() >= 4) + if (Vector128.IsHardwareAccelerated && Vector128.IsSupported && TBinaryOperator.Vectorizable && Unsafe.SizeOf() >= 4) { T result; @@ -2724,8 +2723,9 @@ private static T MinMaxCore(ReadOnlySpan x) return curResult; } - private static int IndexOfMinMaxCore(ReadOnlySpan x) - where TIndexOfMinMax : struct, IIndexOfOperator + private static int IndexOfMinMaxCore(ReadOnlySpan x) + where T : INumber + where TIndexOfMinMax : struct, IIndexOfOperator { if (x.IsEmpty) { @@ -2737,26 +2737,43 @@ private static int IndexOfMinMaxCore(ReadOnlySpan x) // otherwise returns the index of the greater of the inputs. // It treats +0 as greater than -0 as per the specification. - if (Vector512.IsHardwareAccelerated && x.Length >= Vector512.Count) + if (Vector512.IsHardwareAccelerated && Vector512.IsSupported && x.Length >= Vector512.Count) { - ref float xRef = ref MemoryMarshal.GetReference(x); - Vector512 resultIndex = Vector512.Create(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - Vector512 curIndex = resultIndex; - Vector512 increment = Vector512.Create(Vector512.Count); + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector512 CreateVector512T(int i) => + sizeof(T) == sizeof(long) ? Vector512.Create((long)i).As() : + sizeof(T) == sizeof(int) ? Vector512.Create(i).As() : + sizeof(T) == sizeof(short) ? Vector512.Create((short)i).As() : + sizeof(T) == sizeof(byte) ? Vector512.Create((byte)i).As() : + throw new NotSupportedException(); + + ref T xRef = ref MemoryMarshal.GetReference(x); + Vector512 resultIndex = + sizeof(T) == sizeof(long) ? 
Vector512.Create(0L, 1, 2, 3, 4, 5, 6, 7).As() : + sizeof(T) == sizeof(int) ? Vector512.Create(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15).As() : + sizeof(T) == sizeof(short) ? Vector512.Create(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31).As() : + sizeof(T) == sizeof(byte) ? Vector512.Create((byte)0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63).As() : + throw new NotSupportedException(); + Vector512 curIndex = resultIndex; + Vector512 increment = CreateVector512T(Vector512.Count); // Load the first vector as the initial set of results, and bail immediately // to scalar handling if it contains any NaNs (which don't compare equally to themselves). - Vector512 result = Vector512.LoadUnsafe(ref xRef); - Vector512 current; + Vector512 result = Vector512.LoadUnsafe(ref xRef); + Vector512 current; - Vector512 nanMask = ~Vector512.Equals(result, result); - if (nanMask != Vector512.Zero) + Vector512 nanMask; + if (typeof(T) == typeof(float) || typeof(T) == typeof(double)) { - return IndexOfFirstMatch(nanMask); + nanMask = ~Vector512.Equals(result, result); + if (nanMask != Vector512.Zero) + { + return IndexOfFirstMatch(nanMask); + } } - int oneVectorFromEnd = x.Length - Vector512.Count; - int i = Vector512.Count; + int oneVectorFromEnd = x.Length - Vector512.Count; + int i = Vector512.Count; // Aggregate additional vectors into the result as long as there's at least one full vector left to process. while (i <= oneVectorFromEnd) @@ -2765,56 +2782,83 @@ private static int IndexOfMinMaxCore(ReadOnlySpan x) current = Vector512.LoadUnsafe(ref xRef, (uint)i); curIndex += increment; - nanMask = ~Vector512.Equals(current, current); - if (nanMask != Vector512.Zero) + if (typeof(T) == typeof(float) || typeof(T) == typeof(double)) { - return i + IndexOfFirstMatch(nanMask); + nanMask = ~Vector512.Equals(current, current); + if (nanMask != Vector512.Zero) + { + return i + IndexOfFirstMatch(nanMask); + } } TIndexOfMinMax.Invoke(ref result, current, ref resultIndex, curIndex); - i += Vector512.Count; + i += Vector512.Count; } // If any elements remain, handle them in one final vector. if (i != x.Length) { - current = Vector512.LoadUnsafe(ref xRef, (uint)(x.Length - Vector512.Count)); - curIndex += Vector512.Create(x.Length - i); + current = Vector512.LoadUnsafe(ref xRef, (uint)(x.Length - Vector512.Count)); + curIndex += CreateVector512T(x.Length - i); - nanMask = ~Vector512.Equals(current, current); - if (nanMask != Vector512.Zero) + if (typeof(T) == typeof(float) || typeof(T) == typeof(double)) { - return curIndex[IndexOfFirstMatch(nanMask)]; + nanMask = ~Vector512.Equals(current, current); + if (nanMask != Vector512.Zero) + { + int indexInVectorOfFirstMatch = IndexOfFirstMatch(nanMask); + return + sizeof(T) == sizeof(long) ? (int)(long)(object)curIndex.As()[indexInVectorOfFirstMatch] : + sizeof(T) == sizeof(int) ? (int)(object)curIndex.As()[indexInVectorOfFirstMatch] : + throw new NotSupportedException(); + } } TIndexOfMinMax.Invoke(ref result, current, ref resultIndex, curIndex); } // Aggregate the lanes in the vector to create the final scalar result. 
- return TIndexOfMinMax.Invoke(result, resultIndex); + return IndexOfFinalAggregate(result, resultIndex); } - if (Vector256.IsHardwareAccelerated && x.Length >= Vector256.Count) + if (Vector256.IsHardwareAccelerated && Vector256.IsSupported && x.Length >= Vector256.Count) { - ref float xRef = ref MemoryMarshal.GetReference(x); - Vector256 resultIndex = Vector256.Create(0, 1, 2, 3, 4, 5, 6, 7); - Vector256 curIndex = resultIndex; - Vector256 increment = Vector256.Create(Vector256.Count); + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector256 CreateVector256T(int i) => + sizeof(T) == sizeof(long) ? Vector256.Create((long)i).As() : + sizeof(T) == sizeof(int) ? Vector256.Create(i).As() : + sizeof(T) == sizeof(short) ? Vector256.Create((short)i).As() : + sizeof(T) == sizeof(byte) ? Vector256.Create((byte)i).As() : + throw new NotSupportedException(); + + ref T xRef = ref MemoryMarshal.GetReference(x); + Vector256 resultIndex = + sizeof(T) == sizeof(long) ? Vector256.Create(0L, 1, 2, 3).As() : + sizeof(T) == sizeof(int) ? Vector256.Create(0, 1, 2, 3, 4, 5, 6, 7).As() : + sizeof(T) == sizeof(short) ? Vector256.Create(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15).As() : + sizeof(T) == sizeof(byte) ? Vector256.Create((byte)0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31).As() : + throw new NotSupportedException(); + Vector256 curIndex = resultIndex; + Vector256 increment = CreateVector256T(Vector256.Count); // Load the first vector as the initial set of results, and bail immediately // to scalar handling if it contains any NaNs (which don't compare equally to themselves). - Vector256 result = Vector256.LoadUnsafe(ref xRef); - Vector256 current; + Vector256 result = Vector256.LoadUnsafe(ref xRef); + Vector256 current; - Vector256 nanMask = ~Vector256.Equals(result, result); - if (nanMask != Vector256.Zero) + Vector256 nanMask; + if (typeof(T) == typeof(float) || typeof(T) == typeof(double)) { - return IndexOfFirstMatch(nanMask); + nanMask = ~Vector256.Equals(result, result); + if (nanMask != Vector256.Zero) + { + return IndexOfFirstMatch(nanMask); + } } - int oneVectorFromEnd = x.Length - Vector256.Count; - int i = Vector256.Count; + int oneVectorFromEnd = x.Length - Vector256.Count; + int i = Vector256.Count; // Aggregate additional vectors into the result as long as there's at least one full vector left to process. while (i <= oneVectorFromEnd) @@ -2823,56 +2867,83 @@ private static int IndexOfMinMaxCore(ReadOnlySpan x) current = Vector256.LoadUnsafe(ref xRef, (uint)i); curIndex += increment; - nanMask = ~Vector256.Equals(current, current); - if (nanMask != Vector256.Zero) + if (typeof(T) == typeof(float) || typeof(T) == typeof(double)) { - return i + IndexOfFirstMatch(nanMask); + nanMask = ~Vector256.Equals(current, current); + if (nanMask != Vector256.Zero) + { + return i + IndexOfFirstMatch(nanMask); + } } TIndexOfMinMax.Invoke(ref result, current, ref resultIndex, curIndex); - i += Vector256.Count; + i += Vector256.Count; } // If any elements remain, handle them in one final vector. 
if (i != x.Length) { - current = Vector256.LoadUnsafe(ref xRef, (uint)(x.Length - Vector256.Count)); - curIndex += Vector256.Create(x.Length - i); + current = Vector256.LoadUnsafe(ref xRef, (uint)(x.Length - Vector256.Count)); + curIndex += CreateVector256T(x.Length - i); - nanMask = ~Vector256.Equals(current, current); - if (nanMask != Vector256.Zero) + if (typeof(T) == typeof(float) || typeof(T) == typeof(double)) { - return curIndex[IndexOfFirstMatch(nanMask)]; + nanMask = ~Vector256.Equals(current, current); + if (nanMask != Vector256.Zero) + { + int indexInVectorOfFirstMatch = IndexOfFirstMatch(nanMask); + return + sizeof(T) == sizeof(long) ? (int)(long)(object)curIndex.As()[indexInVectorOfFirstMatch] : + sizeof(T) == sizeof(int) ? (int)(object)curIndex.As()[indexInVectorOfFirstMatch] : + throw new NotSupportedException(); + } } TIndexOfMinMax.Invoke(ref result, current, ref resultIndex, curIndex); } // Aggregate the lanes in the vector to create the final scalar result. - return TIndexOfMinMax.Invoke(result, resultIndex); + return IndexOfFinalAggregate(result, resultIndex); } - if (Vector128.IsHardwareAccelerated && x.Length >= Vector128.Count) + if (Vector128.IsHardwareAccelerated && Vector128.IsSupported && x.Length >= Vector128.Count) { - ref float xRef = ref MemoryMarshal.GetReference(x); - Vector128 resultIndex = Vector128.Create(0, 1, 2, 3); - Vector128 curIndex = resultIndex; - Vector128 increment = Vector128.Create(Vector128.Count); + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 CreateVector128T(int i) => + sizeof(T) == sizeof(long) ? Vector128.Create((long)i).As() : + sizeof(T) == sizeof(int) ? Vector128.Create(i).As() : + sizeof(T) == sizeof(short) ? Vector128.Create((short)i).As() : + sizeof(T) == sizeof(byte) ? Vector128.Create((byte)i).As() : + throw new NotSupportedException(); + + ref T xRef = ref MemoryMarshal.GetReference(x); + Vector128 resultIndex = + sizeof(T) == sizeof(long) ? Vector128.Create(0L, 1).As() : + sizeof(T) == sizeof(int) ? Vector128.Create(0, 1, 2, 3).As() : + sizeof(T) == sizeof(short) ? Vector128.Create(0, 1, 2, 3, 4, 5, 6, 7).As() : + sizeof(T) == sizeof(byte) ? Vector128.Create((byte)0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15).As() : + throw new NotSupportedException(); + Vector128 curIndex = resultIndex; + Vector128 increment = CreateVector128T(Vector128.Count); // Load the first vector as the initial set of results, and bail immediately // to scalar handling if it contains any NaNs (which don't compare equally to themselves). - Vector128 result = Vector128.LoadUnsafe(ref xRef); - Vector128 current; + Vector128 result = Vector128.LoadUnsafe(ref xRef); + Vector128 current; - Vector128 nanMask = ~Vector128.Equals(result, result); - if (nanMask != Vector128.Zero) + Vector128 nanMask; + if (typeof(T) == typeof(float) || typeof(T) == typeof(double)) { - return IndexOfFirstMatch(nanMask); + nanMask = ~Vector128.Equals(result, result); + if (nanMask != Vector128.Zero) + { + return IndexOfFirstMatch(nanMask); + } } - int oneVectorFromEnd = x.Length - Vector128.Count; - int i = Vector128.Count; + int oneVectorFromEnd = x.Length - Vector128.Count; + int i = Vector128.Count; // Aggregate additional vectors into the result as long as there's at least one full vector left to process. 
while (i <= oneVectorFromEnd) @@ -2881,49 +2952,58 @@ private static int IndexOfMinMaxCore(ReadOnlySpan x) current = Vector128.LoadUnsafe(ref xRef, (uint)i); curIndex += increment; - nanMask = ~Vector128.Equals(current, current); - if (nanMask != Vector128.Zero) + if (typeof(T) == typeof(float) || typeof(T) == typeof(double)) { - return i + IndexOfFirstMatch(nanMask); + nanMask = ~Vector128.Equals(current, current); + if (nanMask != Vector128.Zero) + { + return i + IndexOfFirstMatch(nanMask); + } } TIndexOfMinMax.Invoke(ref result, current, ref resultIndex, curIndex); - i += Vector128.Count; + i += Vector128.Count; } // If any elements remain, handle them in one final vector. if (i != x.Length) { - curIndex += Vector128.Create(x.Length - i); - - current = Vector128.LoadUnsafe(ref xRef, (uint)(x.Length - Vector128.Count)); + current = Vector128.LoadUnsafe(ref xRef, (uint)(x.Length - Vector128.Count)); + curIndex += CreateVector128T(x.Length - i); - nanMask = ~Vector128.Equals(current, current); - if (nanMask != Vector128.Zero) + if (typeof(T) == typeof(float) || typeof(T) == typeof(double)) { - return curIndex[IndexOfFirstMatch(nanMask)]; + nanMask = ~Vector128.Equals(current, current); + if (nanMask != Vector128.Zero) + { + int indexInVectorOfFirstMatch = IndexOfFirstMatch(nanMask); + return + sizeof(T) == sizeof(long) ? (int)(long)(object)curIndex.As()[indexInVectorOfFirstMatch] : + sizeof(T) == sizeof(int) ? (int)(object)curIndex.As()[indexInVectorOfFirstMatch] : + throw new NotSupportedException(); + } } TIndexOfMinMax.Invoke(ref result, current, ref resultIndex, curIndex); } // Aggregate the lanes in the vector to create the final scalar result. - return TIndexOfMinMax.Invoke(result, resultIndex); + return IndexOfFinalAggregate(result, resultIndex); } // Scalar path used when either vectorization is not supported or the input is too small to vectorize. - float curResult = x[0]; + T curResult = x[0]; int curIn = 0; - if (float.IsNaN(curResult)) + if (T.IsNaN(curResult)) { return curIn; } for (int i = 1; i < x.Length; i++) { - float current = x[i]; - if (float.IsNaN(current)) + T current = x[i]; + if (T.IsNaN(current)) { return i; } @@ -2934,20 +3014,14 @@ private static int IndexOfMinMaxCore(ReadOnlySpan x) return curIn; } - private static int IndexOfFirstMatch(Vector128 mask) - { - return BitOperations.TrailingZeroCount(mask.ExtractMostSignificantBits()); - } + private static int IndexOfFirstMatch(Vector128 mask) => + BitOperations.TrailingZeroCount(mask.ExtractMostSignificantBits()); - private static int IndexOfFirstMatch(Vector256 mask) - { - return BitOperations.TrailingZeroCount(mask.ExtractMostSignificantBits()); - } + private static int IndexOfFirstMatch(Vector256 mask) => + BitOperations.TrailingZeroCount(mask.ExtractMostSignificantBits()); - private static int IndexOfFirstMatch(Vector512 mask) - { - return BitOperations.TrailingZeroCount(mask.ExtractMostSignificantBits()); - } + private static int IndexOfFirstMatch(Vector512 mask) => + BitOperations.TrailingZeroCount(mask.ExtractMostSignificantBits()); /// Performs an element-wise operation on and writes the results to . /// The element type. 
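The scalar fallback above defines the contract the vectorized paths must match: a NaN wins immediately, and otherwise the operator decides which value/index pair survives, with +0 treated as greater than -0. A simplified scalar sketch of IndexOfMax under that contract (hypothetical helper; it omits the per-lane tie-breaking handled by TIndexOfMinMax in the vectorized code):

    static int IndexOfMaxScalar<T>(ReadOnlySpan<T> x) where T : INumber<T>
    {
        if (x.IsEmpty)
        {
            return -1;
        }

        T best = x[0];
        int bestIndex = 0;
        if (T.IsNaN(best))
        {
            return 0; // NaN short-circuits, matching the vectorized early exit.
        }

        for (int i = 1; i < x.Length; i++)
        {
            T current = x[i];
            if (T.IsNaN(current))
            {
                return i;
            }

            // Strictly greater wins; on equality, +0 is preferred over -0.
            if (current > best || (current == best && T.IsNegative(best) && !T.IsNegative(current)))
            {
                best = current;
                bestIndex = i;
            }
        }

        return bestIndex;
    }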
@@ -3879,7 +3953,7 @@ private static void InvokeSpanSpanIntoSpan( nuint remainder = (uint)x.Length; - if (Vector512.IsHardwareAccelerated && Vector512.IsSupported && Unsafe.SizeOf() >= 4) + if (Vector512.IsHardwareAccelerated && Vector512.IsSupported && TBinaryOperator.Vectorizable && Unsafe.SizeOf() >= 4) { if (remainder >= (uint)Vector512.Count) { @@ -3897,7 +3971,7 @@ private static void InvokeSpanSpanIntoSpan( return; } - if (Vector256.IsHardwareAccelerated && Vector256.IsSupported && Unsafe.SizeOf() >= 4) + if (Vector256.IsHardwareAccelerated && Vector256.IsSupported && TBinaryOperator.Vectorizable && Unsafe.SizeOf() >= 4) { if (remainder >= (uint)Vector256.Count) { @@ -3915,7 +3989,7 @@ private static void InvokeSpanSpanIntoSpan( return; } - if (Vector128.IsHardwareAccelerated && Vector128.IsSupported && Unsafe.SizeOf() >= 4) + if (Vector128.IsHardwareAccelerated && Vector128.IsSupported && TBinaryOperator.Vectorizable && Unsafe.SizeOf() >= 4) { if (remainder >= (uint)Vector128.Count) { @@ -4904,7 +4978,7 @@ private static void InvokeSpanScalarIntoSpan.IsSupported && TTransformOperator.Vectorizable && Unsafe.SizeOf() >= 4) + if (Vector512.IsHardwareAccelerated && Vector512.IsSupported && TTransformOperator.Vectorizable && TBinaryOperator.Vectorizable && Unsafe.SizeOf() >= 4) { if (remainder >= (uint)Vector512.Count) { @@ -4922,7 +4996,7 @@ private static void InvokeSpanScalarIntoSpan.IsSupported && TTransformOperator.Vectorizable && Unsafe.SizeOf() >= 4) + if (Vector256.IsHardwareAccelerated && Vector256.IsSupported && TTransformOperator.Vectorizable && TBinaryOperator.Vectorizable && Unsafe.SizeOf() >= 4) { if (remainder >= (uint)Vector256.Count) { @@ -4940,7 +5014,7 @@ private static void InvokeSpanScalarIntoSpan.IsSupported && TTransformOperator.Vectorizable && Unsafe.SizeOf() >= 4) + if (Vector128.IsHardwareAccelerated && Vector128.IsSupported && TTransformOperator.Vectorizable && TBinaryOperator.Vectorizable && Unsafe.SizeOf() >= 4) { if (remainder >= (uint)Vector128.Count) { @@ -9417,24 +9491,6 @@ private static Vector512 IsNegative(Vector512 vector) return Vector512.LessThan(vector, Vector512.Zero); } - /// Gets whether the specified is positive. - private static bool IsPositive(float f) => float.IsPositive(f); - - /// Gets whether each specified is positive. - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static Vector128 IsPositive(Vector128 vector) => - Vector128.GreaterThan(vector.AsInt32(), Vector128.AllBitsSet).AsSingle(); - - /// Gets whether each specified is positive. - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static Vector256 IsPositive(Vector256 vector) => - Vector256.GreaterThan(vector.AsInt32(), Vector256.AllBitsSet).AsSingle(); - - /// Gets whether each specified is positive. - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static Vector512 IsPositive(Vector512 vector) => - Vector512.GreaterThan(vector.AsInt32(), Vector512.AllBitsSet).AsSingle(); - /// /// Gets a vector mask that will be all-ones-set for the last elements /// and zero for all other elements. 
@@ -9672,6 +9728,8 @@ ref Unsafe.As(ref MemoryMarshal.GetReference(RemainderUInt64Mask_8x9)) /// x + y internal readonly struct AddOperator : IAggregationOperator where T : IAdditionOperators, IAdditiveIdentity { + public static bool Vectorizable => true; + public static T Invoke(T x, T y) => x + y; public static Vector128 Invoke(Vector128 x, Vector128 y) => x + y; public static Vector256 Invoke(Vector256 x, Vector256 y) => x + y; @@ -9687,15 +9745,28 @@ ref Unsafe.As(ref MemoryMarshal.GetReference(RemainderUInt64Mask_8x9)) /// x - y internal readonly struct SubtractOperator : IBinaryOperator where T : ISubtractionOperators { + public static bool Vectorizable => true; public static T Invoke(T x, T y) => x - y; public static Vector128 Invoke(Vector128 x, Vector128 y) => x - y; public static Vector256 Invoke(Vector256 x, Vector256 y) => x - y; public static Vector512 Invoke(Vector512 x, Vector512 y) => x - y; } + /// y - x + internal readonly struct InvertedSubtractOperator : IBinaryOperator where T : ISubtractionOperators + { + public static bool Vectorizable => true; + public static T Invoke(T x, T y) => y - x; + public static Vector128 Invoke(Vector128 x, Vector128 y) => y - x; + public static Vector256 Invoke(Vector256 x, Vector256 y) => y - x; + public static Vector512 Invoke(Vector512 x, Vector512 y) => y - x; + } + /// (x - y) * (x - y) internal readonly struct SubtractSquaredOperator : IBinaryOperator where T : ISubtractionOperators, IMultiplyOperators { + public static bool Vectorizable => true; + public static T Invoke(T x, T y) { T tmp = x - y; @@ -9724,6 +9795,8 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) /// x * y internal readonly struct MultiplyOperator : IAggregationOperator where T : IMultiplyOperators, IMultiplicativeIdentity { + public static bool Vectorizable => true; + public static T Invoke(T x, T y) => x * y; public static Vector128 Invoke(Vector128 x, Vector128 y) => x * y; public static Vector256 Invoke(Vector256 x, Vector256 y) => x * y; @@ -9739,15 +9812,210 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) /// x / y internal readonly struct DivideOperator : IBinaryOperator where T : IDivisionOperators { + public static bool Vectorizable => true; public static T Invoke(T x, T y) => x / y; public static Vector128 Invoke(Vector128 x, Vector128 y) => x / y; public static Vector256 Invoke(Vector256 x, Vector256 y) => x / y; public static Vector512 Invoke(Vector512 x, Vector512 y) => x / y; } + /// y / x + internal readonly struct InvertedDivideOperator : IBinaryOperator where T : IDivisionOperators + { + public static bool Vectorizable => true; + public static T Invoke(T x, T y) => y / x; + public static Vector128 Invoke(Vector128 x, Vector128 y) => y / x; + public static Vector256 Invoke(Vector256 x, Vector256 y) => y / x; + public static Vector512 Invoke(Vector512 x, Vector512 y) => y / x; + } + + /// T.Ieee754Remainder(x, y) + internal readonly struct Ieee754RemainderOperator : IBinaryOperator where T : IFloatingPointIeee754 + { + public static bool Vectorizable => false; + public static T Invoke(T x, T y) => T.Ieee754Remainder(x, y); + public static Vector128 Invoke(Vector128 x, Vector128 y) => throw new NotSupportedException(); + public static Vector256 Invoke(Vector256 x, Vector256 y) => throw new NotSupportedException(); + public static Vector512 Invoke(Vector512 x, Vector512 y) => throw new NotSupportedException(); + } + + // Ieee754Remainder + + internal readonly struct ReciprocalOperator : IUnaryOperator where T : IFloatingPoint + { 
+ public static bool Vectorizable => true; + public static T Invoke(T x) => T.One / x; + public static Vector128 Invoke(Vector128 x) => Vector128.One / x; + public static Vector256 Invoke(Vector256 x) => Vector256.One / x; + public static Vector512 Invoke(Vector512 x) => Vector512.One / x; + } + + private readonly struct ReciprocalSqrtOperator : IUnaryOperator where T : IFloatingPointIeee754 + { + public static bool Vectorizable => true; + public static T Invoke(T x) => T.One / T.Sqrt(x); + public static Vector128 Invoke(Vector128 x) => Vector128.One / Vector128.Sqrt(x); + public static Vector256 Invoke(Vector256 x) => Vector256.One / Vector256.Sqrt(x); + public static Vector512 Invoke(Vector512 x) => Vector512.One / Vector512.Sqrt(x); + } + + private readonly struct ReciprocalEstimateOperator : IUnaryOperator where T : IFloatingPointIeee754 + { + public static bool Vectorizable => true; + + public static T Invoke(T x) => T.ReciprocalEstimate(x); + + public static Vector128 Invoke(Vector128 x) + { + if (typeof(T) == typeof(float)) + { + if (Sse.IsSupported) + { + return Sse.Reciprocal(x.AsSingle()).As(); + } + + if (AdvSimd.IsSupported) + { + return AdvSimd.ReciprocalEstimate(x.AsSingle()).As(); + } + } + + return Vector128.One / x; + } + + public static Vector256 Invoke(Vector256 x) + { + if (typeof(T) == typeof(float)) + { + if (Avx.IsSupported) + { + return Avx.Reciprocal(x.AsSingle()).As(); + } + } + + return Vector256.One / x; + } + + public static Vector512 Invoke(Vector512 x) + { + if (Avx512F.IsSupported) + { + if (typeof(T) == typeof(float)) + { + return Avx512F.Reciprocal14(x.AsSingle()).As(); + } + + if (typeof(T) == typeof(double)) + { + return Avx512F.Reciprocal14(x.AsDouble()).As(); + } + } + + return Vector512.One / x; + } + } + + private readonly struct ReciprocalSqrtEstimateOperator : IUnaryOperator where T : IFloatingPointIeee754 + { + public static bool Vectorizable => true; + + public static T Invoke(T x) => T.ReciprocalSqrtEstimate(x); + + public static Vector128 Invoke(Vector128 x) + { + if (typeof(T) == typeof(float)) + { + if (Sse.IsSupported) + { + return Sse.ReciprocalSqrt(x.AsSingle()).As(); + } + + if (AdvSimd.IsSupported) + { + return AdvSimd.ReciprocalSquareRootEstimate(x.AsSingle()).As(); + } + } + + return Vector128.One / Vector128.Sqrt(x); + } + + public static Vector256 Invoke(Vector256 x) + { + if (typeof(T) == typeof(float)) + { + if (Avx.IsSupported) + { + return Avx.ReciprocalSqrt(x.AsSingle()).As(); + } + } + + return Vector256.One / Vector256.Sqrt(x); + } + + public static Vector512 Invoke(Vector512 x) + { + if (Avx512F.IsSupported) + { + if (typeof(T) == typeof(float)) + { + return Avx512F.ReciprocalSqrt14(x.AsSingle()).As(); + } + + if (typeof(T) == typeof(double)) + { + return Avx512F.ReciprocalSqrt14(x.AsDouble()).As(); + } + } + + return Vector512.One / Vector512.Sqrt(x); + } + } + + /// x & y + internal readonly struct BitwiseAndOperator : IBinaryOperator where T : IBitwiseOperators + { + public static bool Vectorizable => true; + public static T Invoke(T x, T y) => x & y; + public static Vector128 Invoke(Vector128 x, Vector128 y) => x & y; + public static Vector256 Invoke(Vector256 x, Vector256 y) => x & y; + public static Vector512 Invoke(Vector512 x, Vector512 y) => x & y; + } + + /// x | y + internal readonly struct BitwiseOrOperator : IBinaryOperator where T : IBitwiseOperators + { + public static bool Vectorizable => true; + public static T Invoke(T x, T y) => x | y; + public static Vector128 Invoke(Vector128 x, Vector128 y) => x | y; + 
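The *Estimate operators above intentionally use hardware approximation instructions when available (e.g. SSE RCPPS/RSQRTPS, AdvSimd reciprocal estimates), so their results can differ slightly from the exact forms; an illustrative comparison:

    ReadOnlySpan<float> x = stackalloc float[] { 3f };
    Span<float> exact = stackalloc float[1];
    Span<float> approx = stackalloc float[1];

    TensorPrimitives.Reciprocal(x, exact);           // the IEEE result of 1f / 3f
    TensorPrimitives.ReciprocalEstimate(x, approx);  // near 1f / 3f, within the hardware's estimate tolerance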
public static Vector256 Invoke(Vector256 x, Vector256 y) => x | y; + public static Vector512 Invoke(Vector512 x, Vector512 y) => x | y; + } + + /// x ^ y + internal readonly struct XorOperator : IBinaryOperator where T : IBitwiseOperators + { + public static bool Vectorizable => true; + public static T Invoke(T x, T y) => x ^ y; + public static Vector128 Invoke(Vector128 x, Vector128 y) => x ^ y; + public static Vector256 Invoke(Vector256 x, Vector256 y) => x ^ y; + public static Vector512 Invoke(Vector512 x, Vector512 y) => x ^ y; + } + + /// ~x + internal readonly struct OnesComplementOperator : IUnaryOperator where T : IBitwiseOperators + { + public static bool Vectorizable => true; + public static T Invoke(T x) => ~x; + public static Vector128 Invoke(Vector128 x) => ~x; + public static Vector256 Invoke(Vector256 x) => ~x; + public static Vector512 Invoke(Vector512 x) => ~x; + } + /// T.Max(x, y) (but NaNs may not be propagated) internal readonly struct MaxOperator : IAggregationOperator where T : INumber { + public static bool Vectorizable => true; + public static T Invoke(T x, T y) { if (typeof(T) == typeof(Half) || typeof(T) == typeof(float) || typeof(T) == typeof(double)) @@ -9842,245 +10110,319 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) public static T Invoke(Vector512 x) => HorizontalAggregate>(x); } - private interface IIndexOfOperator + private interface IIndexOfOperator where T : INumber { - static abstract int Invoke(ref float result, float current, int resultIndex, int curIndex); - static abstract int Invoke(Vector128 result, Vector128 resultIndex); - static abstract void Invoke(ref Vector128 result, Vector128 current, ref Vector128 resultIndex, Vector128 curIndex); - static abstract int Invoke(Vector256 result, Vector256 resultIndex); - static abstract void Invoke(ref Vector256 result, Vector256 current, ref Vector256 resultIndex, Vector256 curIndex); - static abstract int Invoke(Vector512 result, Vector512 resultIndex); - static abstract void Invoke(ref Vector512 result, Vector512 current, ref Vector512 resultIndex, Vector512 curIndex); + static abstract int Invoke(ref T result, T current, int resultIndex, int curIndex); + static abstract void Invoke(ref Vector128 result, Vector128 current, ref Vector128 resultIndex, Vector128 curIndex); + static abstract void Invoke(ref Vector256 result, Vector256 current, ref Vector256 resultIndex, Vector256 curIndex); + static abstract void Invoke(ref Vector512 result, Vector512 current, ref Vector512 resultIndex, Vector512 curIndex); } - /// Returns the index of MathF.Max(x, y) - internal readonly struct IndexOfMaxOperator : IIndexOfOperator + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static int IndexOfFinalAggregate(Vector128 result, Vector128 resultIndex) + where T : INumber + where TIndexOfOperator : struct, IIndexOfOperator { - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static int Invoke(Vector128 result, Vector128 maxIndex) - { - Vector128 tmpResult; - Vector128 tmpIndex; - - tmpResult = Vector128.Shuffle(result, Vector128.Create(2, 3, 0, 1)); - tmpIndex = Vector128.Shuffle(maxIndex, Vector128.Create(2, 3, 0, 1)); - Invoke(ref result, tmpResult, ref maxIndex, tmpIndex); + Vector128 tmpResult; + Vector128 tmpIndex; - tmpResult = Vector128.Shuffle(result, Vector128.Create(1, 0, 3, 2)); - tmpIndex = Vector128.Shuffle(maxIndex, Vector128.Create(1, 0, 3, 2)); - Invoke(ref result, tmpResult, ref maxIndex, tmpIndex); + if (sizeof(T) == 8) + { + // Compare 0 with 1 + tmpResult = 
Vector128.Shuffle(result.AsInt64(), Vector128.Create(1, 0)).As(); + tmpIndex = Vector128.Shuffle(resultIndex.AsInt64(), Vector128.Create(1, 0)).As(); + TIndexOfOperator.Invoke(ref result, tmpResult, ref resultIndex, tmpIndex); - return maxIndex.ToScalar(); + // Return 0 + return (int)resultIndex.As().ToScalar(); } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void Invoke(ref Vector128 max, Vector128 current, ref Vector128 maxIndex, Vector128 curIndex) + if (sizeof(T) == 4) { - Vector128 greaterThanMask = Vector128.GreaterThan(max, current); - - Vector128 equalMask = Vector128.Equals(max, current); - if (equalMask.AsInt32() != Vector128.Zero) - { - Vector128 negativeMask = IsNegative(current); - Vector128 lessThanMask = Vector128.LessThan(maxIndex, curIndex); + // Compare 0,1 with 2,3 + tmpResult = Vector128.Shuffle(result.AsInt32(), Vector128.Create(2, 3, 0, 1)).As(); + tmpIndex = Vector128.Shuffle(resultIndex.AsInt32(), Vector128.Create(2, 3, 0, 1)).As(); + TIndexOfOperator.Invoke(ref result, tmpResult, ref resultIndex, tmpIndex); - greaterThanMask |= (negativeMask & equalMask) | (~IsNegative(max) & equalMask & lessThanMask.AsSingle()); - } - - max = ElementWiseSelect(greaterThanMask, max, current); + // Compare 0 with 1 + tmpResult = Vector128.Shuffle(result.AsInt32(), Vector128.Create(1, 0, 3, 2)).As(); + tmpIndex = Vector128.Shuffle(resultIndex.AsInt32(), Vector128.Create(1, 0, 3, 2)).As(); + TIndexOfOperator.Invoke(ref result, tmpResult, ref resultIndex, tmpIndex); - maxIndex = ElementWiseSelect(greaterThanMask.AsInt32(), maxIndex, curIndex); + // Return 0 + return resultIndex.As().ToScalar(); } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static int Invoke(Vector256 result, Vector256 maxIndex) + if (sizeof(T) == 2) { - // Max the upper/lower halves of the Vector256 - Vector128 resultLower = result.GetLower(); - Vector128 indexLower = maxIndex.GetLower(); + // Compare 0,1,2,3 with 4,5,6,7 + tmpResult = Vector128.Shuffle(result.AsInt16(), Vector128.Create(4, 5, 6, 7, 0, 1, 2, 3)).As(); + tmpIndex = Vector128.Shuffle(resultIndex.AsInt16(), Vector128.Create(4, 5, 6, 7, 0, 1, 2, 3)).As(); + TIndexOfOperator.Invoke(ref result, tmpResult, ref resultIndex, tmpIndex); + + // Compare 0,1 with 2,3 + tmpResult = Vector128.Shuffle(result.AsInt16(), Vector128.Create(2, 3, 0, 1, 4, 5, 6, 7)).As(); + tmpIndex = Vector128.Shuffle(resultIndex.AsInt16(), Vector128.Create(2, 3, 0, 1, 4, 5, 6, 7)).As(); + TIndexOfOperator.Invoke(ref result, tmpResult, ref resultIndex, tmpIndex); - Invoke(ref resultLower, result.GetUpper(), ref indexLower, maxIndex.GetUpper()); - return Invoke(resultLower, indexLower); + // Compare 0 with 1 + tmpResult = Vector128.Shuffle(result.AsInt16(), Vector128.Create(1, 0, 2, 3, 4, 5, 6, 7)).As(); + tmpIndex = Vector128.Shuffle(resultIndex.AsInt16(), Vector128.Create(1, 0, 2, 3, 4, 5, 6, 7)).As(); + TIndexOfOperator.Invoke(ref result, tmpResult, ref resultIndex, tmpIndex); + + // Return 0 + return resultIndex.As().ToScalar(); } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void Invoke(ref Vector256 max, Vector256 current, ref Vector256 maxIndex, Vector256 curIndex) + if (sizeof(T) == 1) { - Vector256 greaterThanMask = Vector256.GreaterThan(max, current); + // Compare 0,1,2,3,4,5,6,7 with 8,9,10,11,12,13,14,15 + tmpResult = Vector128.Shuffle(result.AsByte(), Vector128.Create((byte)8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)).As(); + tmpIndex = Vector128.Shuffle(resultIndex.AsByte(), Vector128.Create((byte)8, 9, 
10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)).As(); + TIndexOfOperator.Invoke(ref result, tmpResult, ref resultIndex, tmpIndex); - Vector256 equalMask = Vector256.Equals(max, current); - if (equalMask.AsInt32() != Vector256.Zero) - { - Vector256 negativeMask = IsNegative(current); - Vector256 lessThanMask = Vector256.LessThan(maxIndex, curIndex); + // Compare 0,1,2,3 with 4,5,6,7 + tmpResult = Vector128.Shuffle(result.AsByte(), Vector128.Create((byte)4, 5, 6, 7, 0, 1, 2, 3, 8, 9, 10, 11, 12, 13, 14, 15)).As(); + tmpIndex = Vector128.Shuffle(resultIndex.AsByte(), Vector128.Create((byte)4, 5, 6, 7, 0, 1, 2, 3, 8, 9, 10, 11, 12, 13, 14, 15)).As(); + TIndexOfOperator.Invoke(ref result, tmpResult, ref resultIndex, tmpIndex); - greaterThanMask |= (negativeMask & equalMask) | (~IsNegative(max) & equalMask & lessThanMask.AsSingle()); - } + // Compare 0,1 with 2,3 + tmpResult = Vector128.Shuffle(result.AsByte(), Vector128.Create((byte)2, 3, 0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)).As(); + tmpIndex = Vector128.Shuffle(resultIndex.AsByte(), Vector128.Create((byte)2, 3, 0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)).As(); + TIndexOfOperator.Invoke(ref result, tmpResult, ref resultIndex, tmpIndex); - max = ElementWiseSelect(greaterThanMask, max, current); + // Compare 0 with 1 + tmpResult = Vector128.Shuffle(result.AsByte(), Vector128.Create((byte)1, 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)).As(); + tmpIndex = Vector128.Shuffle(resultIndex.AsByte(), Vector128.Create((byte)1, 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)).As(); + TIndexOfOperator.Invoke(ref result, tmpResult, ref resultIndex, tmpIndex); - maxIndex = ElementWiseSelect(greaterThanMask.AsInt32(), maxIndex, curIndex); + // Return 0 + return resultIndex.As().ToScalar(); } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static int Invoke(Vector512 result, Vector512 resultIndex) - { - // Min the upper/lower halves of the Vector512 - Vector256 resultLower = result.GetLower(); - Vector256 indexLower = resultIndex.GetLower(); + throw new NotSupportedException(); + } - Invoke(ref resultLower, result.GetUpper(), ref indexLower, resultIndex.GetUpper()); - return Invoke(resultLower, indexLower); - } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static int IndexOfFinalAggregate(Vector256 result, Vector256 resultIndex) + where T : INumber + where TIndexOfOperator : struct, IIndexOfOperator + { + // Min the upper/lower halves of the Vector256 + Vector128 resultLower = result.GetLower(); + Vector128 indexLower = resultIndex.GetLower(); - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void Invoke(ref Vector512 max, Vector512 current, ref Vector512 maxIndex, Vector512 curIndex) - { - Vector512 greaterThanMask = Vector512.GreaterThan(max, current); + TIndexOfOperator.Invoke(ref resultLower, result.GetUpper(), ref indexLower, resultIndex.GetUpper()); + return IndexOfFinalAggregate(resultLower, indexLower); + } - Vector512 equalMask = Vector512.Equals(max, current); - if (equalMask.AsInt32() != Vector512.Zero) - { - Vector512 negativeMask = IsNegative(current); - Vector512 lessThanMask = Vector512.LessThan(maxIndex, curIndex); + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static int IndexOfFinalAggregate(Vector512 result, Vector512 resultIndex) + where T : INumber + where TIndexOfOperator : struct, IIndexOfOperator + { + Vector256 resultLower = result.GetLower(); + Vector256 indexLower = resultIndex.GetLower(); - greaterThanMask |= (negativeMask & equalMask) | 
(~IsNegative(max) & equalMask & lessThanMask.AsSingle()); - } + TIndexOfOperator.Invoke(ref resultLower, result.GetUpper(), ref indexLower, resultIndex.GetUpper()); + return IndexOfFinalAggregate(resultLower, indexLower); + } - max = ElementWiseSelect(greaterThanMask, max, current); + private static Vector128 IndexLessThan(Vector128 indices1, Vector128 indices2) => + sizeof(T) == sizeof(long) ? Vector128.LessThan(indices1.AsInt64(), indices2.AsInt64()).As() : + sizeof(T) == sizeof(int) ? Vector128.LessThan(indices1.AsInt32(), indices2.AsInt32()).As() : + sizeof(T) == sizeof(short) ? Vector128.LessThan(indices1.AsInt16(), indices2.AsInt16()).As() : + sizeof(T) == sizeof(byte) ? Vector128.LessThan(indices1.AsByte(), indices2.AsByte()).As() : + throw new NotSupportedException(); - maxIndex = ElementWiseSelect(greaterThanMask.AsInt32(), maxIndex, curIndex); - } + private static Vector256 IndexLessThan(Vector256 indices1, Vector256 indices2) => + sizeof(T) == sizeof(long) ? Vector256.LessThan(indices1.AsInt64(), indices2.AsInt64()).As() : + sizeof(T) == sizeof(int) ? Vector256.LessThan(indices1.AsInt32(), indices2.AsInt32()).As() : + sizeof(T) == sizeof(short) ? Vector256.LessThan(indices1.AsInt16(), indices2.AsInt16()).As() : + sizeof(T) == sizeof(byte) ? Vector256.LessThan(indices1.AsByte(), indices2.AsByte()).As() : + throw new NotSupportedException(); + private static Vector512 IndexLessThan(Vector512 indices1, Vector512 indices2) => + sizeof(T) == sizeof(long) ? Vector512.LessThan(indices1.AsInt64(), indices2.AsInt64()).As() : + sizeof(T) == sizeof(int) ? Vector512.LessThan(indices1.AsInt32(), indices2.AsInt32()).As() : + sizeof(T) == sizeof(short) ? Vector512.LessThan(indices1.AsInt16(), indices2.AsInt16()).As() : + sizeof(T) == sizeof(byte) ? Vector512.LessThan(indices1.AsByte(), indices2.AsByte()).As() : + throw new NotSupportedException(); + + /// Returns the index of MathF.Max(x, y) + internal readonly struct IndexOfMaxOperator : IIndexOfOperator where T : INumber + { [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static int Invoke(ref float result, float current, int resultIndex, int curIndex) + public static void Invoke(ref Vector128 result, Vector128 current, ref Vector128 resultIndex, Vector128 curIndex) { - if (result == current) + Vector128 greaterThanMask = Vector128.GreaterThan(result, current); + Vector128 equalMask = Vector128.Equals(result, current); + + if (equalMask != Vector128.Zero) { - if (IsNegative(result) && !IsNegative(current)) + Vector128 lessThanIndexMask = IndexLessThan(resultIndex, curIndex); + if (typeof(T) == typeof(float) || typeof(T) == typeof(double)) { - result = current; - return curIndex; + greaterThanMask |= (equalMask & (~IsNegative(result) | IsNegative(current)) & lessThanIndexMask); + } + else + { + greaterThanMask |= (equalMask & lessThanIndexMask); } - } - else if (current > result) - { - result = current; - return curIndex; } - return resultIndex; + result = ElementWiseSelect(greaterThanMask, result, current); + resultIndex = ElementWiseSelect(greaterThanMask, resultIndex, curIndex); } - } - internal readonly struct IndexOfMaxMagnitudeOperator : IIndexOfOperator - { [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static int Invoke(Vector128 result, Vector128 maxIndex) + public static void Invoke(ref Vector256 result, Vector256 current, ref Vector256 resultIndex, Vector256 curIndex) { - Vector128 tmpResult = Vector128.Shuffle(result, Vector128.Create(2, 3, 0, 1)); - Vector128 tmpIndex = Vector128.Shuffle(maxIndex, 
Vector128.Create(2, 3, 0, 1)); + Vector256 greaterThanMask = Vector256.GreaterThan(result, current); + Vector256 equalMask = Vector256.Equals(result, current); - Invoke(ref result, tmpResult, ref maxIndex, tmpIndex); - - tmpResult = Vector128.Shuffle(result, Vector128.Create(1, 0, 3, 2)); - tmpIndex = Vector128.Shuffle(maxIndex, Vector128.Create(1, 0, 3, 2)); + if (equalMask != Vector256.Zero) + { + Vector256 lessThanIndexMask = IndexLessThan(resultIndex, curIndex); + if (typeof(T) == typeof(float) || typeof(T) == typeof(double)) + { + greaterThanMask |= (equalMask & (~IsNegative(result) | IsNegative(current)) & lessThanIndexMask); + } + else + { + greaterThanMask |= (equalMask & lessThanIndexMask); + } + } - Invoke(ref result, tmpResult, ref maxIndex, tmpIndex); - return maxIndex.ToScalar(); + result = ElementWiseSelect(greaterThanMask, result, current); + resultIndex = ElementWiseSelect(greaterThanMask, resultIndex, curIndex); } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void Invoke(ref Vector128 max, Vector128 current, ref Vector128 maxIndex, Vector128 curIndex) + public static void Invoke(ref Vector512 result, Vector512 current, ref Vector512 resultIndex, Vector512 curIndex) { - Vector128 maxMag = Vector128.Abs(max), currentMag = Vector128.Abs(current); + Vector512 greaterThanMask = Vector512.GreaterThan(result, current); + Vector512 equalMask = Vector512.Equals(result, current); - Vector128 greaterThanMask = Vector128.GreaterThan(maxMag, currentMag); - - Vector128 equalMask = Vector128.Equals(max, current); - if (equalMask.AsInt32() != Vector128.Zero) + if (equalMask != Vector512.Zero) { - Vector128 negativeMask = IsNegative(current); - Vector128 lessThanMask = Vector128.LessThan(maxIndex, curIndex); - - greaterThanMask |= (negativeMask & equalMask) | (~IsNegative(max) & equalMask & lessThanMask.AsSingle()); + Vector512 lessThanIndexMask = IndexLessThan(resultIndex, curIndex); + if (typeof(T) == typeof(float) || typeof(T) == typeof(double)) + { + greaterThanMask |= (equalMask & (~IsNegative(result) | IsNegative(current)) & lessThanIndexMask); + } + else + { + greaterThanMask |= (equalMask & lessThanIndexMask); + } } - max = ElementWiseSelect(greaterThanMask, max, current); - - maxIndex = ElementWiseSelect(greaterThanMask.AsInt32(), maxIndex, curIndex); + result = ElementWiseSelect(greaterThanMask, result, current); + resultIndex = ElementWiseSelect(greaterThanMask, resultIndex, curIndex); } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static int Invoke(Vector256 result, Vector256 maxIndex) + public static int Invoke(ref T result, T current, int resultIndex, int curIndex) { - // Max the upper/lower halves of the Vector256 - Vector128 resultLower = result.GetLower(); - Vector128 indexLower = maxIndex.GetLower(); + if (result == current) + { + if (IsNegative(result) && !IsNegative(current)) + { + result = current; + return curIndex; + } + } + else if (current > result) + { + result = current; + return curIndex; + } - Invoke(ref resultLower, result.GetUpper(), ref indexLower, maxIndex.GetUpper()); - return Invoke(resultLower, indexLower); + return resultIndex; } + } + internal readonly struct IndexOfMaxMagnitudeOperator : IIndexOfOperator where T : INumber + { [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void Invoke(ref Vector256 max, Vector256 current, ref Vector256 maxIndex, Vector256 curIndex) + public static void Invoke(ref Vector128 result, Vector128 current, ref Vector128 resultIndex, Vector128 curIndex) { - Vector256 
maxMag = Vector256.Abs(max), currentMag = Vector256.Abs(current); + Vector128 resultMag = Vector128.Abs(result), currentMag = Vector128.Abs(current); + Vector128 greaterThanMask = Vector128.GreaterThan(resultMag, currentMag); + Vector128 equalMask = Vector128.Equals(resultMag, currentMag); - Vector256 greaterThanMask = Vector256.GreaterThan(maxMag, currentMag); - - Vector256 equalMask = Vector256.Equals(max, current); - if (equalMask.AsInt32() != Vector256.Zero) + if (equalMask != Vector128.Zero) { - Vector256 negativeMask = IsNegative(current); - Vector256 lessThanMask = Vector256.LessThan(maxIndex, curIndex); - - greaterThanMask |= (negativeMask & equalMask) | (~IsNegative(max) & equalMask & lessThanMask.AsSingle()); + Vector128 lessThanIndexMask = IndexLessThan(resultIndex, curIndex); + if (typeof(T) == typeof(float) || typeof(T) == typeof(double)) + { + greaterThanMask |= (equalMask & (~IsNegative(result) | IsNegative(current)) & lessThanIndexMask); + } + else + { + greaterThanMask |= (equalMask & lessThanIndexMask); + } } - max = ElementWiseSelect(greaterThanMask, max, current); - - maxIndex = ElementWiseSelect(greaterThanMask.AsInt32(), maxIndex, curIndex); + result = ElementWiseSelect(greaterThanMask, result, current); + resultIndex = ElementWiseSelect(greaterThanMask, resultIndex, curIndex); } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static int Invoke(Vector512 result, Vector512 resultIndex) + public static void Invoke(ref Vector256 result, Vector256 current, ref Vector256 resultIndex, Vector256 curIndex) { - // Min the upper/lower halves of the Vector512 - Vector256 resultLower = result.GetLower(); - Vector256 indexLower = resultIndex.GetLower(); + Vector256 resultMag = Vector256.Abs(result), currentMag = Vector256.Abs(current); + Vector256 greaterThanMask = Vector256.GreaterThan(resultMag, currentMag); + Vector256 equalMask = Vector256.Equals(resultMag, currentMag); + + if (equalMask != Vector256.Zero) + { + Vector256 lessThanIndexMask = IndexLessThan(resultIndex, curIndex); + if (typeof(T) == typeof(float) || typeof(T) == typeof(double)) + { + greaterThanMask |= (equalMask & (~IsNegative(result) | IsNegative(current)) & lessThanIndexMask); + } + else + { + greaterThanMask |= (equalMask & lessThanIndexMask); + } + } - Invoke(ref resultLower, result.GetUpper(), ref indexLower, resultIndex.GetUpper()); - return Invoke(resultLower, indexLower); + result = ElementWiseSelect(greaterThanMask, result, current); + resultIndex = ElementWiseSelect(greaterThanMask, resultIndex, curIndex); } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void Invoke(ref Vector512 max, Vector512 current, ref Vector512 maxIndex, Vector512 curIndex) + public static void Invoke(ref Vector512 result, Vector512 current, ref Vector512 resultIndex, Vector512 curIndex) { - Vector512 maxMag = Vector512.Abs(max), currentMag = Vector512.Abs(current); - Vector512 greaterThanMask = Vector512.GreaterThan(maxMag, currentMag); + Vector512 resultMag = Vector512.Abs(result), currentMag = Vector512.Abs(current); + Vector512 greaterThanMask = Vector512.GreaterThan(resultMag, currentMag); + Vector512 equalMask = Vector512.Equals(resultMag, currentMag); - Vector512 equalMask = Vector512.Equals(max, current); - if (equalMask.AsInt32() != Vector512.Zero) + if (equalMask != Vector512.Zero) { - Vector512 negativeMask = IsNegative(current); - Vector512 lessThanMask = Vector512.LessThan(maxIndex, curIndex); - - greaterThanMask |= (negativeMask & equalMask) | (~IsNegative(max) & equalMask & 
lessThanMask.AsSingle()); + Vector512 lessThanIndexMask = IndexLessThan(resultIndex, curIndex); + if (typeof(T) == typeof(float) || typeof(T) == typeof(double)) + { + greaterThanMask |= (equalMask & (~IsNegative(result) | IsNegative(current)) & lessThanIndexMask); + } + else + { + greaterThanMask |= (equalMask & lessThanIndexMask); + } } - max = ElementWiseSelect(greaterThanMask, max, current); - - maxIndex = ElementWiseSelect(greaterThanMask.AsInt32(), maxIndex, curIndex); + result = ElementWiseSelect(greaterThanMask, result, current); + resultIndex = ElementWiseSelect(greaterThanMask, resultIndex, curIndex); } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static int Invoke(ref float result, float current, int resultIndex, int curIndex) + public static int Invoke(ref T result, T current, int resultIndex, int curIndex) { - float curMaxAbs = MathF.Abs(result); - float currentAbs = MathF.Abs(current); + T resultMag = T.Abs(result); + T currentMag = T.Abs(current); - if (curMaxAbs == currentAbs) + if (resultMag == currentMag) { if (IsNegative(result) && !IsNegative(current)) { @@ -10088,7 +10430,7 @@ public static int Invoke(ref float result, float current, int resultIndex, int c return curIndex; } } - else if (currentAbs > curMaxAbs) + else if (currentMag > resultMag) { result = current; return curIndex; @@ -10099,109 +10441,83 @@ public static int Invoke(ref float result, float current, int resultIndex, int c } /// Returns the index of MathF.Min(x, y) - internal readonly struct IndexOfMinOperator : IIndexOfOperator + internal readonly struct IndexOfMinOperator : IIndexOfOperator where T : INumber { [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static int Invoke(Vector128 result, Vector128 resultIndex) - { - Vector128 tmpResult = Vector128.Shuffle(result, Vector128.Create(2, 3, 0, 1)); - Vector128 tmpIndex = Vector128.Shuffle(resultIndex, Vector128.Create(2, 3, 0, 1)); - - Invoke(ref result, tmpResult, ref resultIndex, tmpIndex); - - tmpResult = Vector128.Shuffle(result, Vector128.Create(1, 0, 3, 2)); - tmpIndex = Vector128.Shuffle(resultIndex, Vector128.Create(1, 0, 3, 2)); - - Invoke(ref result, tmpResult, ref resultIndex, tmpIndex); - return resultIndex.ToScalar(); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void Invoke(ref Vector128 result, Vector128 current, ref Vector128 resultIndex, Vector128 curIndex) + public static void Invoke(ref Vector128 result, Vector128 current, ref Vector128 resultIndex, Vector128 curIndex) { - Vector128 lessThanMask = Vector128.LessThan(result, current); + Vector128 lessThanMask = Vector128.LessThan(result, current); + Vector128 equalMask = Vector128.Equals(result, current); - Vector128 equalMask = Vector128.Equals(result, current); - if (equalMask.AsInt32() != Vector128.Zero) + if (equalMask != Vector128.Zero) { - Vector128 negativeMask = IsNegative(current); - Vector128 lessThanIndexMask = Vector128.LessThan(resultIndex, curIndex); - - lessThanMask |= (~negativeMask & equalMask) | (IsNegative(result) & equalMask & lessThanIndexMask.AsSingle()); + Vector128 lessThanIndexMask = IndexLessThan(resultIndex, curIndex); + if (typeof(T) == typeof(float) || typeof(T) == typeof(double)) + { + lessThanMask |= (equalMask & (IsNegative(result) | ~IsNegative(current)) & lessThanIndexMask); + } + else + { + lessThanMask |= (equalMask & lessThanIndexMask); + } } result = ElementWiseSelect(lessThanMask, result, current); - - resultIndex = ElementWiseSelect(lessThanMask.AsInt32(), resultIndex, curIndex); - } - - - 
[MethodImpl(MethodImplOptions.AggressiveInlining)] - public static int Invoke(Vector256 result, Vector256 resultIndex) - { - // Min the upper/lower halves of the Vector256 - Vector128 resultLower = result.GetLower(); - Vector128 indexLower = resultIndex.GetLower(); - - Invoke(ref resultLower, result.GetUpper(), ref indexLower, resultIndex.GetUpper()); - return Invoke(resultLower, indexLower); + resultIndex = ElementWiseSelect(lessThanMask, resultIndex, curIndex); } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void Invoke(ref Vector256 result, Vector256 current, ref Vector256 resultIndex, Vector256 curIndex) + public static void Invoke(ref Vector256 result, Vector256 current, ref Vector256 resultIndex, Vector256 curIndex) { - Vector256 lessThanMask = Vector256.LessThan(result, current); + Vector256 lessThanMask = Vector256.LessThan(result, current); + Vector256 equalMask = Vector256.Equals(result, current); - Vector256 equalMask = Vector256.Equals(result, current); - if (equalMask.AsInt32() != Vector256.Zero) + if (equalMask != Vector256.Zero) { - Vector256 negativeMask = IsNegative(current); - Vector256 lessThanIndexMask = Vector256.LessThan(resultIndex, curIndex); - - lessThanMask |= (~negativeMask & equalMask) | (IsNegative(result) & equalMask & lessThanIndexMask.AsSingle()); + Vector256 lessThanIndexMask = IndexLessThan(resultIndex, curIndex); + if (typeof(T) == typeof(float) || typeof(T) == typeof(double)) + { + lessThanMask |= (equalMask & (IsNegative(result) | ~IsNegative(current)) & lessThanIndexMask); + } + else + { + lessThanMask |= (equalMask & lessThanIndexMask); + } } result = ElementWiseSelect(lessThanMask, result, current); - - resultIndex = ElementWiseSelect(lessThanMask.AsInt32(), resultIndex, curIndex); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static int Invoke(Vector512 result, Vector512 resultIndex) - { - // Min the upper/lower halves of the Vector512 - Vector256 resultLower = result.GetLower(); - Vector256 indexLower = resultIndex.GetLower(); - - Invoke(ref resultLower, result.GetUpper(), ref indexLower, resultIndex.GetUpper()); - return Invoke(resultLower, indexLower); + resultIndex = ElementWiseSelect(lessThanMask, resultIndex, curIndex); } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void Invoke(ref Vector512 result, Vector512 current, ref Vector512 resultIndex, Vector512 curIndex) + public static void Invoke(ref Vector512 result, Vector512 current, ref Vector512 resultIndex, Vector512 curIndex) { - Vector512 lessThanMask = Vector512.LessThan(result, current); + Vector512 lessThanMask = Vector512.LessThan(result, current); + Vector512 equalMask = Vector512.Equals(result, current); - Vector512 equalMask = Vector512.Equals(result, current); - if (equalMask.AsInt32() != Vector512.Zero) + if (equalMask != Vector512.Zero) { - Vector512 negativeMask = IsNegative(current); - Vector512 lessThanIndexMask = Vector512.LessThan(resultIndex, curIndex); - - lessThanMask |= (~negativeMask & equalMask) | (IsNegative(result) & equalMask & lessThanIndexMask.AsSingle()); + Vector512 lessThanIndexMask = IndexLessThan(resultIndex, curIndex); + if (typeof(T) == typeof(float) || typeof(T) == typeof(double)) + { + lessThanMask |= (equalMask & (IsNegative(result) | ~IsNegative(current)) & lessThanIndexMask); + } + else + { + lessThanMask |= (equalMask & lessThanIndexMask); + } } result = ElementWiseSelect(lessThanMask, result, current); - - resultIndex = ElementWiseSelect(lessThanMask.AsInt32(), resultIndex, curIndex); + 
resultIndex = ElementWiseSelect(lessThanMask, resultIndex, curIndex); } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static int Invoke(ref float result, float current, int resultIndex, int curIndex) + public static int Invoke(ref T result, T current, int resultIndex, int curIndex) { if (result == current) { - if (IsPositive(result) && !IsPositive(current)) + if (!IsNegative(result) && IsNegative(current)) { result = current; return curIndex; @@ -10217,122 +10533,95 @@ public static int Invoke(ref float result, float current, int resultIndex, int c } } - internal readonly struct IndexOfMinMagnitudeOperator : IIndexOfOperator + internal readonly struct IndexOfMinMagnitudeOperator : IIndexOfOperator where T : INumber { [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static int Invoke(Vector128 result, Vector128 resultIndex) - { - Vector128 tmpResult = Vector128.Shuffle(result, Vector128.Create(2, 3, 0, 1)); - Vector128 tmpIndex = Vector128.Shuffle(resultIndex, Vector128.Create(2, 3, 0, 1)); - - Invoke(ref result, tmpResult, ref resultIndex, tmpIndex); - - tmpResult = Vector128.Shuffle(result, Vector128.Create(1, 0, 3, 2)); - tmpIndex = Vector128.Shuffle(resultIndex, Vector128.Create(1, 0, 3, 2)); - - Invoke(ref result, tmpResult, ref resultIndex, tmpIndex); - return resultIndex.ToScalar(); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void Invoke(ref Vector128 result, Vector128 current, ref Vector128 resultIndex, Vector128 curIndex) + public static void Invoke(ref Vector128 result, Vector128 current, ref Vector128 resultIndex, Vector128 curIndex) { - Vector128 minMag = Vector128.Abs(result), currentMag = Vector128.Abs(current); - - Vector128 lessThanMask = Vector128.LessThan(minMag, currentMag); + Vector128 resultMag = Vector128.Abs(result), currentMag = Vector128.Abs(current); + Vector128 lessThanMask = Vector128.LessThan(resultMag, currentMag); + Vector128 equalMask = Vector128.Equals(resultMag, currentMag); - Vector128 equalMask = Vector128.Equals(result, current); - if (equalMask.AsInt32() != Vector128.Zero) + if (equalMask != Vector128.Zero) { - Vector128 negativeMask = IsNegative(current); - Vector128 lessThanIndexMask = Vector128.LessThan(resultIndex, curIndex); - - lessThanMask |= (~negativeMask & equalMask) | (IsNegative(result) & equalMask & lessThanIndexMask.AsSingle()); + Vector128 lessThanIndexMask = IndexLessThan(resultIndex, curIndex); + if (typeof(T) == typeof(float) || typeof(T) == typeof(double)) + { + lessThanMask |= (equalMask & (IsNegative(result) | ~IsNegative(current)) & lessThanIndexMask); + } + else + { + lessThanMask |= (equalMask & lessThanIndexMask); + } } result = ElementWiseSelect(lessThanMask, result, current); - - resultIndex = ElementWiseSelect(lessThanMask.AsInt32(), resultIndex, curIndex); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static int Invoke(Vector256 result, Vector256 resultIndex) - { - // Min the upper/lower halves of the Vector256 - Vector128 resultLower = result.GetLower(); - Vector128 indexLower = resultIndex.GetLower(); - - Invoke(ref resultLower, result.GetUpper(), ref indexLower, resultIndex.GetUpper()); - return Invoke(resultLower, indexLower); + resultIndex = ElementWiseSelect(lessThanMask, resultIndex, curIndex); } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void Invoke(ref Vector256 result, Vector256 current, ref Vector256 resultIndex, Vector256 curIndex) + public static void Invoke(ref Vector256 result, Vector256 current, ref Vector256 
resultIndex, Vector256 curIndex) { - Vector256 minMag = Vector256.Abs(result), currentMag = Vector256.Abs(current); + Vector256 resultMag = Vector256.Abs(result), currentMag = Vector256.Abs(current); + Vector256 lessThanMask = Vector256.LessThan(resultMag, currentMag); + Vector256 equalMask = Vector256.Equals(resultMag, currentMag); - Vector256 lessThanMask = Vector256.LessThan(minMag, currentMag); - - Vector256 equalMask = Vector256.Equals(result, current); - if (equalMask.AsInt32() != Vector256.Zero) + if (equalMask != Vector256.Zero) { - Vector256 negativeMask = IsNegative(current); - Vector256 lessThanIndexMask = Vector256.LessThan(resultIndex, curIndex); - - lessThanMask |= (~negativeMask & equalMask) | (IsNegative(result) & equalMask & lessThanIndexMask.AsSingle()); + Vector256 lessThanIndexMask = IndexLessThan(resultIndex, curIndex); + if (typeof(T) == typeof(float) || typeof(T) == typeof(double)) + { + lessThanMask |= (equalMask & (IsNegative(result) | ~IsNegative(current)) & lessThanIndexMask); + } + else + { + lessThanMask |= (equalMask & lessThanIndexMask); + } } result = ElementWiseSelect(lessThanMask, result, current); - - resultIndex = ElementWiseSelect(lessThanMask.AsInt32(), resultIndex, curIndex); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static int Invoke(Vector512 result, Vector512 resultIndex) - { - // Min the upper/lower halves of the Vector512 - Vector256 resultLower = result.GetLower(); - Vector256 indexLower = resultIndex.GetLower(); - - Invoke(ref resultLower, result.GetUpper(), ref indexLower, resultIndex.GetUpper()); - return Invoke(resultLower, indexLower); + resultIndex = ElementWiseSelect(lessThanMask, resultIndex, curIndex); } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void Invoke(ref Vector512 result, Vector512 current, ref Vector512 resultIndex, Vector512 curIndex) + public static void Invoke(ref Vector512 result, Vector512 current, ref Vector512 resultIndex, Vector512 curIndex) { - Vector512 minMag = Vector512.Abs(result), currentMag = Vector512.Abs(current); - - Vector512 lessThanMask = Vector512.LessThan(minMag, currentMag); + Vector512 resultMag = Vector512.Abs(result), currentMag = Vector512.Abs(current); + Vector512 lessThanMask = Vector512.LessThan(resultMag, currentMag); + Vector512 equalMask = Vector512.Equals(resultMag, currentMag); - Vector512 equalMask = Vector512.Equals(result, current); - if (equalMask.AsInt32() != Vector512.Zero) + if (equalMask != Vector512.Zero) { - Vector512 negativeMask = IsNegative(current); - Vector512 lessThanIndexMask = Vector512.LessThan(resultIndex, curIndex); - - lessThanMask |= (~negativeMask & equalMask) | (IsNegative(result) & equalMask & lessThanIndexMask.AsSingle()); + Vector512 lessThanIndexMask = IndexLessThan(resultIndex, curIndex); + if (typeof(T) == typeof(float) || typeof(T) == typeof(double)) + { + lessThanMask |= (equalMask & (IsNegative(result) | ~IsNegative(current)) & lessThanIndexMask); + } + else + { + lessThanMask |= (equalMask & lessThanIndexMask); + } } result = ElementWiseSelect(lessThanMask, result, current); - - resultIndex = ElementWiseSelect(lessThanMask.AsInt32(), resultIndex, curIndex); + resultIndex = ElementWiseSelect(lessThanMask, resultIndex, curIndex); } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static int Invoke(ref float result, float current, int resultIndex, int curIndex) + public static int Invoke(ref T result, T current, int resultIndex, int curIndex) { - float curMinAbs = MathF.Abs(result); - float currentAbs 
= MathF.Abs(current); - if (curMinAbs == currentAbs) + T resultMag = T.Abs(result); + T currentMag = T.Abs(current); + + if (resultMag == currentMag) { - if (IsPositive(result) && !IsPositive(current)) + if (!IsNegative(result) && IsNegative(current)) { result = current; return curIndex; } } - else if (currentAbs < curMinAbs) + else if (currentMag < resultMag) { result = current; return curIndex; @@ -10346,6 +10635,8 @@ public static int Invoke(ref float result, float current, int resultIndex, int c internal readonly struct MaxPropagateNaNOperator : IBinaryOperator where T : INumber { + public static bool Vectorizable => true; + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static T Invoke(T x, T y) => T.Max(x, y); @@ -10419,6 +10710,8 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) internal readonly struct MaxMagnitudeOperator : IAggregationOperator where T : INumberBase { + public static bool Vectorizable => true; + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static T Invoke(T x, T y) => T.MaxMagnitude(x, y); @@ -10506,6 +10799,8 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) internal readonly struct MaxMagnitudePropagateNaNOperator : IBinaryOperator where T : INumberBase { + public static bool Vectorizable => true; + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static T Invoke(T x, T y) => T.MaxMagnitude(x, y); @@ -10574,6 +10869,8 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) internal readonly struct MinOperator : IAggregationOperator where T : INumber { + public static bool Vectorizable => true; + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static T Invoke(T x, T y) { @@ -10647,6 +10944,8 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) internal readonly struct MinPropagateNaNOperator : IBinaryOperator where T : INumber { + public static bool Vectorizable => true; + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static T Invoke(T x, T y) => T.Min(x, y); @@ -10720,6 +11019,8 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) internal readonly struct MinMagnitudeOperator : IAggregationOperator where T : INumberBase { + public static bool Vectorizable => true; + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static T Invoke(T x, T y) => T.MinMagnitude(x, y); @@ -10804,6 +11105,8 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) internal readonly struct MinMagnitudePropagateNaNOperator : IBinaryOperator where T : INumberBase { + public static bool Vectorizable => true; + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static T Invoke(T x, T y) => T.MinMagnitude(x, y); @@ -10896,6 +11199,24 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) public static Vector512 Invoke(Vector512 x, Vector512 y, Vector512 z) => (x * y) + z; } + /// (x * y) + z + internal readonly struct MultiplyAddEstimateOperator : ITernaryOperator where T : IFloatingPointIeee754 + { + public static T Invoke(T x, T y, T z) => FusedMultiplyAdd(x, y, z); + public static Vector128 Invoke(Vector128 x, Vector128 y, Vector128 z) => FusedMultiplyAdd(x, y, z); + public static Vector256 Invoke(Vector256 x, Vector256 y, Vector256 z) => FusedMultiplyAdd(x, y, z); + public static Vector512 Invoke(Vector512 x, Vector512 y, Vector512 z) => FusedMultiplyAdd(x, y, z); + } + + /// (x * (1 - z)) + (y * z) + internal readonly struct LerpOperator : ITernaryOperator where T : IFloatingPointIeee754 + { + public static T Invoke(T x, T y, T z) => T.Lerp(x, y, z); + public static 
Vector128 Invoke(Vector128 x, Vector128 y, Vector128 z) => (x * (Vector128.One - z)) + (y * z); + public static Vector256 Invoke(Vector256 x, Vector256 y, Vector256 z) => (x * (Vector256.One - z)) + (y * z); + public static Vector512 Invoke(Vector512 x, Vector512 y, Vector512 z) => (x * (Vector512.One - z)) + (y * z); + } + /// x internal readonly struct IdentityOperator : IUnaryOperator { @@ -11582,76 +11903,345 @@ public static Vector512 Invoke(Vector512 x) } #endif - /// T.Cosh(x) - internal readonly struct CoshOperator : IUnaryOperator - where T : IHyperbolicFunctions + /// T.ExpM1(x) + internal readonly struct ExpM1Operator : IUnaryOperator + where T : IExponentialFunctions { - // This code is based on `vrs4_coshf` from amd/aocl-libm-ose - // Copyright (C) 2008-2022 Advanced Micro Devices, Inc. All rights reserved. - // - // Licensed under the BSD 3-Clause "New" or "Revised" License - // See THIRD-PARTY-NOTICES.TXT for the full license text - - // Spec: - // coshf(|x| > 89.415985107421875) = Infinity - // coshf(Infinity) = infinity - // coshf(-Infinity) = infinity - // - // cosh(x) = (exp(x) + exp(-x))/2 - // cosh(-x) = +cosh(x) - // - // checks for special cases - // if ( asint(x) > infinity) return x with overflow exception and - // return x. - // if x is NaN then raise invalid FP operation exception and return x. - // - // coshf = v/2 * exp(x - log(v)) where v = 0x1.0000e8p-1 + public static bool Vectorizable => ExpOperator.Vectorizable; - private const float LOGV = 0.693161f; - private const float HALFV = 1.0000138f; - private const float INVV2 = 0.24999309f; + public static T Invoke(T x) => T.ExpM1(x); + public static Vector128 Invoke(Vector128 x) => ExpOperator.Invoke(x) - Vector128.One; + public static Vector256 Invoke(Vector256 x) => ExpOperator.Invoke(x) - Vector256.One; + public static Vector512 Invoke(Vector512 x) => ExpOperator.Invoke(x) - Vector512.One; + } - public static bool Vectorizable => typeof(T) == typeof(float); + /// T.Exp2(x) + internal readonly struct Exp2Operator : IUnaryOperator + where T : IExponentialFunctions + { + public static bool Vectorizable => false; // TODO: Vectorize - public static T Invoke(T x) => T.Cosh(x); + public static T Invoke(T x) => T.Exp2(x); + public static Vector128 Invoke(Vector128 x) => throw new NotSupportedException(); + public static Vector256 Invoke(Vector256 x) => throw new NotSupportedException(); + public static Vector512 Invoke(Vector512 x) => throw new NotSupportedException(); + } - public static Vector128 Invoke(Vector128 t) - { - Debug.Assert(typeof(T) == typeof(float)); - Vector128 x = t.AsSingle(); + /// T.Exp2M1(x) + internal readonly struct Exp2M1Operator : IUnaryOperator + where T : IExponentialFunctions + { + public static bool Vectorizable => Exp2Operator.Vectorizable; - Vector128 y = Vector128.Abs(x); - Vector128 z = ExpOperator.Invoke(y - Vector128.Create(LOGV)); - return (Vector128.Create(HALFV) * (z + (Vector128.Create(INVV2) / z))).As(); - } + public static T Invoke(T x) => T.Exp2M1(x); + public static Vector128 Invoke(Vector128 x) => Exp2Operator.Invoke(x) - Vector128.One; + public static Vector256 Invoke(Vector256 x) => Exp2Operator.Invoke(x) - Vector256.One; + public static Vector512 Invoke(Vector512 x) => Exp2Operator.Invoke(x) - Vector512.One; + } - public static Vector256 Invoke(Vector256 t) - { - Debug.Assert(typeof(T) == typeof(float)); - Vector256 x = t.AsSingle(); + /// T.Exp10(x) + internal readonly struct Exp10Operator : IUnaryOperator + where T : IExponentialFunctions + { + public static bool 
Vectorizable => false; // TODO: Vectorize - Vector256 y = Vector256.Abs(x); - Vector256 z = ExpOperator.Invoke(y - Vector256.Create(LOGV)); - return (Vector256.Create(HALFV) * (z + (Vector256.Create(INVV2) / z))).As(); - } + public static T Invoke(T x) => T.Exp10(x); + public static Vector128 Invoke(Vector128 x) => throw new NotSupportedException(); + public static Vector256 Invoke(Vector256 x) => throw new NotSupportedException(); + public static Vector512 Invoke(Vector512 x) => throw new NotSupportedException(); + } - public static Vector512 Invoke(Vector512 t) - { - Debug.Assert(typeof(T) == typeof(float)); - Vector512 x = t.AsSingle(); + /// T.Exp10M1(x) + internal readonly struct Exp10M1Operator : IUnaryOperator + where T : IExponentialFunctions + { + public static bool Vectorizable => Exp2Operator.Vectorizable; - Vector512 y = Vector512.Abs(x); - Vector512 z = ExpOperator.Invoke(y - Vector512.Create(LOGV)); - return (Vector512.Create(HALFV) * (z + (Vector512.Create(INVV2) / z))).As(); - } + public static T Invoke(T x) => T.Exp10M1(x); + public static Vector128 Invoke(Vector128 x) => Exp10Operator.Invoke(x) - Vector128.One; + public static Vector256 Invoke(Vector256 x) => Exp10Operator.Invoke(x) - Vector256.One; + public static Vector512 Invoke(Vector512 x) => Exp10Operator.Invoke(x) - Vector512.One; } - /// T.Sinh(x) - internal readonly struct SinhOperator : IUnaryOperator - where T : IHyperbolicFunctions + /// T.Pow(x, y) + internal readonly struct PowOperator : IBinaryOperator + where T : IPowerFunctions { - // Same as cosh, but with `z -` rather than `z +`, and with the sign - // flipped on the result based on the sign of the input. + public static bool Vectorizable => false; // TODO: Vectorize + public static T Invoke(T x, T y) => T.Pow(x, y); + public static Vector128 Invoke(Vector128 x, Vector128 y) => throw new NotSupportedException(); + public static Vector256 Invoke(Vector256 x, Vector256 y) => throw new NotSupportedException(); + public static Vector512 Invoke(Vector512 x, Vector512 y) => throw new NotSupportedException(); + } + + /// T.Sqrt(x) + internal readonly struct SqrtOperator : IUnaryOperator + where T : IRootFunctions + { + public static bool Vectorizable => true; + public static T Invoke(T x) => T.Sqrt(x); + public static Vector128 Invoke(Vector128 x) => Vector128.Sqrt(x); + public static Vector256 Invoke(Vector256 x) => Vector256.Sqrt(x); + public static Vector512 Invoke(Vector512 x) => Vector512.Sqrt(x); + } + + /// T.Cbrt(x) + internal readonly struct CbrtOperator : IUnaryOperator + where T : IRootFunctions + { + public static bool Vectorizable => false; // TODO: Vectorize + public static T Invoke(T x) => T.Cbrt(x); + public static Vector128 Invoke(Vector128 x) => throw new NotSupportedException(); + public static Vector256 Invoke(Vector256 x) => throw new NotSupportedException(); + public static Vector512 Invoke(Vector512 x) => throw new NotSupportedException(); + } + + /// T.Hypot(x, y) + internal readonly struct HypotOperator : IBinaryOperator + where T : IRootFunctions + { + public static bool Vectorizable => false; // TODO: Vectorize + public static T Invoke(T x, T y) => T.Hypot(x, y); + public static Vector128 Invoke(Vector128 x, Vector128 y) => throw new NotSupportedException(); + public static Vector256 Invoke(Vector256 x, Vector256 y) => throw new NotSupportedException(); + public static Vector512 Invoke(Vector512 x, Vector512 y) => throw new NotSupportedException(); + } + + /// T.Acos(x) + internal readonly struct AcosOperator : IUnaryOperator + where 
T : ITrigonometricFunctions + { + public static bool Vectorizable => false; // TODO: Vectorize + public static T Invoke(T x) => T.Acos(x); + public static Vector128 Invoke(Vector128 x) => throw new NotSupportedException(); + public static Vector256 Invoke(Vector256 x) => throw new NotSupportedException(); + public static Vector512 Invoke(Vector512 x) => throw new NotSupportedException(); + } + + /// T.Acosh(x) + internal readonly struct AcoshOperator : IUnaryOperator + where T : IHyperbolicFunctions + { + public static bool Vectorizable => false; // TODO: Vectorize + public static T Invoke(T x) => T.Acosh(x); + public static Vector128 Invoke(Vector128 x) => throw new NotSupportedException(); + public static Vector256 Invoke(Vector256 x) => throw new NotSupportedException(); + public static Vector512 Invoke(Vector512 x) => throw new NotSupportedException(); + } + + /// T.AcosPi(x) + internal readonly struct AcosPiOperator : IUnaryOperator + where T : ITrigonometricFunctions + { + public static bool Vectorizable => AcosOperator.Vectorizable; + public static T Invoke(T x) => T.AcosPi(x); + public static Vector128 Invoke(Vector128 x) => AcosOperator.Invoke(x) / Vector128.Create(T.Pi); + public static Vector256 Invoke(Vector256 x) => AcosOperator.Invoke(x) / Vector256.Create(T.Pi); + public static Vector512 Invoke(Vector512 x) => AcosOperator.Invoke(x) / Vector512.Create(T.Pi); + } + + /// T.Asin(x) + internal readonly struct AsinOperator : IUnaryOperator + where T : ITrigonometricFunctions + { + public static bool Vectorizable => false; // TODO: Vectorize + public static T Invoke(T x) => T.Asin(x); + public static Vector128 Invoke(Vector128 x) => throw new NotSupportedException(); + public static Vector256 Invoke(Vector256 x) => throw new NotSupportedException(); + public static Vector512 Invoke(Vector512 x) => throw new NotSupportedException(); + } + + /// T.Asinh(x) + internal readonly struct AsinhOperator : IUnaryOperator + where T : IHyperbolicFunctions + { + public static bool Vectorizable => false; // TODO: Vectorize + public static T Invoke(T x) => T.Asinh(x); + public static Vector128 Invoke(Vector128 x) => throw new NotSupportedException(); + public static Vector256 Invoke(Vector256 x) => throw new NotSupportedException(); + public static Vector512 Invoke(Vector512 x) => throw new NotSupportedException(); + } + + /// T.AsinPi(x) + internal readonly struct AsinPiOperator : IUnaryOperator + where T : ITrigonometricFunctions + { + public static bool Vectorizable => AsinOperator.Vectorizable; + public static T Invoke(T x) => T.AsinPi(x); + public static Vector128 Invoke(Vector128 x) => AsinOperator.Invoke(x) / Vector128.Create(T.Pi); + public static Vector256 Invoke(Vector256 x) => AsinOperator.Invoke(x) / Vector256.Create(T.Pi); + public static Vector512 Invoke(Vector512 x) => AsinOperator.Invoke(x) / Vector512.Create(T.Pi); + } + + /// T.Atan(x) + internal readonly struct AtanOperator : IUnaryOperator + where T : ITrigonometricFunctions + { + public static bool Vectorizable => false; // TODO: Vectorize + public static T Invoke(T x) => T.Atan(x); + public static Vector128 Invoke(Vector128 x) => throw new NotSupportedException(); + public static Vector256 Invoke(Vector256 x) => throw new NotSupportedException(); + public static Vector512 Invoke(Vector512 x) => throw new NotSupportedException(); + } + + /// T.Atanh(x) + internal readonly struct AtanhOperator : IUnaryOperator + where T : IHyperbolicFunctions + { + public static bool Vectorizable => false; // TODO: Vectorize + public static 
T Invoke(T x) => T.Atanh(x); + public static Vector128 Invoke(Vector128 x) => throw new NotSupportedException(); + public static Vector256 Invoke(Vector256 x) => throw new NotSupportedException(); + public static Vector512 Invoke(Vector512 x) => throw new NotSupportedException(); + } + + /// T.AtanPi(x) + internal readonly struct AtanPiOperator : IUnaryOperator + where T : ITrigonometricFunctions + { + public static bool Vectorizable => AtanOperator.Vectorizable; + public static T Invoke(T x) => T.AtanPi(x); + public static Vector128 Invoke(Vector128 x) => AtanOperator.Invoke(x) / Vector128.Create(T.Pi); + public static Vector256 Invoke(Vector256 x) => AtanOperator.Invoke(x) / Vector256.Create(T.Pi); + public static Vector512 Invoke(Vector512 x) => AtanOperator.Invoke(x) / Vector512.Create(T.Pi); + } + + /// T.Atan2(y, x) + internal readonly struct Atan2Operator : IBinaryOperator + where T : IFloatingPointIeee754 + { + public static bool Vectorizable => false; // TODO: Vectorize + public static T Invoke(T y, T x) => T.Atan2(y, x); + public static Vector128 Invoke(Vector128 y, Vector128 x) => throw new NotSupportedException(); + public static Vector256 Invoke(Vector256 y, Vector256 x) => throw new NotSupportedException(); + public static Vector512 Invoke(Vector512 y, Vector512 x) => throw new NotSupportedException(); + } + + /// T.Atan2Pi(y, x) + internal readonly struct Atan2PiOperator : IBinaryOperator + where T : IFloatingPointIeee754 + { + public static bool Vectorizable => Atan2Operator.Vectorizable; + public static T Invoke(T y, T x) => T.Atan2Pi(y, x); + public static Vector128 Invoke(Vector128 y, Vector128 x) => Atan2Operator.Invoke(y, x) / Vector128.Create(T.Pi); + public static Vector256 Invoke(Vector256 y, Vector256 x) => Atan2Operator.Invoke(y, x) / Vector256.Create(T.Pi); + public static Vector512 Invoke(Vector512 y, Vector512 x) => Atan2Operator.Invoke(y, x) / Vector512.Create(T.Pi); + } + + /// T.Cos(x) + internal readonly struct CosOperator : IUnaryOperator + where T : ITrigonometricFunctions + { + public static bool Vectorizable => false; // TODO: Vectorize + public static T Invoke(T x) => T.Cos(x); + public static Vector128 Invoke(Vector128 x) => throw new NotSupportedException(); + public static Vector256 Invoke(Vector256 x) => throw new NotSupportedException(); + public static Vector512 Invoke(Vector512 x) => throw new NotSupportedException(); + } + + /// T.CosPi(x) + internal readonly struct CosPiOperator : IUnaryOperator + where T : ITrigonometricFunctions + { + public static bool Vectorizable => CosOperator.Vectorizable; + public static T Invoke(T x) => T.CosPi(x); + public static Vector128 Invoke(Vector128 x) => CosOperator.Invoke(x * Vector128.Create(T.Pi)); + public static Vector256 Invoke(Vector256 x) => CosOperator.Invoke(x * Vector256.Create(T.Pi)); + public static Vector512 Invoke(Vector512 x) => CosOperator.Invoke(x * Vector512.Create(T.Pi)); + } + + /// T.Cosh(x) + internal readonly struct CoshOperator : IUnaryOperator + where T : IHyperbolicFunctions + { + // This code is based on `vrs4_coshf` from amd/aocl-libm-ose + // Copyright (C) 2008-2022 Advanced Micro Devices, Inc. All rights reserved. 
+ // + // Licensed under the BSD 3-Clause "New" or "Revised" License + // See THIRD-PARTY-NOTICES.TXT for the full license text + + // Spec: + // coshf(|x| > 89.415985107421875) = Infinity + // coshf(Infinity) = infinity + // coshf(-Infinity) = infinity + // + // cosh(x) = (exp(x) + exp(-x))/2 + // cosh(-x) = +cosh(x) + // + // checks for special cases + // if ( asint(x) > infinity) return x with overflow exception and + // return x. + // if x is NaN then raise invalid FP operation exception and return x. + // + // coshf = v/2 * exp(x - log(v)) where v = 0x1.0000e8p-1 + + private const float LOGV = 0.693161f; + private const float HALFV = 1.0000138f; + private const float INVV2 = 0.24999309f; + + public static bool Vectorizable => typeof(T) == typeof(float); + + public static T Invoke(T x) => T.Cosh(x); + + public static Vector128 Invoke(Vector128 t) + { + Debug.Assert(typeof(T) == typeof(float)); + Vector128 x = t.AsSingle(); + + Vector128 y = Vector128.Abs(x); + Vector128 z = ExpOperator.Invoke(y - Vector128.Create(LOGV)); + return (Vector128.Create(HALFV) * (z + (Vector128.Create(INVV2) / z))).As(); + } + + public static Vector256 Invoke(Vector256 t) + { + Debug.Assert(typeof(T) == typeof(float)); + Vector256 x = t.AsSingle(); + + Vector256 y = Vector256.Abs(x); + Vector256 z = ExpOperator.Invoke(y - Vector256.Create(LOGV)); + return (Vector256.Create(HALFV) * (z + (Vector256.Create(INVV2) / z))).As(); + } + + public static Vector512 Invoke(Vector512 t) + { + Debug.Assert(typeof(T) == typeof(float)); + Vector512 x = t.AsSingle(); + + Vector512 y = Vector512.Abs(x); + Vector512 z = ExpOperator.Invoke(y - Vector512.Create(LOGV)); + return (Vector512.Create(HALFV) * (z + (Vector512.Create(INVV2) / z))).As(); + } + } + + /// T.Sin(x) + internal readonly struct SinOperator : IUnaryOperator + where T : ITrigonometricFunctions + { + public static bool Vectorizable => false; // TODO: Vectorize + public static T Invoke(T x) => T.Sin(x); + public static Vector128 Invoke(Vector128 x) => throw new NotSupportedException(); + public static Vector256 Invoke(Vector256 x) => throw new NotSupportedException(); + public static Vector512 Invoke(Vector512 x) => throw new NotSupportedException(); + } + + /// T.SinPi(x) + internal readonly struct SinPiOperator : IUnaryOperator + where T : ITrigonometricFunctions + { + public static bool Vectorizable => SinOperator.Vectorizable; + public static T Invoke(T x) => T.SinPi(x); + public static Vector128 Invoke(Vector128 x) => SinOperator.Invoke(x * Vector128.Create(T.Pi)); + public static Vector256 Invoke(Vector256 x) => SinOperator.Invoke(x * Vector256.Create(T.Pi)); + public static Vector512 Invoke(Vector512 x) => SinOperator.Invoke(x * Vector512.Create(T.Pi)); + } + + /// T.Sinh(x) + internal readonly struct SinhOperator : IUnaryOperator + where T : IHyperbolicFunctions + { + // Same as cosh, but with `z -` rather than `z +`, and with the sign + // flipped on the result based on the sign of the input. 
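+ // Why the shared constants work (illustrative derivation from the values above, where
+ // exp(LOGV) is approximately 2.0000277, HALFV is approximately exp(LOGV)/2, and INVV2 is
+ // approximately 1/exp(LOGV)^2): letting z = exp(|x| - LOGV) = exp(|x|)/exp(LOGV), we get
+ //   HALFV * z ~= exp(|x|)/2   and   HALFV * INVV2 / z ~= exp(-|x|)/2,
+ // so cosh(x) ~= HALFV * (z + INVV2 / z) and sinh(|x|) ~= HALFV * (z - INVV2 / z),
+ // with the sign of x reapplied to the sinh result (see SIGN_MASK below).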
private const uint SIGN_MASK = 0x7FFFFFFF; private const float LOGV = 0.693161f; @@ -11699,6 +12289,28 @@ public static Vector512 Invoke(Vector512 t) } } + /// T.Tan(x) + internal readonly struct TanOperator : IUnaryOperator + where T : ITrigonometricFunctions + { + public static bool Vectorizable => false; // TODO: Vectorize + public static T Invoke(T x) => T.Tan(x); + public static Vector128 Invoke(Vector128 x) => throw new NotSupportedException(); + public static Vector256 Invoke(Vector256 x) => throw new NotSupportedException(); + public static Vector512 Invoke(Vector512 x) => throw new NotSupportedException(); + } + + /// T.TanPi(x) + internal readonly struct TanPiOperator : IUnaryOperator + where T : ITrigonometricFunctions + { + public static bool Vectorizable => TanOperator.Vectorizable; + public static T Invoke(T x) => T.TanPi(x); + public static Vector128 Invoke(Vector128 x) => TanOperator.Invoke(x * Vector128.Create(T.Pi)); + public static Vector256 Invoke(Vector256 x) => TanOperator.Invoke(x * Vector256.Create(T.Pi)); + public static Vector512 Invoke(Vector512 x) => TanOperator.Invoke(x * Vector512.Create(T.Pi)); + } + /// T.Tanh(x) internal readonly struct TanhOperator : IUnaryOperator where T : IHyperbolicFunctions @@ -13113,22 +13725,77 @@ public static Vector512 Invoke(Vector512 x) } #endif + /// T.Log10(x) + internal readonly struct Log10Operator : IUnaryOperator + where T : ILogarithmicFunctions + { + public static bool Vectorizable => false; // TODO: Vectorize + public static T Invoke(T x) => T.Log10(x); + public static Vector128 Invoke(Vector128 x) => throw new NotSupportedException(); + public static Vector256 Invoke(Vector256 x) => throw new NotSupportedException(); + public static Vector512 Invoke(Vector512 x) => throw new NotSupportedException(); + } + + /// T.LogP1(x) + internal readonly struct LogP1Operator : IUnaryOperator + where T : ILogarithmicFunctions + { + public static bool Vectorizable => LogOperator.Vectorizable; + public static T Invoke(T x) => T.LogP1(x); + public static Vector128 Invoke(Vector128 x) => LogOperator.Invoke(x + Vector128.One); + public static Vector256 Invoke(Vector256 x) => LogOperator.Invoke(x + Vector256.One); + public static Vector512 Invoke(Vector512 x) => LogOperator.Invoke(x + Vector512.One); + } + + /// T.Log2P1(x) + internal readonly struct Log2P1Operator : IUnaryOperator + where T : ILogarithmicFunctions + { + public static bool Vectorizable => Log2Operator.Vectorizable; + public static T Invoke(T x) => T.Log2P1(x); + public static Vector128 Invoke(Vector128 x) => Log2Operator.Invoke(x + Vector128.One); + public static Vector256 Invoke(Vector256 x) => Log2Operator.Invoke(x + Vector256.One); + public static Vector512 Invoke(Vector512 x) => Log2Operator.Invoke(x + Vector512.One); + } + + /// T.Log10P1(x) + internal readonly struct Log10P1Operator : IUnaryOperator + where T : ILogarithmicFunctions + { + public static bool Vectorizable => Log10Operator.Vectorizable; + public static T Invoke(T x) => T.Log10P1(x); + public static Vector128 Invoke(Vector128 x) => Log10Operator.Invoke(x + Vector128.One); + public static Vector256 Invoke(Vector256 x) => Log10Operator.Invoke(x + Vector256.One); + public static Vector512 Invoke(Vector512 x) => Log10Operator.Invoke(x + Vector512.One); + } + + /// T.Log(x, y) + internal readonly struct LogBaseOperator : IBinaryOperator + where T : ILogarithmicFunctions + { + public static bool Vectorizable => false; // TODO: Vectorize + public static T Invoke(T x, T y) => T.Log(x, y); + public static 
Vector128 Invoke(Vector128 x, Vector128 y) => throw new NotSupportedException(); + public static Vector256 Invoke(Vector256 x, Vector256 y) => throw new NotSupportedException(); + public static Vector512 Invoke(Vector512 x, Vector512 y) => throw new NotSupportedException(); + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] private static Vector128 ElementWiseSelect(Vector128 mask, Vector128 left, Vector128 right) { - if (Sse41.IsSupported) - { - if (typeof(T) == typeof(byte)) return Sse41.BlendVariable(left.AsByte(), right.AsByte(), (~mask).AsByte()).As(); - if (typeof(T) == typeof(sbyte)) return Sse41.BlendVariable(left.AsSByte(), right.AsSByte(), (~mask).AsSByte()).As(); - if (typeof(T) == typeof(ushort)) return Sse41.BlendVariable(left.AsUInt16(), right.AsUInt16(), (~mask).AsUInt16()).As(); - if (typeof(T) == typeof(short)) return Sse41.BlendVariable(left.AsInt16(), right.AsInt16(), (~mask).AsInt16()).As(); - if (typeof(T) == typeof(uint)) return Sse41.BlendVariable(left.AsUInt32(), right.AsUInt32(), (~mask).AsUInt32()).As(); - if (typeof(T) == typeof(int)) return Sse41.BlendVariable(left.AsInt32(), right.AsInt32(), (~mask).AsInt32()).As(); - if (typeof(T) == typeof(ulong)) return Sse41.BlendVariable(left.AsUInt64(), right.AsUInt64(), (~mask).AsUInt64()).As(); - if (typeof(T) == typeof(long)) return Sse41.BlendVariable(left.AsInt64(), right.AsInt64(), (~mask).AsInt64()).As(); - if (typeof(T) == typeof(float)) return Sse41.BlendVariable(left.AsSingle(), right.AsSingle(), (~mask).AsSingle()).As(); - if (typeof(T) == typeof(double)) return Sse41.BlendVariable(left.AsDouble(), right.AsDouble(), (~mask).AsDouble()).As(); - } + //if (Sse41.IsSupported) + //{ + // if (typeof(T) == typeof(byte)) return Sse41.BlendVariable(left.AsByte(), right.AsByte(), (~mask).AsByte()).As(); + // if (typeof(T) == typeof(sbyte)) return Sse41.BlendVariable(left.AsSByte(), right.AsSByte(), (~mask).AsSByte()).As(); + // if (typeof(T) == typeof(ushort)) return Sse41.BlendVariable(left.AsUInt16(), right.AsUInt16(), (~mask).AsUInt16()).As(); + // if (typeof(T) == typeof(short)) return Sse41.BlendVariable(left.AsInt16(), right.AsInt16(), (~mask).AsInt16()).As(); + // if (typeof(T) == typeof(uint)) return Sse41.BlendVariable(left.AsUInt32(), right.AsUInt32(), (~mask).AsUInt32()).As(); + // if (typeof(T) == typeof(int)) return Sse41.BlendVariable(left.AsInt32(), right.AsInt32(), (~mask).AsInt32()).As(); + // if (typeof(T) == typeof(ulong)) return Sse41.BlendVariable(left.AsUInt64(), right.AsUInt64(), (~mask).AsUInt64()).As(); + // if (typeof(T) == typeof(long)) return Sse41.BlendVariable(left.AsInt64(), right.AsInt64(), (~mask).AsInt64()).As(); + // if (typeof(T) == typeof(float)) return Sse41.BlendVariable(left.AsSingle(), right.AsSingle(), (~mask).AsSingle()).As(); + // if (typeof(T) == typeof(double)) return Sse41.BlendVariable(left.AsDouble(), right.AsDouble(), (~mask).AsDouble()).As(); + //} return Vector128.ConditionalSelect(mask, left, right); } @@ -13136,19 +13803,19 @@ private static Vector128 ElementWiseSelect(Vector128 mask, Vector128 [MethodImpl(MethodImplOptions.AggressiveInlining)] private static Vector256 ElementWiseSelect(Vector256 mask, Vector256 left, Vector256 right) { - if (Avx2.IsSupported) - { - if (typeof(T) == typeof(byte)) return Avx2.BlendVariable(left.AsByte(), right.AsByte(), (~mask).AsByte()).As(); - if (typeof(T) == typeof(sbyte)) return Avx2.BlendVariable(left.AsSByte(), right.AsSByte(), (~mask).AsSByte()).As(); - if (typeof(T) == typeof(ushort)) return 
Avx2.BlendVariable(left.AsUInt16(), right.AsUInt16(), (~mask).AsUInt16()).As(); - if (typeof(T) == typeof(short)) return Avx2.BlendVariable(left.AsInt16(), right.AsInt16(), (~mask).AsInt16()).As(); - if (typeof(T) == typeof(uint)) return Avx2.BlendVariable(left.AsUInt32(), right.AsUInt32(), (~mask).AsUInt32()).As(); - if (typeof(T) == typeof(int)) return Avx2.BlendVariable(left.AsInt32(), right.AsInt32(), (~mask).AsInt32()).As(); - if (typeof(T) == typeof(ulong)) return Avx2.BlendVariable(left.AsUInt64(), right.AsUInt64(), (~mask).AsUInt64()).As(); - if (typeof(T) == typeof(long)) return Avx2.BlendVariable(left.AsInt64(), right.AsInt64(), (~mask).AsInt64()).As(); - if (typeof(T) == typeof(float)) return Avx2.BlendVariable(left.AsSingle(), right.AsSingle(), (~mask).AsSingle()).As(); - if (typeof(T) == typeof(double)) return Avx2.BlendVariable(left.AsDouble(), right.AsDouble(), (~mask).AsDouble()).As(); - } + //if (Avx2.IsSupported) + //{ + // if (typeof(T) == typeof(byte)) return Avx2.BlendVariable(left.AsByte(), right.AsByte(), (~mask).AsByte()).As(); + // if (typeof(T) == typeof(sbyte)) return Avx2.BlendVariable(left.AsSByte(), right.AsSByte(), (~mask).AsSByte()).As(); + // if (typeof(T) == typeof(ushort)) return Avx2.BlendVariable(left.AsUInt16(), right.AsUInt16(), (~mask).AsUInt16()).As(); + // if (typeof(T) == typeof(short)) return Avx2.BlendVariable(left.AsInt16(), right.AsInt16(), (~mask).AsInt16()).As(); + // if (typeof(T) == typeof(uint)) return Avx2.BlendVariable(left.AsUInt32(), right.AsUInt32(), (~mask).AsUInt32()).As(); + // if (typeof(T) == typeof(int)) return Avx2.BlendVariable(left.AsInt32(), right.AsInt32(), (~mask).AsInt32()).As(); + // if (typeof(T) == typeof(ulong)) return Avx2.BlendVariable(left.AsUInt64(), right.AsUInt64(), (~mask).AsUInt64()).As(); + // if (typeof(T) == typeof(long)) return Avx2.BlendVariable(left.AsInt64(), right.AsInt64(), (~mask).AsInt64()).As(); + // if (typeof(T) == typeof(float)) return Avx2.BlendVariable(left.AsSingle(), right.AsSingle(), (~mask).AsSingle()).As(); + // if (typeof(T) == typeof(double)) return Avx2.BlendVariable(left.AsDouble(), right.AsDouble(), (~mask).AsDouble()).As(); + //} return Vector256.ConditionalSelect(mask, left, right); } @@ -13156,20 +13823,20 @@ private static Vector256 ElementWiseSelect(Vector256 mask, Vector256 [MethodImpl(MethodImplOptions.AggressiveInlining)] private static Vector512 ElementWiseSelect(Vector512 mask, Vector512 left, Vector512 right) { - if (Avx512F.IsSupported) - { - if (typeof(T) == typeof(uint)) return Avx512F.BlendVariable(left.AsUInt32(), right.AsUInt32(), (~mask).AsUInt32()).As(); - if (typeof(T) == typeof(int)) return Avx512F.BlendVariable(left.AsInt32(), right.AsInt32(), (~mask).AsInt32()).As(); - if (typeof(T) == typeof(ulong)) return Avx512F.BlendVariable(left.AsUInt64(), right.AsUInt64(), (~mask).AsUInt64()).As(); - if (typeof(T) == typeof(long)) return Avx512F.BlendVariable(left.AsInt64(), right.AsInt64(), (~mask).AsInt64()).As(); - if (typeof(T) == typeof(float)) return Avx512F.BlendVariable(left.AsSingle(), right.AsSingle(), (~mask).AsSingle()).As(); - if (typeof(T) == typeof(double)) return Avx512F.BlendVariable(left.AsDouble(), right.AsDouble(), (~mask).AsDouble()).As(); - } + //if (Avx512F.IsSupported) + //{ + // if (typeof(T) == typeof(uint)) return Avx512F.BlendVariable(left.AsUInt32(), right.AsUInt32(), (~mask).AsUInt32()).As(); + // if (typeof(T) == typeof(int)) return Avx512F.BlendVariable(left.AsInt32(), right.AsInt32(), (~mask).AsInt32()).As(); + // if (typeof(T) == 
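For ElementWiseSelect, the intrinsic fast paths are commented out in this revision and every width falls back to ConditionalSelect. The sketch below (sample vectors chosen here) shows why the disabled paths passed ~mask as the blend control: the blend instructions take elements from their second source where the control operand's high bits are set, so inverting a comparison mask reproduces ConditionalSelect(mask, left, right) for the all-ones/all-zeros masks that vector comparisons produce.

using System;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

Vector128<int> left  = Vector128.Create(1, 2, 3, 4);
Vector128<int> right = Vector128.Create(10, 20, 30, 40);
Vector128<int> mask  = Vector128.GreaterThan(left, Vector128.Create(2)); // all-ones or all-zeros per lane

Vector128<int> portable = Vector128.ConditionalSelect(mask, left, right);
Console.WriteLine(portable); // <10, 20, 3, 4>

if (Sse41.IsSupported)
{
    // Same selection via the (currently disabled) intrinsic path: note the inverted mask.
    Vector128<int> blended = Sse41.BlendVariable(left, right, ~mask);
    Console.WriteLine(blended == portable); // True
}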
typeof(ulong)) return Avx512F.BlendVariable(left.AsUInt64(), right.AsUInt64(), (~mask).AsUInt64()).As(); + // if (typeof(T) == typeof(long)) return Avx512F.BlendVariable(left.AsInt64(), right.AsInt64(), (~mask).AsInt64()).As(); + // if (typeof(T) == typeof(float)) return Avx512F.BlendVariable(left.AsSingle(), right.AsSingle(), (~mask).AsSingle()).As(); + // if (typeof(T) == typeof(double)) return Avx512F.BlendVariable(left.AsDouble(), right.AsDouble(), (~mask).AsDouble()).As(); + //} return Vector512.ConditionalSelect(mask, left, right); } - /// 1f / (1f + MathF.Exp(-x)) + /// 1 / (1 + T.Exp(-x)) internal readonly struct SigmoidOperator : IUnaryOperator where T : IExponentialFunctions { public static bool Vectorizable => typeof(T) == typeof(float); @@ -13179,6 +13846,368 @@ private static Vector512 ElementWiseSelect(Vector512 mask, Vector512 public static Vector512 Invoke(Vector512 x) => Vector512.Create(T.One) / (Vector512.Create(T.One) + ExpOperator.Invoke(-x)); } + internal readonly struct CeilingOperator : IUnaryOperator where T : IFloatingPoint + { + public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double); + + public static T Invoke(T x) => T.Ceiling(x); + + public static Vector128 Invoke(Vector128 x) + { + if (typeof(T) == typeof(float)) + { + return Vector128.Ceiling(x.AsSingle()).As(); + } + + if (typeof(T) == typeof(double)) + { + return Vector128.Ceiling(x.AsDouble()).As(); + } + + throw new NotSupportedException(); + } + + public static Vector256 Invoke(Vector256 x) + { + if (typeof(T) == typeof(float)) + { + return Vector256.Ceiling(x.AsSingle()).As(); + } + + if (typeof(T) == typeof(double)) + { + return Vector256.Ceiling(x.AsDouble()).As(); + } + + throw new NotSupportedException(); + } + + public static Vector512 Invoke(Vector512 x) + { + if (typeof(T) == typeof(float)) + { + return Vector512.Ceiling(x.AsSingle()).As(); + } + + if (typeof(T) == typeof(double)) + { + return Vector512.Ceiling(x.AsDouble()).As(); + } + + throw new NotSupportedException(); + } + } + + internal readonly struct FloorOperator : IUnaryOperator where T : IFloatingPoint + { + public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double); + + public static T Invoke(T x) => T.Floor(x); + + public static Vector128 Invoke(Vector128 x) + { + if (typeof(T) == typeof(float)) + { + return Vector128.Floor(x.AsSingle()).As(); + } + + if (typeof(T) == typeof(double)) + { + return Vector128.Floor(x.AsDouble()).As(); + } + + throw new NotSupportedException(); + } + + public static Vector256 Invoke(Vector256 x) + { + if (typeof(T) == typeof(float)) + { + return Vector256.Floor(x.AsSingle()).As(); + } + + if (typeof(T) == typeof(double)) + { + return Vector256.Floor(x.AsDouble()).As(); + } + + throw new NotSupportedException(); + } + + public static Vector512 Invoke(Vector512 x) + { + if (typeof(T) == typeof(float)) + { + return Vector512.Floor(x.AsSingle()).As(); + } + + if (typeof(T) == typeof(double)) + { + return Vector512.Floor(x.AsDouble()).As(); + } + + throw new NotSupportedException(); + } + } + + private readonly struct TruncateOperator : IUnaryOperator where T : IFloatingPoint + { + public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double); + + public static T Invoke(T x) => T.Truncate(x); + + public static Vector128 Invoke(Vector128 x) + { + if (typeof(T) == typeof(float)) + { + if (Sse41.IsSupported) + { + return Sse41.RoundToZero(x.AsSingle()).As(); + } + + if (AdvSimd.IsSupported) + { + return 
AdvSimd.RoundToZero(x.AsSingle()).As(); + } + + return Vector128.ConditionalSelect(Vector128.GreaterThanOrEqual(x, Vector128.Zero), + Vector128.Floor(x.AsSingle()).As(), + Vector128.Ceiling(x.AsSingle()).As()); + } + + if (typeof(T) == typeof(double)) + { + if (Sse41.IsSupported) + { + return Sse41.RoundToZero(x.AsDouble()).As(); + } + + return Vector128.ConditionalSelect(Vector128.GreaterThanOrEqual(x, Vector128.Zero), + Vector128.Floor(x.AsDouble()).As(), + Vector128.Ceiling(x.AsDouble()).As()); + } + + throw new NotSupportedException(); + } + + public static Vector256 Invoke(Vector256 x) + { + if (typeof(T) == typeof(float)) + { + if (Avx.IsSupported) + { + return Avx.RoundToZero(x.AsSingle()).As(); + } + + return Vector256.ConditionalSelect(Vector256.GreaterThanOrEqual(x, Vector256.Zero), + Vector256.Floor(x.AsSingle()).As(), + Vector256.Ceiling(x.AsSingle()).As()); + } + + if (typeof(T) == typeof(double)) + { + if (Avx.IsSupported) + { + return Avx.RoundToZero(x.AsDouble()).As(); + } + + return Vector256.ConditionalSelect(Vector256.GreaterThanOrEqual(x, Vector256.Zero), + Vector256.Floor(x.AsDouble()).As(), + Vector256.Ceiling(x.AsDouble()).As()); + } + + throw new NotSupportedException(); + } + + public static Vector512 Invoke(Vector512 x) + { + if (typeof(T) == typeof(float)) + { + if (Avx512F.IsSupported) + { + return Avx512F.RoundScale(x.AsSingle(), 0b11).As(); + } + + return Vector512.ConditionalSelect(Vector512.GreaterThanOrEqual(x, Vector512.Zero), + Vector512.Floor(x.AsSingle()).As(), + Vector512.Ceiling(x.AsSingle()).As()); + } + + if (typeof(T) == typeof(double)) + { + if (Avx512F.IsSupported) + { + return Avx512F.RoundScale(x.AsDouble(), 0b11).As(); + } + + return Vector512.ConditionalSelect(Vector512.GreaterThanOrEqual(x, Vector512.Zero), + Vector512.Floor(x.AsDouble()).As(), + Vector512.Ceiling(x.AsDouble()).As()); + } + + throw new NotSupportedException(); + } + } + + /// T.PopCount(x) + internal readonly struct PopCountOperator : IUnaryOperator where T : IBinaryInteger + { + public static bool Vectorizable => false; // TODO: Vectorize + public static T Invoke(T x) => T.PopCount(x); + public static Vector128 Invoke(Vector128 x) => throw new NotSupportedException(); + public static Vector256 Invoke(Vector256 x) => throw new NotSupportedException(); + public static Vector512 Invoke(Vector512 x) => throw new NotSupportedException(); + } + + /// T.LeadingZeroCount(x) + internal readonly struct LeadingZeroCountOperator : IUnaryOperator where T : IBinaryInteger + { + public static bool Vectorizable => false; // TODO: Vectorize + public static T Invoke(T x) => T.LeadingZeroCount(x); + public static Vector128 Invoke(Vector128 x) => throw new NotSupportedException(); + public static Vector256 Invoke(Vector256 x) => throw new NotSupportedException(); + public static Vector512 Invoke(Vector512 x) => throw new NotSupportedException(); + } + + /// T.TrailingZeroCount(x) + internal readonly struct TrailingZeroCountOperator : IUnaryOperator where T : IBinaryInteger + { + public static bool Vectorizable => false; // TODO: Vectorize + public static T Invoke(T x) => T.TrailingZeroCount(x); + public static Vector128 Invoke(Vector128 x) => throw new NotSupportedException(); + public static Vector256 Invoke(Vector256 x) => throw new NotSupportedException(); + public static Vector512 Invoke(Vector512 x) => throw new NotSupportedException(); + } + + private readonly struct CopySignOperator : IBinaryOperator where T : INumber + { + // This method is required to work for all inputs, including 
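TruncateOperator prefers a round-toward-zero instruction where one exists (Sse41/Avx RoundToZero, AdvSimd.RoundToZero, or Avx512F.RoundScale with immediate 0b11, which selects the same truncating mode) and otherwise picks Floor for non-negative lanes and Ceiling for negative ones. The scalar sketch below (sample values chosen here) restates that fallback identity.

using System;

// Illustrative only: truncation toward zero is Floor for x >= 0 and Ceiling for x < 0,
// which is exactly what the ConditionalSelect fallback above encodes per lane.
static double TruncateViaFloorCeiling(double x) =>
    x >= 0 ? Math.Floor(x) : Math.Ceiling(x);

foreach (double x in new[] { 2.7, -2.7, 0.5, -0.5, 3.0 })
{
    Console.WriteLine($"{x,5}: Math.Truncate = {Math.Truncate(x),3}, floor/ceiling select = {TruncateViaFloorCeiling(x),3}");
}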
NaN, so we operate on the raw bits. + // Remove the sign from x, and remove everything but the sign from y. + // Then OR them to get the correct sign. + private const int SingleSignMask = 1 << 31; + private const long DoubleSignMask = 1L << 63; + + public static bool Vectorizable => true; + + public static T Invoke(T x, T y) => T.CopySign(x, y); + + public static Vector128 Invoke(Vector128 x, Vector128 y) + { + if (typeof(T) == typeof(float)) + { + return + ((x.AsInt32() & Vector128.Create(~SingleSignMask)) | + y.AsInt32() & Vector128.Create(SingleSignMask)).As(); + } + + if (typeof(T) == typeof(double)) + { + return + ((x.AsInt64() & Vector128.Create(~DoubleSignMask)) | + y.AsInt64() & Vector128.Create(DoubleSignMask)).As(); + } + + if (typeof(T) == typeof(sbyte) || typeof(T) == typeof(short) || typeof(T) == typeof(int) || typeof(T) == typeof(long) || typeof(T) == typeof(nint)) + { + Vector128 absValue = Vector128.Abs(x); + Vector128 sign = Vector128.GreaterThanOrEqual(y, Vector128.Zero); + Vector128 error = sign & Vector128.LessThan(absValue, Vector128.Zero); + if (error != Vector128.Zero) + { + Math.Abs(int.MinValue); // throw OverflowException + } + + return Vector128.ConditionalSelect(sign, absValue, -absValue); + } + + return x; + } + + public static Vector256 Invoke(Vector256 x, Vector256 y) + { + if (typeof(T) == typeof(float)) + { + return + ((x.AsInt32() & Vector256.Create(~SingleSignMask)) | + y.AsInt32() & Vector256.Create(SingleSignMask)).As(); + } + + if (typeof(T) == typeof(double)) + { + return + ((x.AsInt64() & Vector256.Create(~DoubleSignMask)) | + y.AsInt64() & Vector256.Create(DoubleSignMask)).As(); + } + + if (typeof(T) == typeof(sbyte) || typeof(T) == typeof(short) || typeof(T) == typeof(int) || typeof(T) == typeof(long) || typeof(T) == typeof(nint)) + { + Vector256 absValue = Vector256.Abs(x); + Vector256 sign = Vector256.GreaterThanOrEqual(y, Vector256.Zero); + Vector256 error = sign & Vector256.LessThan(absValue, Vector256.Zero); + if (error != Vector256.Zero) + { + Math.Abs(int.MinValue); // throw OverflowException + } + + return Vector256.ConditionalSelect(sign, absValue, -absValue); + } + + return x; + } + + public static Vector512 Invoke(Vector512 x, Vector512 y) + { + if (typeof(T) == typeof(float)) + { + return + ((x.AsInt32() & Vector512.Create(~SingleSignMask)) | + y.AsInt32() & Vector512.Create(SingleSignMask)).As(); + } + + if (typeof(T) == typeof(double)) + { + return + ((x.AsInt64() & Vector512.Create(~DoubleSignMask)) | + y.AsInt64() & Vector512.Create(DoubleSignMask)).As(); + } + + if (typeof(T) == typeof(sbyte) || typeof(T) == typeof(short) || typeof(T) == typeof(int) || typeof(T) == typeof(long) || typeof(T) == typeof(nint)) + { + Vector512 absValue = Vector512.Abs(x); + Vector512 sign = Vector512.GreaterThanOrEqual(y, Vector512.Zero); + Vector512 error = sign & Vector512.LessThan(absValue, Vector512.Zero); + if (error != Vector512.Zero) + { + Math.Abs(int.MinValue); // throw OverflowException + } + + return Vector512.ConditionalSelect(sign, absValue, -absValue); + } + + return x; + } + } + + /// T.DegreesToRadians(x) + internal readonly struct DegreesToRadiansOperator : IUnaryOperator where T : ITrigonometricFunctions + { + public static bool Vectorizable => true; + public static T Invoke(T x) => T.DegreesToRadians(x); + public static Vector128 Invoke(Vector128 x) => (x * T.Pi) / T.CreateChecked(180); + public static Vector256 Invoke(Vector256 x) => (x * T.Pi) / T.CreateChecked(180); + public static Vector512 Invoke(Vector512 x) => (x * T.Pi) / 
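CopySignOperator's floating-point paths work on raw bits: clear the sign bit of x, isolate the sign bit of y, and OR the two, which stays well-defined for NaN inputs. The scalar float sketch below (sample values chosen here) mirrors that bit manipulation and compares it with MathF.CopySign.

using System;

// Illustrative only: the same mask-and-combine trick the vector paths above apply lane-wise.
static float CopySignViaBits(float x, float y)
{
    const int SignMask = 1 << 31;
    int bits = (BitConverter.SingleToInt32Bits(x) & ~SignMask)  // magnitude of x
             | (BitConverter.SingleToInt32Bits(y) & SignMask);  // sign of y
    return BitConverter.Int32BitsToSingle(bits);
}

Console.WriteLine(CopySignViaBits(3.5f, -0.0f));    // -3.5
Console.WriteLine(CopySignViaBits(-2f, 1f));        // 2
Console.WriteLine(CopySignViaBits(float.NaN, -1f)); // NaN, with the sign bit set
Console.WriteLine(MathF.CopySign(3.5f, -0.0f));     // -3.5, matches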
T.CreateChecked(180); + } + + /// T.RadiansToDegrees(x) + internal readonly struct RadiansToDegreesOperator : IUnaryOperator where T : ITrigonometricFunctions + { + public static bool Vectorizable => true; + public static T Invoke(T x) => T.RadiansToDegrees(x); + public static Vector128 Invoke(Vector128 x) => (x * T.CreateChecked(180)) / T.Pi; + public static Vector256 Invoke(Vector256 x) => (x * T.CreateChecked(180)) / T.Pi; + public static Vector512 Invoke(Vector512 x) => (x * T.CreateChecked(180)) / T.Pi; + } + /// Operator that takes one input value and returns a single value. private interface IUnaryOperator { @@ -13192,6 +14221,7 @@ private interface IUnaryOperator /// Operator that takes two input values and returns a single value. private interface IBinaryOperator { + static abstract bool Vectorizable { get; } static abstract T Invoke(T x, T y); static abstract Vector128 Invoke(Vector128 x, Vector128 y); static abstract Vector256 Invoke(Vector256 x, Vector256 y); diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netstandard/TensorPrimitives.Single.netstandard.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netstandard/TensorPrimitives.Single.netstandard.cs index fe32027099c7e..6b6d22aba468e 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netstandard/TensorPrimitives.Single.netstandard.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netstandard/TensorPrimitives.Single.netstandard.cs @@ -808,9 +808,11 @@ private static float MinMaxCore(ReadOnlySpan x, TMinMaxO private static readonly int[] s_0through7 = [0, 1, 2, 3, 4, 5, 6, 7]; - private static int IndexOfMinMaxCore(ReadOnlySpan x, TIndexOfMinMaxOperator op = default) + private static int IndexOfMinMaxCore(ReadOnlySpan x, TIndexOfMinMaxOperator op = default) where TIndexOfMinMaxOperator : struct, IIndexOfOperator { + Debug.Assert(typeof(T) == typeof(float), "The generic parameter exists only to provide the same signature as the generic implementation."); + if (x.IsEmpty) { return -1; diff --git a/src/libraries/System.Numerics.Tensors/src/System/ThrowHelper.cs b/src/libraries/System.Numerics.Tensors/src/System/ThrowHelper.cs index 272991aed44ab..380add4727e8d 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/ThrowHelper.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/ThrowHelper.cs @@ -9,7 +9,11 @@ internal static class ThrowHelper { [DoesNotReturn] public static void ThrowArgument_DestinationTooShort() => - throw new ArgumentException(SR.Argument_DestinationTooShort, "destination"); + ThrowArgument_DestinationTooShort("destination"); + + [DoesNotReturn] + public static void ThrowArgument_DestinationTooShort(string destinationName) => + throw new ArgumentException(SR.Argument_DestinationTooShort, destinationName); [DoesNotReturn] public static void ThrowArgument_SpansMustHaveSameLength() => diff --git a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs index de0269ca88924..766aedf91195f 100644 --- a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs +++ b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs @@ -8,6 +8,11 @@ using Xunit; using Xunit.Sdk; +// Tests specific to .NET Core generic APIs. 
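DegreesToRadiansOperator and RadiansToDegreesOperator vectorize the usual conversions, radians = degrees * pi / 180 and degrees = radians * 180 / pi. A minimal scalar check of those formulas, with sample values chosen here:

using System;

static double ToRadians(double degrees) => degrees * Math.PI / 180.0;
static double ToDegrees(double radians) => radians * 180.0 / Math.PI;

Console.WriteLine(ToRadians(90.0));            // ~1.5708 (pi/2)
Console.WriteLine(ToDegrees(Math.PI));         // 180 (up to rounding)
Console.WriteLine(ToDegrees(ToRadians(37.5))); // round-trips to ~37.5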
+// Some of the tests are written with functionality abstracted into helpers that provide the core operation: this +// is done when the tests are shared with legacy float-specific tests. Tests that don't need to be shared access +// the generic APIs directly. + namespace System.Numerics.Tensors.Tests { public class DoubleGenericTensorPrimitives : GenericFloatingPointNumberTensorPrimitivesTests { } @@ -30,6 +35,7 @@ public class UInt16GenericTensorPrimitives : GenericIntegerTensorPrimitivesTests public class CharGenericTensorPrimitives : GenericIntegerTensorPrimitivesTests { } public class UInt32GenericTensorPrimitives : GenericIntegerTensorPrimitivesTests { } public class UInt64GenericTensorPrimitives : GenericIntegerTensorPrimitivesTests { } + public class UIntPtrGenericTensorPrimitives : GenericIntegerTensorPrimitivesTests { } public class UInt128GenericTensorPrimitives : GenericIntegerTensorPrimitivesTests { } @@ -108,16 +114,858 @@ protected override void SetSpecialValues(Span x, Span y) x[pos] = T.Zero; y[pos] = T.NegativeZero; - // +Epsilon, -Epsilon - pos = Random.Next(x.Length); - x[pos] = T.Epsilon; - y[pos] = -T.Epsilon; + // +Epsilon, -Epsilon + pos = Random.Next(x.Length); + x[pos] = T.Epsilon; + y[pos] = -T.Epsilon; + + // Same magnitude, opposite sign + pos = Random.Next(x.Length); + x[pos] = T.CreateTruncating(5); + y[pos] = T.CreateTruncating(-5); + } + + #region Span -> Destination + public static IEnumerable SpanDestinationFunctionsToTest() + { + yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Acosh), new Func(T.Acosh) }; + yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.AcosPi), new Func(T.AcosPi) }; + yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Acos), new Func(T.Acos) }; + yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Asinh), new Func(T.Asinh) }; + yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.AsinPi), new Func(T.AsinPi) }; + yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Asin), new Func(T.Asin) }; + yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Atanh), new Func(T.Atanh) }; + yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.AtanPi), new Func(T.AtanPi) }; + yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Atan), new Func(T.Atan) }; + yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Cbrt), new Func(T.Cbrt) }; + yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Ceiling), new Func(T.Ceiling) }; + yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Cos), new Func(T.Cos) }; + yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Cosh), new Func(T.Cosh) }; + yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.CosPi), new Func(T.CosPi) }; + yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.DegreesToRadians), new Func(T.DegreesToRadians) }; + yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Exp), new Func(T.Exp) }; + yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Exp2), new Func(T.Exp2) }; + yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Exp10), new Func(T.Exp10) }; + yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.ExpM1), new Func(T.ExpM1) }; + yield return new object[] { new 
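Each entry in SpanDestinationFunctionsToTest pairs a TensorPrimitives span overload with the scalar generic-math function used as its oracle, and the theories that follow replay that pair across many tensor lengths. The condensed, self-contained sketch below shows the same oracle pattern outside the xUnit harness; Cosh, the inputs, and the tolerance are arbitrary choices made here.

using System;
using System.Numerics.Tensors;

float[] x = { -2f, -0.5f, 0f, 0.5f, 2f };
float[] destination = new float[x.Length];

TensorPrimitives.Cosh<float>(x, destination);

for (int i = 0; i < x.Length; i++)
{
    float expected = float.Cosh(x[i]); // scalar oracle, the Func<T, T> half of the test data
    if (Math.Abs(expected - destination[i]) > 1e-5f)
        throw new Exception($"Mismatch at {i}: {destination[i]} vs {expected}");
}
Console.WriteLine("All elements matched the scalar oracle.");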
SpanDestinationDelegate(TensorPrimitives.Exp2M1), new Func(T.Exp2M1) }; + yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Exp10M1), new Func(T.Exp10M1) }; + yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Floor), new Func(T.Floor) }; + yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Log), new Func(T.Log) }; + yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Log2), new Func(T.Log2) }; + yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Log10), new Func(T.Log10) }; + yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.LogP1), new Func(T.LogP1) }; + yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Log2P1), new Func(T.Log2P1) }; + yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Log10P1), new Func(T.Log10P1) }; + yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.RadiansToDegrees), new Func(T.RadiansToDegrees) }; + yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Reciprocal), new Func(f => T.One / f) }; + yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.ReciprocalEstimate), new Func(T.ReciprocalEstimate) }; + yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.ReciprocalSqrt), new Func(f => T.One / T.Sqrt(f)) }; + yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.ReciprocalSqrtEstimate), new Func(T.ReciprocalSqrtEstimate) }; + yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Round), new Func(T.Round) }; + yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Sin), new Func(T.Sin) }; + yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Sinh), new Func(T.Sinh) }; + yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.SinPi), new Func(T.SinPi) }; + yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Sqrt), new Func(T.Sqrt) }; + yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Tan), new Func(T.Tan) }; + yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Tanh), new Func(T.Tanh) }; + yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.TanPi), new Func(T.TanPi) }; + yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Truncate), new Func(T.Truncate) }; + } + + [Theory] + [MemberData(nameof(SpanDestinationFunctionsToTest))] + public void SpanDestinationFunctions_AllLengths(SpanDestinationDelegate tensorPrimitivesMethod, Func expectedMethod) + { + Assert.All(Helpers.TensorLengthsIncluding0, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + using BoundedMemory destination = CreateTensor(tensorLength); + + tensorPrimitivesMethod(x.Span, destination.Span); + + for (int i = 0; i < tensorLength; i++) + { + AssertEqualTolerance(expectedMethod(x[i]), destination[i]); + } + }); + } + + [Theory] + [MemberData(nameof(SpanDestinationFunctionsToTest))] + public void SpanDestinationFunctions_InPlace(SpanDestinationDelegate tensorPrimitivesMethod, Func expectedMethod) + { + Assert.All(Helpers.TensorLengthsIncluding0, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + T[] xOrig = x.Span.ToArray(); + + tensorPrimitivesMethod(x.Span, x.Span); + + for (int i = 0; i < tensorLength; i++) + { + AssertEqualTolerance(expectedMethod(xOrig[i]), x[i]); + } + }); + } + + 
[Theory] + [MemberData(nameof(SpanDestinationFunctionsToTest))] + public void SpanDestinationFunctions_SpecialValues(SpanDestinationDelegate tensorPrimitivesMethod, Func expectedMethod) + { + Assert.All(Helpers.TensorLengths, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + using BoundedMemory destination = CreateTensor(tensorLength); + + RunForEachSpecialValue(() => + { + tensorPrimitivesMethod(x.Span, destination.Span); + for (int i = 0; i < tensorLength; i++) + { + AssertEqualTolerance(expectedMethod(x[i]), destination[i]); + } + }, x); + }); + } + + [Theory] + [MemberData(nameof(SpanDestinationFunctionsToTest))] + public void SpanDestinationFunctions_ValueRange(SpanDestinationDelegate tensorPrimitivesMethod, Func expectedMethod) + { + Assert.All(VectorLengthAndIteratedRange(ConvertFromSingle(-100f), ConvertFromSingle(100f), ConvertFromSingle(3f)), arg => + { + T[] x = new T[arg.Length]; + T[] dest = new T[arg.Length]; + + x.AsSpan().Fill(arg.Element); + tensorPrimitivesMethod(x.AsSpan(), dest.AsSpan()); + + T expected = expectedMethod(arg.Element); + foreach (T actual in dest) + { + AssertEqualTolerance(expected, actual); + } + }); + } + + [Theory] + [MemberData(nameof(SpanDestinationFunctionsToTest))] + public void SpanDestinationFunctions_ThrowsForTooShortDestination(SpanDestinationDelegate tensorPrimitivesMethod, Func _) + { + Assert.All(Helpers.TensorLengths, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + using BoundedMemory destination = CreateTensor(tensorLength - 1); + + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(x.Span, destination.Span)); + }); + } + + [Theory] + [MemberData(nameof(SpanDestinationFunctionsToTest))] + public void SpanDestinationFunctions_ThrowsForOverlapppingInputsWithOutputs(SpanDestinationDelegate tensorPrimitivesMethod, Func _) + { + T[] array = new T[10]; + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(array.AsSpan(1, 2), array.AsSpan(0, 2))); + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(array.AsSpan(1, 2), array.AsSpan(2, 2))); + } + #endregion + + #region Span,Span -> Destination + public static IEnumerable SpanSpanDestinationFunctionsToTest() + { + yield return new object[] { new SpanSpanDestinationDelegate(TensorPrimitives.Atan2), new Func(T.Atan2) }; + yield return new object[] { new SpanSpanDestinationDelegate(TensorPrimitives.Atan2Pi), new Func(T.Atan2Pi) }; + yield return new object[] { new SpanSpanDestinationDelegate(TensorPrimitives.CopySign), new Func(T.CopySign) }; + yield return new object[] { new SpanSpanDestinationDelegate(TensorPrimitives.Hypot), new Func(T.Hypot) }; + yield return new object[] { new SpanSpanDestinationDelegate(TensorPrimitives.Ieee754Remainder), new Func(T.Ieee754Remainder) }; + yield return new object[] { new SpanSpanDestinationDelegate(TensorPrimitives.Log), new Func(T.Log) }; + yield return new object[] { new SpanSpanDestinationDelegate(TensorPrimitives.Pow), new Func(T.Pow) }; + } + + [Theory] + [MemberData(nameof(SpanSpanDestinationFunctionsToTest))] + public void SpanSpanDestination_AllLengths(SpanSpanDestinationDelegate tensorPrimitivesMethod, Func expectedMethod) + { + Assert.All(Helpers.TensorLengthsIncluding0, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + using BoundedMemory y = CreateAndFillTensor(tensorLength); + using BoundedMemory destination = CreateTensor(tensorLength); + + tensorPrimitivesMethod(x, y, destination); + for (int i = 
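The too-short-destination and overlapping-span theories above pin down the argument contract: the destination must be at least as long as the inputs, and an input may alias the destination only when it is the exact same region (the in-place theories), never a partial overlap. The sketch below restates that contract using MemoryExtensions.Overlaps; it is an illustration of the expected behavior, not the library's actual validation code, and the helper name is chosen here.

using System;

// Hypothetical guard illustrating the contract the tests assert.
static void ValidateUnaryArgs<T>(ReadOnlySpan<T> x, Span<T> destination)
{
    if (destination.Length < x.Length)
        throw new ArgumentException("Destination is too short.", nameof(destination));

    // Partial overlap would corrupt results mid-operation; exact aliasing is allowed.
    if (x.Overlaps(destination, out int offset) && offset != 0)
        throw new ArgumentException("Input and destination must not partially overlap.", nameof(destination));
}

float[] buffer = new float[10];
ValidateUnaryArgs<float>(buffer.AsSpan(0, 4), buffer.AsSpan(0, 4));          // ok: exact alias (in-place)
try { ValidateUnaryArgs<float>(buffer.AsSpan(1, 4), buffer.AsSpan(0, 4)); }  // partial overlap
catch (ArgumentException e) { Console.WriteLine(e.ParamName); }              // destination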
0; i < tensorLength; i++) + { + AssertEqualTolerance(expectedMethod(x[i], y[i]), destination[i]); + } + }); + } + + [Theory] + [MemberData(nameof(SpanSpanDestinationFunctionsToTest))] + public void SpanSpanDestination_InPlace(SpanSpanDestinationDelegate tensorPrimitivesMethod, Func expectedMethod) + { + Assert.All(Helpers.TensorLengthsIncluding0, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + T[] xOrig = x.Span.ToArray(); + + tensorPrimitivesMethod(x, x, x); + + for (int i = 0; i < tensorLength; i++) + { + AssertEqualTolerance(expectedMethod(xOrig[i], xOrig[i]), x[i]); + } + }); + } + + [Theory] + [MemberData(nameof(SpanSpanDestinationFunctionsToTest))] + public void SpanSpanDestination_ThrowsForMismatchedLengths(SpanSpanDestinationDelegate tensorPrimitivesMethod, Func _) + { + Assert.All(Helpers.TensorLengths, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + using BoundedMemory y = CreateAndFillTensor(tensorLength - 1); + using BoundedMemory destination = CreateTensor(tensorLength); + + Assert.Throws(() => tensorPrimitivesMethod(x, y, destination)); + Assert.Throws(() => tensorPrimitivesMethod(y, x, destination)); + }); + } + + [Theory] + [MemberData(nameof(SpanSpanDestinationFunctionsToTest))] + public void SpanSpanDestination_ThrowsForTooShortDestination(SpanSpanDestinationDelegate tensorPrimitivesMethod, Func _) + { + Assert.All(Helpers.TensorLengths, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + using BoundedMemory y = CreateAndFillTensor(tensorLength); + using BoundedMemory destination = CreateTensor(tensorLength - 1); + + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(x, y, destination)); + }); + } + + [Theory] + [MemberData(nameof(SpanSpanDestinationFunctionsToTest))] + public void SpanSpanDestination_ThrowsForOverlapppingInputsWithOutputs(SpanSpanDestinationDelegate tensorPrimitivesMethod, Func _) + { + T[] array = new T[10]; + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(array.AsSpan(1, 2), array.AsSpan(5, 2), array.AsSpan(0, 2))); + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(array.AsSpan(1, 2), array.AsSpan(5, 2), array.AsSpan(2, 2))); + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(array.AsSpan(1, 2), array.AsSpan(5, 2), array.AsSpan(4, 2))); + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(array.AsSpan(1, 2), array.AsSpan(5, 2), array.AsSpan(6, 2))); + } + #endregion + + #region Span,Scalar -> Destination + public static IEnumerable SpanScalarDestinationFunctionsToTest() + { + yield return new object[] { new SpanScalarDestinationDelegate(TensorPrimitives.Pow), new Func(T.Pow) }; + yield return new object[] { new SpanScalarDestinationDelegate(TensorPrimitives.Log), new Func(T.Log) }; + } + + [Theory] + [MemberData(nameof(SpanScalarDestinationFunctionsToTest))] + public void SpanScalarDestination_AllLengths(SpanScalarDestinationDelegate tensorPrimitivesMethod, Func expectedMethod) + { + Assert.All(Helpers.TensorLengthsIncluding0, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + T y = NextRandom(); + using BoundedMemory destination = CreateTensor(tensorLength); + + tensorPrimitivesMethod(x, y, destination); + for (int i = 0; i < tensorLength; i++) + { + AssertEqualTolerance(expectedMethod(x[i], y), destination[i]); + } + }); + } + + [Theory] + [MemberData(nameof(SpanScalarDestinationFunctionsToTest))] + public void 
SpanScalarDestination_InPlace(SpanScalarDestinationDelegate tensorPrimitivesMethod, Func expectedMethod) + { + Assert.All(Helpers.TensorLengthsIncluding0, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + T y = NextRandom(); + T[] xOrig = x.Span.ToArray(); + + tensorPrimitivesMethod(x, y, x); + + for (int i = 0; i < tensorLength; i++) + { + AssertEqualTolerance(expectedMethod(xOrig[i], y), x[i]); + } + }); + } + + [Theory] + [MemberData(nameof(SpanScalarDestinationFunctionsToTest))] + public void SpanScalarDestination_ThrowsForTooShortDestination(SpanScalarDestinationDelegate tensorPrimitivesMethod, Func _) + { + Assert.All(Helpers.TensorLengths, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + T y = NextRandom(); + using BoundedMemory destination = CreateTensor(tensorLength - 1); + + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(x, y, destination)); + }); + } + + [Theory] + [MemberData(nameof(SpanScalarDestinationFunctionsToTest))] + public void SpanScalarDestination_ThrowsForOverlapppingInputsWithOutputs(SpanScalarDestinationDelegate tensorPrimitivesMethod, Func _) + { + T[] array = new T[10]; + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(array.AsSpan(1, 2), default, array.AsSpan(0, 2))); + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(array.AsSpan(1, 2), default, array.AsSpan(2, 2))); + } + #endregion + + #region Span,Int,Span -> Destination + public static IEnumerable SpanIntDestinationFunctionsToTest() + { + yield return new object[] { new SpanScalarDestinationDelegate(TensorPrimitives.RootN), new Func(T.RootN) }; + yield return new object[] { new SpanScalarDestinationDelegate(TensorPrimitives.ScaleB), new Func(T.ScaleB) }; + } + + [Theory] + [MemberData(nameof(SpanIntDestinationFunctionsToTest))] + public void SpanIntDestination_AllLengths(SpanScalarDestinationDelegate tensorPrimitivesMethod, Func expectedMethod) + { + Assert.All(Helpers.TensorLengthsIncluding0, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + int y = Random.Next(1, 10); + using BoundedMemory destination = CreateTensor(tensorLength); + + tensorPrimitivesMethod(x, y, destination); + for (int i = 0; i < tensorLength; i++) + { + AssertEqualTolerance(expectedMethod(x[i], y), destination[i]); + } + }); + } + + [Theory] + [MemberData(nameof(SpanIntDestinationFunctionsToTest))] + public void SpanIntDestination_InPlace(SpanScalarDestinationDelegate tensorPrimitivesMethod, Func expectedMethod) + { + Assert.All(Helpers.TensorLengthsIncluding0, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + T[] xOrig = x.Span.ToArray(); + int y = Random.Next(1, 10); + + tensorPrimitivesMethod(x, y, x); + + for (int i = 0; i < tensorLength; i++) + { + AssertEqualTolerance(expectedMethod(xOrig[i], y), x[i]); + } + }); + } + + [Theory] + [MemberData(nameof(SpanIntDestinationFunctionsToTest))] + public void SpanIntDestination_ThrowsForTooShortDestination(SpanScalarDestinationDelegate tensorPrimitivesMethod, Func _) + { + Assert.All(Helpers.TensorLengths, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + int y = 2; + using BoundedMemory destination = CreateTensor(tensorLength - 1); + + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(x, y, destination)); + }); + } + + [Theory] + [MemberData(nameof(SpanIntDestinationFunctionsToTest))] + public void 
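The span+scalar and span+int theories cover overloads where the second operand is broadcast across the input, such as Pow with a scalar exponent and RootN/ScaleB with an int parameter. An illustrative usage sketch, with sample values chosen here:

using System;
using System.Numerics.Tensors;

double[] x = { 1, 8, 27, 64 };
double[] destination = new double[x.Length];

TensorPrimitives.Pow<double>(x, 2.0, destination);   // element-wise x[i]^2
Console.WriteLine(string.Join(", ", destination));   // 1, 64, 729, 4096

TensorPrimitives.RootN<double>(x, 3, destination);   // element-wise cube root
Console.WriteLine(string.Join(", ", destination));   // ~1, 2, 3, 4

TensorPrimitives.ScaleB<double>(x, 2, destination);  // element-wise x[i] * 2^2
Console.WriteLine(string.Join(", ", destination));   // 4, 32, 108, 256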
SpanIntDestination_ThrowsForOverlapppingInputsWithOutputs(SpanScalarDestinationDelegate tensorPrimitivesMethod, Func _) + { + T[] array = new T[10]; + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(array.AsSpan(1, 2), 2, array.AsSpan(0, 2))); + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(array.AsSpan(1, 2), 2, array.AsSpan(2, 2))); + } + #endregion + + #region Span,Span,Span -> Destination + public static IEnumerable SpanSpanSpanDestinationFunctionsToTest() + { + yield return new object[] { new SpanSpanSpanDestinationDelegate(TensorPrimitives.Lerp), new Func(T.Lerp) }; + yield return new object[] { new SpanSpanSpanDestinationDelegate(TensorPrimitives.MultiplyAddEstimate), new Func(T.FusedMultiplyAdd) }; + } + + [Theory] + [MemberData(nameof(SpanSpanSpanDestinationFunctionsToTest))] + public void SpanSpanSpanDestination_AllLengths(SpanSpanSpanDestinationDelegate tensorPrimitivesMethod, Func expectedMethod) + { + Assert.All(Helpers.TensorLengthsIncluding0, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + using BoundedMemory y = CreateAndFillTensor(tensorLength); + using BoundedMemory z = CreateAndFillTensor(tensorLength); + using BoundedMemory destination = CreateTensor(tensorLength); + + tensorPrimitivesMethod(x, y, z, destination); + for (int i = 0; i < tensorLength; i++) + { + AssertEqualTolerance(expectedMethod(x[i], y[i], z[i]), destination[i]); + } + }); + } + + [Theory] + [MemberData(nameof(SpanSpanSpanDestinationFunctionsToTest))] + public void SpanSpanSpanDestination_InPlace(SpanSpanSpanDestinationDelegate tensorPrimitivesMethod, Func expectedMethod) + { + Assert.All(Helpers.TensorLengthsIncluding0, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + T[] xOrig = x.Span.ToArray(); + + tensorPrimitivesMethod(x, x, x, x); + + for (int i = 0; i < tensorLength; i++) + { + AssertEqualTolerance(expectedMethod(xOrig[i], xOrig[i], xOrig[i]), x[i]); + } + }); + } + + [Theory] + [MemberData(nameof(SpanSpanSpanDestinationFunctionsToTest))] + public void SpanSpanSpanDestination_ThrowsForMismatchedLengths(SpanSpanSpanDestinationDelegate tensorPrimitivesMethod, Func _) + { + Assert.All(Helpers.TensorLengths, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + using BoundedMemory y = CreateAndFillTensor(tensorLength); + using BoundedMemory z = CreateAndFillTensor(tensorLength - 1); + using BoundedMemory destination = CreateTensor(tensorLength); + + Assert.Throws(() => tensorPrimitivesMethod(x, y, z, destination)); + Assert.Throws(() => tensorPrimitivesMethod(x, z, y, destination)); + Assert.Throws(() => tensorPrimitivesMethod(y, x, z, destination)); + Assert.Throws(() => tensorPrimitivesMethod(y, z, x, destination)); + Assert.Throws(() => tensorPrimitivesMethod(z, x, y, destination)); + Assert.Throws(() => tensorPrimitivesMethod(z, y, x, destination)); + }); + } + + [Theory] + [MemberData(nameof(SpanSpanSpanDestinationFunctionsToTest))] + public void SpanSpanSpanDestination_ThrowsForTooShortDestination(SpanSpanSpanDestinationDelegate tensorPrimitivesMethod, Func _) + { + Assert.All(Helpers.TensorLengths, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + using BoundedMemory y = CreateAndFillTensor(tensorLength); + using BoundedMemory z = CreateAndFillTensor(tensorLength); + using BoundedMemory destination = CreateTensor(tensorLength - 1); + + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(x, y, z, 
destination)); + }); + } + + [Theory] + [MemberData(nameof(SpanSpanSpanDestinationFunctionsToTest))] + public void SpanSpanSpanDestination_ThrowsForOverlapppingInputsWithOutputs(SpanSpanSpanDestinationDelegate tensorPrimitivesMethod, Func _) + { + T[] array = new T[10]; + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(array.AsSpan(1, 2), array.AsSpan(5, 2), array.AsSpan(7, 2), array.AsSpan(0, 2))); + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(array.AsSpan(1, 2), array.AsSpan(5, 2), array.AsSpan(7, 2), array.AsSpan(2, 2))); + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(array.AsSpan(1, 2), array.AsSpan(5, 2), array.AsSpan(7, 2), array.AsSpan(4, 2))); + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(array.AsSpan(1, 2), array.AsSpan(5, 2), array.AsSpan(7, 2), array.AsSpan(6, 2))); + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(array.AsSpan(1, 2), array.AsSpan(5, 2), array.AsSpan(7, 2), array.AsSpan(8, 2))); + } + #endregion + + #region Span -> Destination, Destination + public static IEnumerable SpanDestinationDestinationFunctionsToTest() + { + yield return new object[] { new SpanDestinationDestinationDelegate(TensorPrimitives.SinCos), new Func(T.SinCos) }; + yield return new object[] { new SpanDestinationDestinationDelegate(TensorPrimitives.SinCosPi), new Func(T.SinCosPi) }; + } + + [Theory] + [MemberData(nameof(SpanDestinationDestinationFunctionsToTest))] + public void SpanDestinationDestinationFunctions_AllLengths(SpanDestinationDestinationDelegate tensorPrimitivesMethod, Func expectedMethod) + { + Assert.All(Helpers.TensorLengthsIncluding0, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + using BoundedMemory destination1 = CreateTensor(tensorLength); + using BoundedMemory destination2 = CreateTensor(tensorLength); + + tensorPrimitivesMethod(x.Span, destination1.Span, destination2.Span); + + for (int i = 0; i < tensorLength; i++) + { + (T expected1, T expected2) = expectedMethod(x[i]); + AssertEqualTolerance(expected1, destination1[i]); + AssertEqualTolerance(expected2, destination2[i]); + } + }); + } + + [Theory] + [MemberData(nameof(SpanDestinationDestinationFunctionsToTest))] + public void SpanDestinationDestinationFunctions_InPlace(SpanDestinationDestinationDelegate tensorPrimitivesMethod, Func expectedMethod) + { + Assert.All(Helpers.TensorLengthsIncluding0, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + T[] xOrig = x.Span.ToArray(); + using BoundedMemory destination2 = CreateTensor(tensorLength); + + tensorPrimitivesMethod(x.Span, x.Span, destination2.Span); + + for (int i = 0; i < tensorLength; i++) + { + (T expected1, T expected2) = expectedMethod(xOrig[i]); + AssertEqualTolerance(expected1, x[i]); + AssertEqualTolerance(expected2, destination2[i]); + } + }); + + Assert.All(Helpers.TensorLengthsIncluding0, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + T[] xOrig = x.Span.ToArray(); + using BoundedMemory destination1 = CreateTensor(tensorLength); + + tensorPrimitivesMethod(x.Span, destination1.Span, x.Span); + + for (int i = 0; i < tensorLength; i++) + { + (T expected1, T expected2) = expectedMethod(xOrig[i]); + AssertEqualTolerance(expected1, destination1[i]); + AssertEqualTolerance(expected2, x[i]); + } + }); + } + + [Theory] + [MemberData(nameof(SpanDestinationDestinationFunctionsToTest))] + public void 
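SinCos and SinCosPi take one input span and fill two destinations in a single pass, the first with sines and the second with cosines, matching the tuple returned by the scalar T.SinCos oracle. An illustrative usage sketch, with sample values chosen here:

using System;
using System.Numerics.Tensors;

double[] x = { 0, Math.PI / 6, Math.PI / 2 };
double[] sines = new double[x.Length];
double[] cosines = new double[x.Length];

TensorPrimitives.SinCos<double>(x, sines, cosines);

for (int i = 0; i < x.Length; i++)
{
    Console.WriteLine($"sin({x[i]:F4}) = {sines[i]:F4}, cos({x[i]:F4}) = {cosines[i]:F4}");
}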
SpanDestinationDestinationFunctions_SpecialValues(SpanDestinationDestinationDelegate tensorPrimitivesMethod, Func expectedMethod) + { + Assert.All(Helpers.TensorLengths, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + using BoundedMemory destination1 = CreateTensor(tensorLength); + using BoundedMemory destination2 = CreateTensor(tensorLength); + + RunForEachSpecialValue(() => + { + tensorPrimitivesMethod(x.Span, destination1.Span, destination2.Span); + for (int i = 0; i < tensorLength; i++) + { + (T expected1, T expected2) = expectedMethod(x[i]); + AssertEqualTolerance(expected1, destination1[i]); + AssertEqualTolerance(expected2, destination2[i]); + } + }, x); + }); + } + + [Theory] + [MemberData(nameof(SpanDestinationDestinationFunctionsToTest))] + public void SpanDestinationDestinationFunctions_ValueRange(SpanDestinationDestinationDelegate tensorPrimitivesMethod, Func expectedMethod) + { + Assert.All(VectorLengthAndIteratedRange(ConvertFromSingle(-100f), ConvertFromSingle(100f), ConvertFromSingle(3f)), arg => + { + T[] x = new T[arg.Length]; + T[] dest1 = new T[arg.Length]; + T[] dest2 = new T[arg.Length]; + + x.AsSpan().Fill(arg.Element); + tensorPrimitivesMethod(x.AsSpan(), dest1.AsSpan(), dest2.AsSpan()); + + (T expected1, T expected2) = expectedMethod(arg.Element); + foreach (T actual in dest1) + { + AssertEqualTolerance(expected1, actual); + } + foreach (T actual in dest2) + { + AssertEqualTolerance(expected2, actual); + } + }); + } + + [Theory] + [MemberData(nameof(SpanDestinationDestinationFunctionsToTest))] + public void SpanDestinationDestinationFunctions_ThrowsForTooShortDestination(SpanDestinationDestinationDelegate tensorPrimitivesMethod, Func _) + { + Assert.All(Helpers.TensorLengths, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + using BoundedMemory destination1 = CreateTensor(tensorLength - 1); + using BoundedMemory destination2 = CreateTensor(tensorLength); + + Assert.Throws(() => tensorPrimitivesMethod(x.Span, destination1.Span, destination2.Span)); + }); + + Assert.All(Helpers.TensorLengths, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + using BoundedMemory destination1 = CreateTensor(tensorLength); + using BoundedMemory destination2 = CreateTensor(tensorLength - 1); + + Assert.Throws(() => tensorPrimitivesMethod(x.Span, destination1.Span, destination2.Span)); + }); + } + + [Theory] + [MemberData(nameof(SpanDestinationDestinationFunctionsToTest))] + public void SpanDestinationDestinationFunctions_ThrowsForOverlapppingInputsWithOutputs(SpanDestinationDestinationDelegate tensorPrimitivesMethod, Func _) + { + T[] array = new T[10]; + Assert.Throws(() => tensorPrimitivesMethod(array.AsSpan(1, 2), array.AsSpan(0, 2), array.AsSpan(4, 2))); + Assert.Throws(() => tensorPrimitivesMethod(array.AsSpan(1, 2), array.AsSpan(2, 2), array.AsSpan(4, 2))); + Assert.Throws(() => tensorPrimitivesMethod(array.AsSpan(3, 2), array.AsSpan(0, 2), array.AsSpan(4, 2))); + Assert.Throws(() => tensorPrimitivesMethod(array.AsSpan(5, 2), array.AsSpan(0, 2), array.AsSpan(4, 2))); + } + #endregion + + #region ILogB + [Fact] + public void ILogB_AllLengths() + { + Assert.All(Helpers.TensorLengthsIncluding0, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + using BoundedMemory destination = BoundedMemory.Allocate(tensorLength); + + TensorPrimitives.ILogB(x.Span, destination.Span); + + for (int i = 0; i < tensorLength; i++) + { + 
AssertEqualTolerance(T.CreateTruncating(T.ILogB(x[i])), T.CreateTruncating(destination[i])); + } + }); + } + + [Fact] + public void ILogB_SpecialValues() + { + Assert.All(Helpers.TensorLengths, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + using BoundedMemory destination = BoundedMemory.Allocate(tensorLength); + + RunForEachSpecialValue(() => + { + TensorPrimitives.ILogB(x.Span, destination.Span); + for (int i = 0; i < tensorLength; i++) + { + AssertEqualTolerance(T.CreateTruncating(T.ILogB(x[i])), T.CreateTruncating(destination[i])); + } + }, x); + }); + } + + [Fact] + public void ILogB_ThrowsForTooShortDestination() + { + Assert.All(Helpers.TensorLengths, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + using BoundedMemory destination = BoundedMemory.Allocate(tensorLength - 1); + + AssertExtensions.Throws("destination", () => TensorPrimitives.ILogB(x.Span, destination.Span)); + }); + } + #endregion + + #region MultiplyAddEstimate + [Fact] + public void MultiplyAddEstimate_TensorTensorScalar_AllLengths() + { + Assert.All(Helpers.TensorLengthsIncluding0, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + using BoundedMemory y = CreateAndFillTensor(tensorLength); + T addend = NextRandom(); + using BoundedMemory destination = CreateTensor(tensorLength); + + TensorPrimitives.MultiplyAddEstimate(x, y, addend, destination); + + for (int i = 0; i < tensorLength; i++) + { + AssertEqualTolerance((x[i] * y[i]) + addend, destination[i]); + } + }); + } + + [Fact] + public void MultiplyAddEstimate_TensorTensorScalar_InPlace() + { + Assert.All(Helpers.TensorLengthsIncluding0, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + T[] xOrig = x.Span.ToArray(); + T addend = NextRandom(); + + TensorPrimitives.MultiplyAddEstimate(x, x, addend, x); + + for (int i = 0; i < tensorLength; i++) + { + AssertEqualTolerance((xOrig[i] * xOrig[i]) + addend, x[i]); + } + }); + } + + [Fact] + public void MultiplyAddEstimate_TensorTensorScalar_ThrowsForTooShortDestination() + { + Assert.All(Helpers.TensorLengths, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + using BoundedMemory y = CreateAndFillTensor(tensorLength); + T addend = NextRandom(); + using BoundedMemory destination = CreateTensor(tensorLength - 1); + + AssertExtensions.Throws("destination", () => TensorPrimitives.MultiplyAddEstimate(x, y, addend, destination)); + }); + } + + [Fact] + public void MultiplyAddEstimate_TensorTensorScalar_ThrowsForOverlapppingInputsWithOutputs() + { + T[] array = new T[10]; + AssertExtensions.Throws("destination", () => TensorPrimitives.MultiplyAddEstimate(array.AsSpan(1, 2), array.AsSpan(4, 2), default(T), array.AsSpan(0, 2))); + AssertExtensions.Throws("destination", () => TensorPrimitives.MultiplyAddEstimate(array.AsSpan(1, 2), array.AsSpan(4, 2), default(T), array.AsSpan(2, 2))); + AssertExtensions.Throws("destination", () => TensorPrimitives.MultiplyAddEstimate(array.AsSpan(1, 2), array.AsSpan(4, 2), default(T), array.AsSpan(3, 2))); + AssertExtensions.Throws("destination", () => TensorPrimitives.MultiplyAddEstimate(array.AsSpan(1, 2), array.AsSpan(4, 2), default(T), array.AsSpan(5, 2))); + } + + [Fact] + public void MultiplyAddEstimate_TensorScalarTensor() + { + Assert.All(Helpers.TensorLengthsIncluding0, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + T y = NextRandom(); + using BoundedMemory addend = 
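MultiplyAddEstimate computes x * y + addend element-wise, and the surrounding theories exercise the mixed shapes where either the multiplier or the addend is a scalar broadcast across the span. An illustrative usage sketch, with sample values chosen here:

using System;
using System.Numerics.Tensors;

float[] x = { 1f, 2f, 3f, 4f };
float[] y = { 10f, 10f, 10f, 10f };
float[] destination = new float[x.Length];

TensorPrimitives.MultiplyAddEstimate<float>(x, y, 0.5f, destination); // x[i] * y[i] + 0.5
Console.WriteLine(string.Join(", ", destination));                    // 10.5, 20.5, 30.5, 40.5

TensorPrimitives.MultiplyAddEstimate<float>(x, 2f, y, destination);   // x[i] * 2 + y[i]
Console.WriteLine(string.Join(", ", destination));                    // 12, 14, 16, 18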
CreateAndFillTensor(tensorLength); + using BoundedMemory destination = CreateTensor(tensorLength); + + TensorPrimitives.MultiplyAddEstimate(x, y, addend, destination); + + for (int i = 0; i < tensorLength; i++) + { + AssertEqualTolerance((x[i] * y) + addend[i], destination[i]); + } + }); + } + + [Fact] + public void MultiplyAddEstimate_TensorScalarTensor_InPlace() + { + Assert.All(Helpers.TensorLengthsIncluding0, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + T[] xOrig = x.Span.ToArray(); + T y = NextRandom(); + + TensorPrimitives.MultiplyAddEstimate(x, y, x, x); + + for (int i = 0; i < tensorLength; i++) + { + AssertEqualTolerance((xOrig[i] * y) + xOrig[i], x[i]); + } + }); + } + + [Fact] + public void MultiplyAddEstimate_TensorScalarTensor_ThrowsForTooShortDestination() + { + Assert.All(Helpers.TensorLengths, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + T y = NextRandom(); + using BoundedMemory addend = CreateAndFillTensor(tensorLength); + using BoundedMemory destination = CreateTensor(tensorLength - 1); + + AssertExtensions.Throws("destination", () => TensorPrimitives.MultiplyAddEstimate(x, y, addend, destination)); + }); + } + + [Fact] + public void MultiplyAddEstimate_TensorScalarTensor_ThrowsForOverlapppingInputsWithOutputs() + { + T[] array = new T[10]; + AssertExtensions.Throws("destination", () => TensorPrimitives.MultiplyAddEstimate(array.AsSpan(1, 2), default(T), array.AsSpan(4, 2), array.AsSpan(0, 2))); + AssertExtensions.Throws("destination", () => TensorPrimitives.MultiplyAddEstimate(array.AsSpan(1, 2), default(T), array.AsSpan(4, 2), array.AsSpan(2, 2))); + AssertExtensions.Throws("destination", () => TensorPrimitives.MultiplyAddEstimate(array.AsSpan(1, 2), default(T), array.AsSpan(4, 2), array.AsSpan(3, 2))); + AssertExtensions.Throws("destination", () => TensorPrimitives.MultiplyAddEstimate(array.AsSpan(1, 2), default(T), array.AsSpan(4, 2), array.AsSpan(5, 2))); + } + #endregion + + #region Round + public static IEnumerable RoundData() + { + foreach (MidpointRounding mode in Enum.GetValues(typeof(MidpointRounding))) + { + foreach (int digits in new[] { 0, 1, 4 }) + { + yield return new object[] { mode, digits }; + } + } + } + + [Theory] + [MemberData(nameof(RoundData))] + public void Round_AllLengths(MidpointRounding mode, int digits) + { + Assert.All(Helpers.TensorLengthsIncluding0, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + using BoundedMemory destination = CreateTensor(tensorLength); + + if (digits == 0) + { + if (mode == MidpointRounding.ToEven) + { + TensorPrimitives.Round(x.Span, destination.Span); + for (int i = 0; i < tensorLength; i++) + { + AssertEqualTolerance(T.Round(x[i]), destination[i]); + } + } + + TensorPrimitives.Round(x.Span, mode, destination.Span); + for (int i = 0; i < tensorLength; i++) + { + AssertEqualTolerance(T.Round(x[i], mode), destination[i]); + } + } + + if (mode == MidpointRounding.ToEven) + { + TensorPrimitives.Round(x.Span, digits, destination.Span); + for (int i = 0; i < tensorLength; i++) + { + AssertEqualTolerance(T.Round(x[i], digits), destination[i]); + } + } - // Same magnitude, opposite sign - pos = Random.Next(x.Length); - x[pos] = T.CreateTruncating(5); - y[pos] = T.CreateTruncating(-5); + TensorPrimitives.Round(x.Span, digits, mode, destination.Span); + for (int i = 0; i < tensorLength; i++) + { + AssertEqualTolerance(T.Round(x[i], digits, mode), destination[i]); + } + }); } + #endregion } public unsafe abstract 
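Round_AllLengths walks through the four Round shapes: bare (round half to even), with a MidpointRounding mode, with a digit count, and with both, each mirroring the corresponding T.Round overload. An illustrative usage sketch, using exactly representable sample values so the expected results are unambiguous:

using System;
using System.Numerics.Tensors;

double[] x = { 2.5, -2.5, 1.25 };
double[] destination = new double[x.Length];

TensorPrimitives.Round<double>(x, destination);                                   // 2, -2, 1
TensorPrimitives.Round<double>(x, MidpointRounding.AwayFromZero, destination);    // 3, -3, 1
TensorPrimitives.Round<double>(x, 1, destination);                                // 2.5, -2.5, 1.2
TensorPrimitives.Round<double>(x, 1, MidpointRounding.AwayFromZero, destination); // 2.5, -2.5, 1.3
Console.WriteLine(string.Join(", ", destination));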
class GenericSignedIntegerTensorPrimitivesTests : GenericIntegerTensorPrimitivesTests @@ -134,7 +982,7 @@ public void Abs_MinValue_Throws() FillTensor(x.Span, T.MinValue); x[^1] = T.MinValue; - Assert.Throws(() => Abs(x, destination)); + Assert.Throws(() => TensorPrimitives.Abs(x.Span, destination.Span)); }); } @@ -148,7 +996,7 @@ public void SumOfMagnitudes_MinValue_Throws() FillTensor(x.Span, T.MinValue); x[^1] = T.MinValue; - Assert.Throws(() => SumOfMagnitudes(x)); + Assert.Throws(() => TensorPrimitives.SumOfMagnitudes(x.Span)); }); } } @@ -168,8 +1016,7 @@ public void Divide_TwoTensors_ByZero_Throws() FillTensor(y.Span, T.Zero); y[^1] = T.Zero; - Exception e = Record.Exception(() => Divide(x, y, destination)); - Assert.True(e is DivideByZeroException or ArgumentOutOfRangeException); // TODO https://github.com/dotnet/runtime/issues/94593: Fix exception type + Assert.Throws(() => TensorPrimitives.Divide(x.Span, y.Span, destination.Span)); }); } @@ -181,10 +1028,402 @@ public void Divide_TensorScalar_ByZero_Throw() using BoundedMemory x = CreateAndFillTensor(tensorLength); using BoundedMemory destination = CreateTensor(tensorLength); - Exception e = Record.Exception(() => Divide(x, T.Zero, destination)); - Assert.True(e is DivideByZeroException or ArgumentOutOfRangeException); // TODO https://github.com/dotnet/runtime/issues/94593: Fix exception type + Assert.Throws(() => TensorPrimitives.Divide(x, T.Zero, destination)); + }); + } + + [Fact] + public void Divide_ScalarTensor_ByZero_Throw() + { + Assert.All(Helpers.TensorLengths, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + using BoundedMemory destination = CreateTensor(tensorLength); + x[Random.Next(x.Length)] = T.Zero; + + Assert.Throws(() => TensorPrimitives.Divide(T.One, x, destination)); + }); + } + + #region Span -> Destination + public static IEnumerable SpanDestinationFunctionsToTest() + { + yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.OnesComplement), new Func(i => ~i) }; + yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.PopCount), new Func(T.PopCount) }; + yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.LeadingZeroCount), new Func(T.LeadingZeroCount) }; + yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.TrailingZeroCount), new Func(T.TrailingZeroCount) }; + } + + [Theory] + [MemberData(nameof(SpanDestinationFunctionsToTest))] + public void SpanDestinationFunctions_AllLengths(SpanDestinationDelegate tensorPrimitivesMethod, Func expectedMethod) + { + Assert.All(Helpers.TensorLengthsIncluding0, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + using BoundedMemory destination = CreateTensor(tensorLength); + + tensorPrimitivesMethod(x, destination); + + for (int i = 0; i < tensorLength; i++) + { + AssertEqualTolerance(expectedMethod(x[i]), destination[i]); + } + }); + } + + [Theory] + [MemberData(nameof(SpanDestinationFunctionsToTest))] + public void SpanDestinationFunctions_InPlace(SpanDestinationDelegate tensorPrimitivesMethod, Func expectedMethod) + { + Assert.All(Helpers.TensorLengthsIncluding0, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + T[] xOrig = x.Span.ToArray(); + + tensorPrimitivesMethod(x, x); + + for (int i = 0; i < tensorLength; i++) + { + AssertEqualTolerance(expectedMethod(xOrig[i]), x[i]); + } + }); + } + + [Theory] + [MemberData(nameof(SpanDestinationFunctionsToTest))] + public void 
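The signed-integer tests above pin down two edge cases: Abs and SumOfMagnitudes throw when they encounter T.MinValue, whose magnitude does not fit in the type, and integer division by zero now surfaces a single exception type rather than the previously tolerated alternatives. The sketch below illustrates both with int; the exception types shown are the scalar behaviors the tests appear to assert, chosen here as an assumption.

using System;
using System.Numerics.Tensors;

int[] x = { 1, -2, int.MinValue };
int[] destination = new int[x.Length];

try { TensorPrimitives.Abs<int>(x, destination); }
catch (OverflowException) { Console.WriteLine("Abs: |int.MinValue| does not fit in an int."); }

int[] zeros = new int[x.Length]; // all-zero divisor
try { TensorPrimitives.Divide<int>(x, zeros, destination); }
catch (DivideByZeroException) { Console.WriteLine("Divide: zero divisor."); }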
SpanDestinationFunctions_ThrowsForTooShortDestination(SpanDestinationDelegate tensorPrimitivesMethod, Func _) + { + Assert.All(Helpers.TensorLengths, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + using BoundedMemory destination = CreateTensor(tensorLength - 1); + + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(x, destination)); + }); + } + + [Theory] + [MemberData(nameof(SpanDestinationFunctionsToTest))] + public void SpanDestinationFunctions_ThrowsForOverlapppingInputsWithOutputs(SpanDestinationDelegate tensorPrimitivesMethod, Func _) + { + T[] array = new T[10]; + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(array.AsSpan(1, 2), array.AsSpan(0, 2))); + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(array.AsSpan(1, 2), array.AsSpan(2, 2))); + } + #endregion + + #region Span,Span -> Destination + public static IEnumerable SpanSpanDestinationFunctionsToTest() + { + yield return new object[] { new SpanSpanDestinationDelegate(TensorPrimitives.BitwiseAnd), new Func((x, y) => x & y) }; + yield return new object[] { new SpanSpanDestinationDelegate(TensorPrimitives.BitwiseOr), new Func((x, y) => x | y) }; + yield return new object[] { new SpanSpanDestinationDelegate(TensorPrimitives.Xor), new Func((x, y) => x ^ y) }; + } + + [Theory] + [MemberData(nameof(SpanSpanDestinationFunctionsToTest))] + public void SpanSpanDestination_AllLengths(SpanSpanDestinationDelegate tensorPrimitivesMethod, Func expectedMethod) + { + Assert.All(Helpers.TensorLengthsIncluding0, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + using BoundedMemory y = CreateAndFillTensor(tensorLength); + using BoundedMemory destination = CreateTensor(tensorLength); + + tensorPrimitivesMethod(x, y, destination); + for (int i = 0; i < tensorLength; i++) + { + AssertEqualTolerance(expectedMethod(x[i], y[i]), destination[i]); + } + }); + } + + [Theory] + [MemberData(nameof(SpanSpanDestinationFunctionsToTest))] + public void SpanSpanDestination_InPlace(SpanSpanDestinationDelegate tensorPrimitivesMethod, Func expectedMethod) + { + Assert.All(Helpers.TensorLengthsIncluding0, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + T[] xOrig = x.Span.ToArray(); + + tensorPrimitivesMethod(x, x, x); + + for (int i = 0; i < tensorLength; i++) + { + AssertEqualTolerance(expectedMethod(xOrig[i], xOrig[i]), x[i]); + } + }); + } + + [Theory] + [MemberData(nameof(SpanSpanDestinationFunctionsToTest))] + public void SpanSpanDestination_ThrowsForMismatchedLengths(SpanSpanDestinationDelegate tensorPrimitivesMethod, Func _) + { + Assert.All(Helpers.TensorLengths, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + using BoundedMemory y = CreateAndFillTensor(tensorLength - 1); + using BoundedMemory destination = CreateTensor(tensorLength); + + Assert.Throws(() => tensorPrimitivesMethod(x, y, destination)); + Assert.Throws(() => tensorPrimitivesMethod(y, x, destination)); + }); + } + + [Theory] + [MemberData(nameof(SpanSpanDestinationFunctionsToTest))] + public void SpanSpanDestination_ThrowsForTooShortDestination(SpanSpanDestinationDelegate tensorPrimitivesMethod, Func _) + { + Assert.All(Helpers.TensorLengths, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + using BoundedMemory y = CreateAndFillTensor(tensorLength); + using BoundedMemory destination = CreateTensor(tensorLength - 1); + + AssertExtensions.Throws("destination", () => 
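The integer theories cover element-wise bit manipulation: OnesComplement, PopCount, LeadingZeroCount, TrailingZeroCount, and the bitwise combinators with either a second span or, in the span+scalar theories that follow, a broadcast scalar. An illustrative usage sketch, with sample values chosen here:

using System;
using System.Numerics.Tensors;

uint[] x = { 0b0000_0001, 0b1111_0000, 0b1010_1010 };
uint[] destination = new uint[x.Length];

TensorPrimitives.PopCount<uint>(x, destination);
Console.WriteLine(string.Join(", ", destination)); // 1, 4, 4

TensorPrimitives.TrailingZeroCount<uint>(x, destination);
Console.WriteLine(string.Join(", ", destination)); // 0, 4, 1

TensorPrimitives.BitwiseAnd<uint>(x, 0b1111_0000u, destination); // x[i] & 0xF0
Console.WriteLine(string.Join(", ", destination)); // 0, 240, 160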
tensorPrimitivesMethod(x, y, destination)); + }); + } + + [Theory] + [MemberData(nameof(SpanSpanDestinationFunctionsToTest))] + public void SpanSpanDestination_ThrowsForOverlapppingInputsWithOutputs(SpanSpanDestinationDelegate tensorPrimitivesMethod, Func _) + { + T[] array = new T[10]; + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(array.AsSpan(1, 2), array.AsSpan(5, 2), array.AsSpan(0, 2))); + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(array.AsSpan(1, 2), array.AsSpan(5, 2), array.AsSpan(2, 2))); + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(array.AsSpan(1, 2), array.AsSpan(5, 2), array.AsSpan(4, 2))); + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(array.AsSpan(1, 2), array.AsSpan(5, 2), array.AsSpan(6, 2))); + } + #endregion + + #region Span,Scalar -> Destination + public static IEnumerable SpanScalarDestinationFunctionsToTest() + { + yield return new object[] { new SpanScalarDestinationDelegate(TensorPrimitives.BitwiseAnd), new Func((x, y) => x & y) }; + yield return new object[] { new SpanScalarDestinationDelegate(TensorPrimitives.BitwiseOr), new Func((x, y) => x | y) }; + yield return new object[] { new SpanScalarDestinationDelegate(TensorPrimitives.Xor), new Func((x, y) => x ^ y) }; + } + + [Theory] + [MemberData(nameof(SpanScalarDestinationFunctionsToTest))] + public void SpanScalarDestination_AllLengths(SpanScalarDestinationDelegate tensorPrimitivesMethod, Func expectedMethod) + { + Assert.All(Helpers.TensorLengthsIncluding0, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + T y = NextRandom(); + using BoundedMemory destination = CreateTensor(tensorLength); + + tensorPrimitivesMethod(x, y, destination); + for (int i = 0; i < tensorLength; i++) + { + AssertEqualTolerance(expectedMethod(x[i], y), destination[i]); + } + }); + } + + [Theory] + [MemberData(nameof(SpanScalarDestinationFunctionsToTest))] + public void SpanScalarDestination_InPlace(SpanScalarDestinationDelegate tensorPrimitivesMethod, Func expectedMethod) + { + Assert.All(Helpers.TensorLengthsIncluding0, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + T y = NextRandom(); + T[] xOrig = x.Span.ToArray(); + + tensorPrimitivesMethod(x, y, x); + + for (int i = 0; i < tensorLength; i++) + { + AssertEqualTolerance(expectedMethod(xOrig[i], y), x[i]); + } + }); + } + + [Theory] + [MemberData(nameof(SpanScalarDestinationFunctionsToTest))] + public void SpanScalarDestination_ThrowsForTooShortDestination(SpanScalarDestinationDelegate tensorPrimitivesMethod, Func _) + { + Assert.All(Helpers.TensorLengths, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + T y = NextRandom(); + using BoundedMemory destination = CreateTensor(tensorLength - 1); + + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(x, y, destination)); + }); + } + + [Theory] + [MemberData(nameof(SpanScalarDestinationFunctionsToTest))] + public void SpanScalarDestination_ThrowsForOverlapppingInputWithOutputs(SpanScalarDestinationDelegate tensorPrimitivesMethod, Func _) + { + T[] array = new T[10]; + T y = NextRandom(); + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(array.AsSpan(1, 2), y, array.AsSpan(0, 2))); + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(array.AsSpan(1, 2), y, array.AsSpan(2, 2))); + } + #endregion + + #region Shifting/Rotating + public static IEnumerable ShiftRotateDestinationFunctionsToTest() + { + 
yield return new object[] { new SpanScalarDestinationDelegate(TensorPrimitives.ShiftLeft), new Func((x, n) => x << n) }; + yield return new object[] { new SpanScalarDestinationDelegate(TensorPrimitives.ShiftRightArithmetic), new Func((x, n) => x >> n) }; + yield return new object[] { new SpanScalarDestinationDelegate(TensorPrimitives.ShiftRightLogical), new Func((x, n) => x >>> n) }; + yield return new object[] { new SpanScalarDestinationDelegate(TensorPrimitives.RotateLeft), new Func(T.RotateLeft) }; + yield return new object[] { new SpanScalarDestinationDelegate(TensorPrimitives.RotateRight), new Func(T.RotateRight) }; + } + + [Theory] + [MemberData(nameof(ShiftRotateDestinationFunctionsToTest))] + public void ShiftRotateDestination_AllLengths(SpanScalarDestinationDelegate tensorPrimitivesMethod, Func expectedMethod) + { + Assert.All(Helpers.TensorLengthsIncluding0, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + int y = Random.Next(0, T.MaxValue.GetByteCount() * 8); + using BoundedMemory destination = CreateTensor(tensorLength); + + tensorPrimitivesMethod(x, y, destination); + for (int i = 0; i < tensorLength; i++) + { + AssertEqualTolerance(expectedMethod(x[i], y), destination[i]); + } + }); + } + + [Theory] + [MemberData(nameof(ShiftRotateDestinationFunctionsToTest))] + public void ShiftRotateDestination_InPlace(SpanScalarDestinationDelegate tensorPrimitivesMethod, Func expectedMethod) + { + Assert.All(Helpers.TensorLengthsIncluding0, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + int y = Random.Next(0, T.MaxValue.GetByteCount() * 8); + T[] xOrig = x.Span.ToArray(); + + tensorPrimitivesMethod(x, y, x); + + for (int i = 0; i < tensorLength; i++) + { + AssertEqualTolerance(expectedMethod(xOrig[i], y), x[i]); + } + }); + } + + [Theory] + [MemberData(nameof(ShiftRotateDestinationFunctionsToTest))] + public void ShiftRotateDestination_ThrowsForTooShortDestination(SpanScalarDestinationDelegate tensorPrimitivesMethod, Func _) + { + Assert.All(Helpers.TensorLengths, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + using BoundedMemory destination = CreateTensor(tensorLength - 1); + + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(x, default, destination)); + }); + } + + [Theory] + [MemberData(nameof(ShiftRotateDestinationFunctionsToTest))] + public void ShiftRotateDestination_ThrowsForOverlapppingInputWithOutputs(SpanScalarDestinationDelegate tensorPrimitivesMethod, Func _) + { + T[] array = new T[10]; + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(array.AsSpan(1, 2), default, array.AsSpan(0, 2))); + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(array.AsSpan(1, 2), default, array.AsSpan(2, 2))); + } + #endregion + + #region CopySign + private void RemoveSignedMinValue(Span span) + { + for (int i = 0; i < span.Length; i++) + { + while (T.Sign(span[i]) < 0 && span[i] == T.MinValue) + { + span[i] = NextRandom(); + } + } + } + + [Fact] + public void CopySign_AllLengths() + { + Assert.All(Helpers.TensorLengthsIncluding0, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + RemoveSignedMinValue(x); // CopySign doesn't work with MinValue for signed integers, so remove any MinValue values from the input. 
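                // (Why MinValue is excluded: for two's-complement signed integers the magnitude of
                // T.MinValue is not representable, so the oracle comparison below, T.CopySign(x[i], y[i]),
                // would overflow whenever it had to flip MinValue to a positive result. This note is
                // explanatory only and not part of the patch.)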
+ using BoundedMemory y = CreateAndFillTensor(tensorLength); + using BoundedMemory destination = CreateTensor(tensorLength); + + TensorPrimitives.CopySign(x, y, destination); + for (int i = 0; i < tensorLength; i++) + { + AssertEqualTolerance(T.CopySign(x[i], y[i]), destination[i]); + } + }); + } + + [Fact] + public void CopySign_InPlace() + { + Assert.All(Helpers.TensorLengthsIncluding0, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + RemoveSignedMinValue(x); // CopySign doesn't work with MinValue for signed integers, so remove any MinValue values from the input. + + T[] xOrig = x.Span.ToArray(); + + TensorPrimitives.CopySign(x, x, x); + + for (int i = 0; i < tensorLength; i++) + { + AssertEqualTolerance(T.CopySign(xOrig[i], xOrig[i]), x[i]); + } + }); + } + + [Fact] + public void CopySign_ThrowsForMismatchedLengths() + { + Assert.All(Helpers.TensorLengths, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + using BoundedMemory y = CreateAndFillTensor(tensorLength - 1); + using BoundedMemory destination = CreateTensor(tensorLength); + + Assert.Throws(() => TensorPrimitives.CopySign(x, y, destination)); + Assert.Throws(() => TensorPrimitives.CopySign(y, x, destination)); + }); + } + + [Fact] + public void CopySign_ThrowsForTooShortDestination() + { + Assert.All(Helpers.TensorLengths, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + using BoundedMemory y = CreateAndFillTensor(tensorLength); + using BoundedMemory destination = CreateTensor(tensorLength - 1); + + AssertExtensions.Throws("destination", () => TensorPrimitives.CopySign(x, y, destination)); }); } + + [Fact] + public void CopySign_ThrowsForOverlapppingInputsWithOutputs() + { + T[] array = new T[10]; + AssertExtensions.Throws("destination", () => TensorPrimitives.CopySign(array.AsSpan(1, 2), array.AsSpan(5, 2), array.AsSpan(0, 2))); + AssertExtensions.Throws("destination", () => TensorPrimitives.CopySign(array.AsSpan(1, 2), array.AsSpan(5, 2), array.AsSpan(2, 2))); + AssertExtensions.Throws("destination", () => TensorPrimitives.CopySign(array.AsSpan(1, 2), array.AsSpan(5, 2), array.AsSpan(4, 2))); + AssertExtensions.Throws("destination", () => TensorPrimitives.CopySign(array.AsSpan(1, 2), array.AsSpan(5, 2), array.AsSpan(6, 2))); + } + #endregion } public unsafe abstract class GenericNumberTensorPrimitivesTests : TensorPrimitivesTests @@ -203,6 +1442,10 @@ public unsafe abstract class GenericNumberTensorPrimitivesTests : TensorPrimi protected override void Divide(ReadOnlySpan x, T y, Span destination) => TensorPrimitives.Divide(x, y, destination); protected override T Divide(T x, T y) => x / y; protected override T Dot(ReadOnlySpan x, ReadOnlySpan y) => TensorPrimitives.Dot(x, y); + protected override int IndexOfMax(ReadOnlySpan x) => TensorPrimitives.IndexOfMax(x); + protected override int IndexOfMaxMagnitude(ReadOnlySpan x) => TensorPrimitives.IndexOfMaxMagnitude(x); + protected override int IndexOfMin(ReadOnlySpan x) => TensorPrimitives.IndexOfMin(x); + protected override int IndexOfMinMagnitude(ReadOnlySpan x) => TensorPrimitives.IndexOfMinMagnitude(x); protected override T Max(ReadOnlySpan x) => TensorPrimitives.Max(x); protected override void Max(ReadOnlySpan x, ReadOnlySpan y, Span destination) => TensorPrimitives.Max(x, y, destination); protected override T Max(T x, T y) => T.Max(x, y); @@ -291,5 +1534,76 @@ protected override void AssertEqualTolerance(T expected, T actual, T tolerance) protected override T NaN => throw new 
NotSupportedException(); protected override IEnumerable GetSpecialValues() => Enumerable.Empty(); protected override void SetSpecialValues(Span x, Span y) { } + + #region Scalar,Span -> Destination + public static IEnumerable ScalarSpanDestinationFunctionsToTest() + { + yield return new object[] { new ScalarSpanDestinationDelegate(TensorPrimitives.Divide), new Func((x, y) => x / y) }; + yield return new object[] { new ScalarSpanDestinationDelegate(TensorPrimitives.Subtract), new Func((x, y) => x - y) }; + } + + [Theory] + [MemberData(nameof(ScalarSpanDestinationFunctionsToTest))] + public void ScalarSpanDestination_AllLengths(ScalarSpanDestinationDelegate tensorPrimitivesMethod, Func expectedMethod) + { + Assert.All(Helpers.TensorLengthsIncluding0, tensorLength => + { + T x = NextRandom(); + using BoundedMemory y = CreateTensor(tensorLength); + FillTensor(y.Span, default); + using BoundedMemory destination = CreateTensor(tensorLength); + + tensorPrimitivesMethod(x, y, destination); + + for (int i = 0; i < tensorLength; i++) + { + AssertEqualTolerance(expectedMethod(x, y[i]), destination[i]); + } + }); + } + + [Theory] + [MemberData(nameof(ScalarSpanDestinationFunctionsToTest))] + public void ScalarSpanDestination_InPlace(ScalarSpanDestinationDelegate tensorPrimitivesMethod, Func expectedMethod) + { + Assert.All(Helpers.TensorLengthsIncluding0, tensorLength => + { + T x = NextRandom(); + using BoundedMemory y = CreateTensor(tensorLength); + FillTensor(y.Span, default); + T[] yOrig = y.Span.ToArray(); + + tensorPrimitivesMethod(x, y.Span, y.Span); + + for (int i = 0; i < tensorLength; i++) + { + AssertEqualTolerance(expectedMethod(x, yOrig[i]), y[i]); + } + }); + } + + [Theory] + [MemberData(nameof(ScalarSpanDestinationFunctionsToTest))] + public void ScalarSpanDestination_ThrowsForTooShortDestination(ScalarSpanDestinationDelegate tensorPrimitivesMethod, Func _) + { + Assert.All(Helpers.TensorLengths, tensorLength => + { + T x = NextRandom(); + using BoundedMemory y = CreateAndFillTensor(tensorLength); + using BoundedMemory destination = CreateTensor(tensorLength - 1); + + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(x, y, destination)); + }); + } + + [Theory] + [MemberData(nameof(ScalarSpanDestinationFunctionsToTest))] + public void ScalarSpanDestination_ThrowsForOverlapppingInputsWithOutputs(ScalarSpanDestinationDelegate tensorPrimitivesMethod, Func _) + { + T[] array = new T[10]; + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(default, array.AsSpan(4, 2), array.AsSpan(3, 2))); + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(default, array.AsSpan(4, 2), array.AsSpan(5, 2))); + } + #endregion } } diff --git a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.NonGeneric.Single.cs b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.NonGeneric.Single.cs index 50e3b3dae77d4..85fb2b955cceb 100644 --- a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.NonGeneric.Single.cs +++ b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.NonGeneric.Single.cs @@ -1,13 +1,11 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
-using System.Buffers; using System.Collections.Generic; -using System.Linq; -using System.Runtime.InteropServices; -using Xunit; using Xunit.Sdk; +// Tests specific to .NET Standard non-generic APIs + namespace System.Numerics.Tensors.Tests { public unsafe class NonGenericSingleTensorPrimitivesTests : TensorPrimitivesTests @@ -35,6 +33,10 @@ public unsafe class NonGenericSingleTensorPrimitivesTests : TensorPrimitivesTest protected override void Log(ReadOnlySpan x, Span destination) => TensorPrimitives.Log(x, destination); protected override float Log2(float x) => MathF.Log(x, 2); protected override void Log2(ReadOnlySpan x, Span destination) => TensorPrimitives.Log2(x, destination); + protected override int IndexOfMax(ReadOnlySpan x) => TensorPrimitives.IndexOfMax(x); + protected override int IndexOfMaxMagnitude(ReadOnlySpan x) => TensorPrimitives.IndexOfMaxMagnitude(x); + protected override int IndexOfMin(ReadOnlySpan x) => TensorPrimitives.IndexOfMin(x); + protected override int IndexOfMinMagnitude(ReadOnlySpan x) => TensorPrimitives.IndexOfMinMagnitude(x); protected override float Max(ReadOnlySpan x) => TensorPrimitives.Max(x); protected override void Max(ReadOnlySpan x, ReadOnlySpan y, Span destination) => TensorPrimitives.Max(x, y, destination); protected override float Max(float x, float y) => MathF.Max(x, y); @@ -174,203 +176,5 @@ protected override void SetSpecialValues(Span x, Span y) } private static unsafe float UInt32ToSingle(uint i) => *(float*)&i; - - // TODO: Move these IndexOf tests to the base class once generic versions are implemented. - #region IndexOfMax - [Fact] - public void IndexOfMax_ReturnsNegative1OnEmpty() - { - Assert.Equal(-1, TensorPrimitives.IndexOfMax(ReadOnlySpan.Empty)); - } - - [Fact] - public void IndexOfMax() - { - Assert.All(Helpers.TensorLengths, tensorLength => - { - foreach (int expected in new[] { 0, tensorLength / 2, tensorLength - 1 }) - { - using BoundedMemory x = CreateAndFillTensor(tensorLength); - x[expected] = Enumerable.Max(MemoryMarshal.ToEnumerable(x.Memory)) + 1; - Assert.Equal(expected, TensorPrimitives.IndexOfMax(x)); - } - }); - } - - [Fact] - public void IndexOfMax_FirstNaNReturned() - { - Assert.All(Helpers.TensorLengths, tensorLength => - { - foreach (int expected in new[] { 0, tensorLength / 2, tensorLength - 1 }) - { - using BoundedMemory x = CreateAndFillTensor(tensorLength); - x[expected] = float.NaN; - x[tensorLength - 1] = float.NaN; - Assert.Equal(expected, TensorPrimitives.IndexOfMax(x)); - } - }); - } - - [Fact] - public void IndexOfMax_Negative0LesserThanPositive0() - { - Assert.Equal(1, TensorPrimitives.IndexOfMax([-0f, +0f])); - Assert.Equal(0, TensorPrimitives.IndexOfMax([-0f, -0f, -0f, -0f])); - Assert.Equal(4, TensorPrimitives.IndexOfMax([-0f, -0f, -0f, -0f, +0f, +0f, +0f])); - Assert.Equal(0, TensorPrimitives.IndexOfMax([+0f, -0f])); - Assert.Equal(1, TensorPrimitives.IndexOfMax([-1, -0f])); - Assert.Equal(2, TensorPrimitives.IndexOfMax([-1, -0f, 1])); - } - #endregion - - #region IndexOfMaxMagnitude - [Fact] - public void IndexOfMaxMagnitude_ReturnsNegative1OnEmpty() - { - Assert.Equal(-1, TensorPrimitives.IndexOfMaxMagnitude(ReadOnlySpan.Empty)); - } - - [Fact] - public void IndexOfMaxMagnitude() - { - Assert.All(Helpers.TensorLengths, tensorLength => - { - foreach (int expected in new[] { 0, tensorLength / 2, tensorLength - 1 }) - { - using BoundedMemory x = CreateAndFillTensor(tensorLength); - x[expected] = Enumerable.Max(MemoryMarshal.ToEnumerable(x.Memory), Math.Abs) + 1; - Assert.Equal(expected, 
TensorPrimitives.IndexOfMaxMagnitude(x)); - } - }); - } - - [Fact] - public void IndexOfMaxMagnitude_FirstNaNReturned() - { - Assert.All(Helpers.TensorLengths, tensorLength => - { - foreach (int expected in new[] { 0, tensorLength / 2, tensorLength - 1 }) - { - using BoundedMemory x = CreateAndFillTensor(tensorLength); - x[expected] = float.NaN; - x[tensorLength - 1] = float.NaN; - Assert.Equal(expected, TensorPrimitives.IndexOfMaxMagnitude(x)); - } - }); - } - - [Fact] - public void IndexOfMaxMagnitude_Negative0LesserThanPositive0() - { - Assert.Equal(0, TensorPrimitives.IndexOfMaxMagnitude([-0f, -0f, -0f, -0f])); - Assert.Equal(1, TensorPrimitives.IndexOfMaxMagnitude([-0f, +0f])); - Assert.Equal(1, TensorPrimitives.IndexOfMaxMagnitude([-0f, +0f, +0f, +0f])); - Assert.Equal(0, TensorPrimitives.IndexOfMaxMagnitude([+0f, -0f])); - Assert.Equal(0, TensorPrimitives.IndexOfMaxMagnitude([-1, -0f])); - Assert.Equal(2, TensorPrimitives.IndexOfMaxMagnitude([-1, -0f, 1])); - } - #endregion - - #region IndexOfMin - [Fact] - public void IndexOfMin_ReturnsNegative1OnEmpty() - { - Assert.Equal(-1, TensorPrimitives.IndexOfMin(ReadOnlySpan.Empty)); - } - - [Fact] - public void IndexOfMin() - { - Assert.All(Helpers.TensorLengths, tensorLength => - { - foreach (int expected in new[] { 0, tensorLength / 2, tensorLength - 1 }) - { - using BoundedMemory x = CreateAndFillTensor(tensorLength); - x[expected] = Enumerable.Min(MemoryMarshal.ToEnumerable(x.Memory)) - 1; - Assert.Equal(expected, TensorPrimitives.IndexOfMin(x)); - } - }); - } - - [Fact] - public void IndexOfMin_FirstNaNReturned() - { - Assert.All(Helpers.TensorLengths, tensorLength => - { - foreach (int expected in new[] { 0, tensorLength / 2, tensorLength - 1 }) - { - using BoundedMemory x = CreateAndFillTensor(tensorLength); - x[expected] = float.NaN; - x[tensorLength - 1] = float.NaN; - Assert.Equal(expected, TensorPrimitives.IndexOfMin(x)); - } - }); - } - - [Fact] - public void IndexOfMin_Negative0LesserThanPositive0() - { - Assert.Equal(0, TensorPrimitives.IndexOfMin([-0f, +0f])); - Assert.Equal(1, TensorPrimitives.IndexOfMin([+0f, -0f])); - Assert.Equal(1, TensorPrimitives.IndexOfMin([+0f, -0f, -0f, -0f, -0f])); - Assert.Equal(0, TensorPrimitives.IndexOfMin([-1, -0f])); - Assert.Equal(0, TensorPrimitives.IndexOfMin([-1, -0f, 1])); - } - #endregion - - #region IndexOfMinMagnitude - [Fact] - public void IndexOfMinMagnitude_ReturnsNegative1OnEmpty() - { - Assert.Equal(-1, TensorPrimitives.IndexOfMinMagnitude(ReadOnlySpan.Empty)); - } - - [Fact] - public void IndexOfMinMagnitude() - { - Assert.All(Helpers.TensorLengths, tensorLength => - { - foreach (int expected in new[] { 0, tensorLength / 2, tensorLength - 1 }) - { - using BoundedMemory x = CreateTensor(tensorLength); - for (int i = 0; i < x.Length; i++) - { - x[i] = i % 2 == 0 ? 
42 : -42; - } - - x[expected] = -41; - - Assert.Equal(expected, TensorPrimitives.IndexOfMinMagnitude(x)); - } - }); - } - - [Fact] - public void IndexOfMinMagnitude_FirstNaNReturned() - { - Assert.All(Helpers.TensorLengths, tensorLength => - { - foreach (int expected in new[] { 0, tensorLength / 2, tensorLength - 1 }) - { - using BoundedMemory x = CreateAndFillTensor(tensorLength); - x[expected] = float.NaN; - x[tensorLength - 1] = float.NaN; - Assert.Equal(expected, TensorPrimitives.IndexOfMinMagnitude(x)); - } - }); - } - - [Fact] - public void IndexOfMinMagnitude_Negative0LesserThanPositive0() - { - Assert.Equal(0, TensorPrimitives.IndexOfMinMagnitude([-0f, -0f, -0f, -0f])); - Assert.Equal(0, TensorPrimitives.IndexOfMinMagnitude([-0f, +0f])); - Assert.Equal(1, TensorPrimitives.IndexOfMinMagnitude([+0f, -0f])); - Assert.Equal(1, TensorPrimitives.IndexOfMinMagnitude([+0f, -0f, -0f, -0f])); - Assert.Equal(1, TensorPrimitives.IndexOfMinMagnitude([-1, -0f])); - Assert.Equal(1, TensorPrimitives.IndexOfMinMagnitude([-1, -0f, 1])); - } - #endregion } } diff --git a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitivesTests.cs b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitivesTests.cs index 8cc159b5b3b14..747c67e915bea 100644 --- a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitivesTests.cs +++ b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitivesTests.cs @@ -7,6 +7,8 @@ using System.Runtime.InteropServices; using Xunit; +// Shared helpers and Facts/Theories used by both generic methods on .NET Core and non-generic methods on .NET Framework + namespace System.Numerics.Tensors.Tests { public abstract class TensorPrimitivesTests where T : unmanaged, IEquatable @@ -25,6 +27,10 @@ public abstract class TensorPrimitivesTests where T : unmanaged, IEquatable x, T y, Span destination); protected abstract T Dot(ReadOnlySpan x, ReadOnlySpan y); protected abstract void Exp(ReadOnlySpan x, Span destination); + protected abstract int IndexOfMax(ReadOnlySpan x); + protected abstract int IndexOfMaxMagnitude(ReadOnlySpan x); + protected abstract int IndexOfMin(ReadOnlySpan x); + protected abstract int IndexOfMinMagnitude(ReadOnlySpan x); protected abstract void Log(ReadOnlySpan x, Span destination); protected abstract void Log2(ReadOnlySpan x, Span destination); protected abstract T Max(ReadOnlySpan x); @@ -84,6 +90,14 @@ public abstract class TensorPrimitivesTests where T : unmanaged, IEquatable x, Span destination); + public delegate void SpanSpanDestinationDelegate(ReadOnlySpan x, ReadOnlySpan y, Span destination); + public delegate void SpanScalarDestinationDelegate(ReadOnlySpan x, T2 y, Span destination); + public delegate void ScalarSpanDestinationDelegate(T x, ReadOnlySpan y, Span destination); + public delegate void SpanSpanSpanDestinationDelegate(ReadOnlySpan x, ReadOnlySpan y, ReadOnlySpan z, Span destination); + public delegate void SpanDestinationDestinationDelegate(ReadOnlySpan x, Span destination1, Span destination2); + protected virtual bool IsFloatingPoint => typeof(T) == typeof(float) || typeof(T) == typeof(double); protected abstract T ConvertFromSingle(float f); @@ -236,15 +250,6 @@ public void Add_TwoTensors() { AssertEqualTolerance(Add(x[i], y[i]), destination[i]); } - - T[] xOrig = x.Span.ToArray(); - - // Validate that the destination can be the same as an input. 
- Add(x, x, x); - for (int i = 0; i < tensorLength; i++) - { - AssertEqualTolerance(Add(xOrig[i], xOrig[i]), x[i]); - } }); } @@ -1047,6 +1052,265 @@ public void Exp_ThrowsForOverlapppingInputsWithOutputs() } #endregion + #region IndexOfMax + [Fact] + public void IndexOfMax_ReturnsNegative1OnEmpty() + { + Assert.Equal(-1, IndexOfMax(ReadOnlySpan.Empty)); + } + + [Fact] + public void IndexOfMax_AllLengths() + { + Assert.All(Helpers.TensorLengths, tensorLength => + { + foreach (int expected in new[] { 0, tensorLength / 2, tensorLength - 1 }) + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + x[expected] = Enumerable.Max(MemoryMarshal.ToEnumerable(x.Memory)); + int actual = IndexOfMax(x.Span); + Assert.True(actual == expected || (actual < expected && x[actual].Equals(x[expected])), $"{tensorLength} {actual} {expected} {string.Join(",", MemoryMarshal.ToEnumerable(x.Memory))}"); + } + }); + } + + [Fact] + public void IndexOfMax_FirstNaNReturned() + { + if (!IsFloatingPoint) return; + + Assert.All(Helpers.TensorLengths, tensorLength => + { + foreach (int expected in new[] { 0, tensorLength / 2, tensorLength - 1 }) + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + x[expected] = ConvertFromSingle(float.NaN); + x[tensorLength - 1] = ConvertFromSingle(float.NaN); + Assert.Equal(expected, IndexOfMax(x.Span)); + } + }); + } + + [Fact] + public void IndexOfMax_Negative0LesserThanPositive0() + { + if (!IsFloatingPoint) return; + + Assert.Equal(1, IndexOfMax([ConvertFromSingle(-0f), ConvertFromSingle(+0f)])); + Assert.Equal(0, IndexOfMax([ConvertFromSingle(-0f), ConvertFromSingle(-0f), ConvertFromSingle(-0f), ConvertFromSingle(-0f)])); + Assert.Equal(4, IndexOfMax([ConvertFromSingle(-0f), ConvertFromSingle(-0f), ConvertFromSingle(-0f), ConvertFromSingle(-0f), ConvertFromSingle(+0f), ConvertFromSingle(+0f), ConvertFromSingle(+0f)])); + Assert.Equal(0, IndexOfMax([ConvertFromSingle(+0f), ConvertFromSingle(-0f)])); + Assert.Equal(1, IndexOfMax([ConvertFromSingle(-1), ConvertFromSingle(-0f)])); + Assert.Equal(2, IndexOfMax([ConvertFromSingle(-1), ConvertFromSingle(-0f), ConvertFromSingle(1f)])); + } + #endregion + + #region IndexOfMaxMagnitude + [Fact] + public void IndexOfMaxMagnitude_ReturnsNegative1OnEmpty() + { + Assert.Equal(-1, IndexOfMaxMagnitude(ReadOnlySpan.Empty)); + } + + [Fact] + public void IndexOfMaxMagnitude_AllLengths() + { + Assert.All(Helpers.TensorLengths, tensorLength => + { + foreach (int expected in new[] { 0, tensorLength / 2, tensorLength - 1 }) + { + using BoundedMemory x = CreateTensor(tensorLength); + FillTensor(x, MinValue); + + T max = x[0]; + for (int i = 0; i < x.Length; i++) + { + int compared = Comparer.Default.Compare(Abs(x[i]), Abs(max)); + if (compared > 0 || (compared == 0 && EqualityComparer.Default.Equals(x[i], max))) + { + max = x[i]; + } + } + x[expected] = max; + + int actual = IndexOfMaxMagnitude(x.Span); + + if (actual != expected) + { + Assert.True(actual < expected || Comparer.Default.Compare(x[actual], x[expected]) > 0, $"{tensorLength} {actual} {expected} {string.Join(",", MemoryMarshal.ToEnumerable(x.Memory))}"); + if (IsFloatingPoint) + { + AssertEqualTolerance(Abs(x[expected]), Abs(x[actual])); + } + else + { + Assert.Equal(Abs(x[expected]), Abs(x[actual])); + } + } + } + }); + } + + [Fact] + public void IndexOfMaxMagnitude_FirstNaNReturned() + { + if (!IsFloatingPoint) return; + + Assert.All(Helpers.TensorLengths, tensorLength => + { + foreach (int expected in new[] { 0, tensorLength / 2, tensorLength - 1 }) + { + using 
BoundedMemory x = CreateAndFillTensor(tensorLength); + x[expected] = ConvertFromSingle(float.NaN); + x[tensorLength - 1] = ConvertFromSingle(float.NaN); + Assert.Equal(expected, IndexOfMaxMagnitude(x)); + } + }); + } + + [Fact] + public void IndexOfMaxMagnitude_Negative0LesserThanPositive0() + { + if (!IsFloatingPoint) return; + + Assert.Equal(0, IndexOfMaxMagnitude([ConvertFromSingle(-0f), ConvertFromSingle(-0f), ConvertFromSingle(-0f), ConvertFromSingle(-0f)])); + Assert.Equal(1, IndexOfMaxMagnitude([ConvertFromSingle(-0f), ConvertFromSingle(+0f)])); + Assert.Equal(1, IndexOfMaxMagnitude([ConvertFromSingle(-0f), ConvertFromSingle(+0f), ConvertFromSingle(+0f), ConvertFromSingle(+0f)])); + Assert.Equal(0, IndexOfMaxMagnitude([ConvertFromSingle(+0f), ConvertFromSingle(-0f)])); + Assert.Equal(0, IndexOfMaxMagnitude([ConvertFromSingle(-1), ConvertFromSingle(-0f)])); + Assert.Equal(2, IndexOfMaxMagnitude([ConvertFromSingle(-1), ConvertFromSingle(-0f), ConvertFromSingle(1f)])); + } + #endregion + + #region IndexOfMin + [Fact] + public void IndexOfMin_ReturnsNegative1OnEmpty() + { + Assert.Equal(-1, IndexOfMin(ReadOnlySpan.Empty)); + } + + [Fact] + public void IndexOfMin_AllLengths() + { + Assert.All(Helpers.TensorLengths, tensorLength => + { + foreach (int expected in new[] { 0, tensorLength / 2, tensorLength - 1 }) + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + x[expected] = Enumerable.Min(MemoryMarshal.ToEnumerable(x.Memory)); + int actual = IndexOfMin(x.Span); + Assert.True(actual == expected || (actual < expected && x[actual].Equals(x[expected])), $"{tensorLength} {actual} {expected} {string.Join(",", MemoryMarshal.ToEnumerable(x.Memory))}"); + } + }); + } + + [Fact] + public void IndexOfMin_FirstNaNReturned() + { + if (!IsFloatingPoint) return; + + Assert.All(Helpers.TensorLengths, tensorLength => + { + foreach (int expected in new[] { 0, tensorLength / 2, tensorLength - 1 }) + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + x[expected] = ConvertFromSingle(float.NaN); + x[tensorLength - 1] = ConvertFromSingle(float.NaN); + Assert.Equal(expected, IndexOfMin(x)); + } + }); + } + + [Fact] + public void IndexOfMin_Negative0LesserThanPositive0() + { + if (!IsFloatingPoint) return; + + Assert.Equal(0, IndexOfMin([ConvertFromSingle(-0f), ConvertFromSingle(+0f)])); + Assert.Equal(1, IndexOfMin([ConvertFromSingle(+0f), ConvertFromSingle(-0f)])); + Assert.Equal(1, IndexOfMin([ConvertFromSingle(+0f), ConvertFromSingle(-0f), ConvertFromSingle(-0f), ConvertFromSingle(-0f), ConvertFromSingle(-0f)])); + Assert.Equal(0, IndexOfMin([ConvertFromSingle(-1), ConvertFromSingle(-0f)])); + Assert.Equal(0, IndexOfMin([ConvertFromSingle(-1), ConvertFromSingle(-0f), ConvertFromSingle(1f)])); + } + #endregion + + #region IndexOfMinMagnitude + [Fact] + public void IndexOfMinMagnitude_ReturnsNegative1OnEmpty() + { + Assert.Equal(-1, IndexOfMinMagnitude(ReadOnlySpan.Empty)); + } + + [Fact] + public void IndexOfMinMagnitude_AllLengths() + { + Assert.All(Helpers.TensorLengths, tensorLength => + { + foreach (int expected in new[] { 0, tensorLength / 2, tensorLength - 1 }) + { + using BoundedMemory x = CreateTensor(tensorLength); + FillTensor(x, MinValue); + + T min = x[0]; + for (int i = 0; i < x.Length; i++) + { + int compared = Comparer.Default.Compare(Abs(x[i]), Abs(min)); + if (compared < 0 || (compared == 0 && Comparer.Default.Compare(x[i], min) < 0)) + { + min = x[i]; + } + } + + x[expected] = min; + int actual = IndexOfMinMagnitude(x.Span); + + if (actual != expected) + { + 
Assert.True(actual < expected || Comparer.Default.Compare(x[actual], x[expected]) < 0, $"{tensorLength} {actual} {expected} {string.Join(",", MemoryMarshal.ToEnumerable(x.Memory))}"); + if (IsFloatingPoint) + { + AssertEqualTolerance(Abs(x[expected]), Abs(x[actual])); + } + else + { + Assert.Equal(Abs(x[expected]), Abs(x[actual])); + } + } + } + }); + } + + [Fact] + public void IndexOfMinMagnitude_FirstNaNReturned() + { + if (!IsFloatingPoint) return; + + Assert.All(Helpers.TensorLengths, tensorLength => + { + foreach (int expected in new[] { 0, tensorLength / 2, tensorLength - 1 }) + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + x[expected] = ConvertFromSingle(float.NaN); + x[tensorLength - 1] = ConvertFromSingle(float.NaN); + Assert.Equal(expected, IndexOfMinMagnitude(x)); + } + }); + } + + [Fact] + public void IndexOfMinMagnitude_Negative0LesserThanPositive0() + { + if (!IsFloatingPoint) return; + + Assert.Equal(0, IndexOfMinMagnitude([ConvertFromSingle(-0f), ConvertFromSingle(-0f), ConvertFromSingle(-0f), ConvertFromSingle(-0f)])); + Assert.Equal(0, IndexOfMinMagnitude([ConvertFromSingle(-0f), ConvertFromSingle(+0f)])); + Assert.Equal(1, IndexOfMinMagnitude([ConvertFromSingle(+0f), ConvertFromSingle(-0f)])); + Assert.Equal(1, IndexOfMinMagnitude([ConvertFromSingle(+0f), ConvertFromSingle(-0f), ConvertFromSingle(-0f), ConvertFromSingle(-0f)])); + Assert.Equal(1, IndexOfMinMagnitude([ConvertFromSingle(-1), ConvertFromSingle(-0f)])); + Assert.Equal(1, IndexOfMinMagnitude([ConvertFromSingle(-1), ConvertFromSingle(-0f), ConvertFromSingle(1f)])); + } + #endregion + #region Log [Fact] public void Log_AllValues() From a236b84b76a95c7ad2a27daa97887c471bcb8737 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Fri, 19 Jan 2024 21:08:20 -0500 Subject: [PATCH 02/14] Re-fix IndexOfXx operations --- .../netcore/TensorPrimitives.netcore.cs | 372 ++++++++++-------- .../TensorPrimitives.Single.netstandard.cs | 295 ++++++++------ 2 files changed, 372 insertions(+), 295 deletions(-) diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs index 7d3f752c483c3..cc9a6df72e159 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs @@ -2754,7 +2754,7 @@ static Vector512 CreateVector512T(int i) => sizeof(T) == sizeof(short) ? Vector512.Create(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31).As() : sizeof(T) == sizeof(byte) ? Vector512.Create((byte)0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63).As() : throw new NotSupportedException(); - Vector512 curIndex = resultIndex; + Vector512 currentIndex = resultIndex; Vector512 increment = CreateVector512T(Vector512.Count); // Load the first vector as the initial set of results, and bail immediately @@ -2780,7 +2780,7 @@ static Vector512 CreateVector512T(int i) => { // Load the next vector, and early exit on NaN. 
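                    // (Explanatory note, not part of the patch: NaN never compares equal to itself, so a
                    // lane-wise self-equality check on the loaded vector yields a mask of any NaN lanes;
                    // the running index vector then maps the first such lane back to the element index
                    // that is returned, which is how "first NaN wins" is preserved in the vector paths.)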
current = Vector512.LoadUnsafe(ref xRef, (uint)i); - curIndex += increment; + currentIndex += increment; if (typeof(T) == typeof(float) || typeof(T) == typeof(double)) { @@ -2791,7 +2791,7 @@ static Vector512 CreateVector512T(int i) => } } - TIndexOfMinMax.Invoke(ref result, current, ref resultIndex, curIndex); + TIndexOfMinMax.Invoke(ref result, current, ref resultIndex, currentIndex); i += Vector512.Count; } @@ -2800,7 +2800,7 @@ static Vector512 CreateVector512T(int i) => if (i != x.Length) { current = Vector512.LoadUnsafe(ref xRef, (uint)(x.Length - Vector512.Count)); - curIndex += CreateVector512T(x.Length - i); + currentIndex += CreateVector512T(x.Length - i); if (typeof(T) == typeof(float) || typeof(T) == typeof(double)) { @@ -2809,13 +2809,13 @@ static Vector512 CreateVector512T(int i) => { int indexInVectorOfFirstMatch = IndexOfFirstMatch(nanMask); return - sizeof(T) == sizeof(long) ? (int)(long)(object)curIndex.As()[indexInVectorOfFirstMatch] : - sizeof(T) == sizeof(int) ? (int)(object)curIndex.As()[indexInVectorOfFirstMatch] : + sizeof(T) == sizeof(long) ? (int)(long)(object)currentIndex.As()[indexInVectorOfFirstMatch] : + sizeof(T) == sizeof(int) ? (int)(object)currentIndex.As()[indexInVectorOfFirstMatch] : throw new NotSupportedException(); } } - TIndexOfMinMax.Invoke(ref result, current, ref resultIndex, curIndex); + TIndexOfMinMax.Invoke(ref result, current, ref resultIndex, currentIndex); } // Aggregate the lanes in the vector to create the final scalar result. @@ -2839,7 +2839,7 @@ static Vector256 CreateVector256T(int i) => sizeof(T) == sizeof(short) ? Vector256.Create(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15).As() : sizeof(T) == sizeof(byte) ? Vector256.Create((byte)0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31).As() : throw new NotSupportedException(); - Vector256 curIndex = resultIndex; + Vector256 currentIndex = resultIndex; Vector256 increment = CreateVector256T(Vector256.Count); // Load the first vector as the initial set of results, and bail immediately @@ -2865,7 +2865,7 @@ static Vector256 CreateVector256T(int i) => { // Load the next vector, and early exit on NaN. current = Vector256.LoadUnsafe(ref xRef, (uint)i); - curIndex += increment; + currentIndex += increment; if (typeof(T) == typeof(float) || typeof(T) == typeof(double)) { @@ -2876,7 +2876,7 @@ static Vector256 CreateVector256T(int i) => } } - TIndexOfMinMax.Invoke(ref result, current, ref resultIndex, curIndex); + TIndexOfMinMax.Invoke(ref result, current, ref resultIndex, currentIndex); i += Vector256.Count; } @@ -2885,7 +2885,7 @@ static Vector256 CreateVector256T(int i) => if (i != x.Length) { current = Vector256.LoadUnsafe(ref xRef, (uint)(x.Length - Vector256.Count)); - curIndex += CreateVector256T(x.Length - i); + currentIndex += CreateVector256T(x.Length - i); if (typeof(T) == typeof(float) || typeof(T) == typeof(double)) { @@ -2894,13 +2894,13 @@ static Vector256 CreateVector256T(int i) => { int indexInVectorOfFirstMatch = IndexOfFirstMatch(nanMask); return - sizeof(T) == sizeof(long) ? (int)(long)(object)curIndex.As()[indexInVectorOfFirstMatch] : - sizeof(T) == sizeof(int) ? (int)(object)curIndex.As()[indexInVectorOfFirstMatch] : + sizeof(T) == sizeof(long) ? (int)(long)(object)currentIndex.As()[indexInVectorOfFirstMatch] : + sizeof(T) == sizeof(int) ? 
(int)(object)currentIndex.As()[indexInVectorOfFirstMatch] : throw new NotSupportedException(); } } - TIndexOfMinMax.Invoke(ref result, current, ref resultIndex, curIndex); + TIndexOfMinMax.Invoke(ref result, current, ref resultIndex, currentIndex); } // Aggregate the lanes in the vector to create the final scalar result. @@ -2924,7 +2924,7 @@ static Vector128 CreateVector128T(int i) => sizeof(T) == sizeof(short) ? Vector128.Create(0, 1, 2, 3, 4, 5, 6, 7).As() : sizeof(T) == sizeof(byte) ? Vector128.Create((byte)0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15).As() : throw new NotSupportedException(); - Vector128 curIndex = resultIndex; + Vector128 currentIndex = resultIndex; Vector128 increment = CreateVector128T(Vector128.Count); // Load the first vector as the initial set of results, and bail immediately @@ -2950,7 +2950,7 @@ static Vector128 CreateVector128T(int i) => { // Load the next vector, and early exit on NaN. current = Vector128.LoadUnsafe(ref xRef, (uint)i); - curIndex += increment; + currentIndex += increment; if (typeof(T) == typeof(float) || typeof(T) == typeof(double)) { @@ -2961,7 +2961,7 @@ static Vector128 CreateVector128T(int i) => } } - TIndexOfMinMax.Invoke(ref result, current, ref resultIndex, curIndex); + TIndexOfMinMax.Invoke(ref result, current, ref resultIndex, currentIndex); i += Vector128.Count; } @@ -2970,7 +2970,7 @@ static Vector128 CreateVector128T(int i) => if (i != x.Length) { current = Vector128.LoadUnsafe(ref xRef, (uint)(x.Length - Vector128.Count)); - curIndex += CreateVector128T(x.Length - i); + currentIndex += CreateVector128T(x.Length - i); if (typeof(T) == typeof(float) || typeof(T) == typeof(double)) { @@ -2979,13 +2979,13 @@ static Vector128 CreateVector128T(int i) => { int indexInVectorOfFirstMatch = IndexOfFirstMatch(nanMask); return - sizeof(T) == sizeof(long) ? (int)(long)(object)curIndex.As()[indexInVectorOfFirstMatch] : - sizeof(T) == sizeof(int) ? (int)(object)curIndex.As()[indexInVectorOfFirstMatch] : + sizeof(T) == sizeof(long) ? (int)(long)(object)currentIndex.As()[indexInVectorOfFirstMatch] : + sizeof(T) == sizeof(int) ? (int)(object)currentIndex.As()[indexInVectorOfFirstMatch] : throw new NotSupportedException(); } } - TIndexOfMinMax.Invoke(ref result, current, ref resultIndex, curIndex); + TIndexOfMinMax.Invoke(ref result, current, ref resultIndex, currentIndex); } // Aggregate the lanes in the vector to create the final scalar result. 
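The hunks below rework the tie-breaking in the IndexOf* operators: equal values prefer the lower index, and for floating-point types the sign bit breaks ties so that negative zero ranks below positive zero, with the first NaN always winning. As a minimal scalar reference sketch of those semantics (illustrative only, not code from this patch; the class and method names are made up), IndexOfMax can be written as:

using System;
using System.Numerics;

static class IndexOfMaxReference
{
    // Returns -1 for empty input, the index of the first NaN if any, otherwise the lowest
    // index of the largest value, with negative zero ranked below positive zero.
    public static int IndexOfMax<T>(ReadOnlySpan<T> x) where T : INumber<T>
    {
        if (x.IsEmpty)
        {
            return -1;
        }

        T result = x[0];
        if (T.IsNaN(result))
        {
            return 0;
        }

        int resultIndex = 0;
        for (int i = 1; i < x.Length; i++)
        {
            T current = x[i];
            if (T.IsNaN(current))
            {
                return i; // the first NaN encountered determines the result
            }

            // Replace the running max only when current is strictly greater, or when the values
            // compare equal but result is negative zero and current is not; equal values with the
            // same sign keep the earlier index.
            if (current > result || (current == result && T.IsNegative(result) && !T.IsNegative(current)))
            {
                result = current;
                resultIndex = i;
            }
        }

        return resultIndex;
    }
}

The vectorized operators in the following hunks express the same equal-values branch as mask arithmetic: useResult |= equalMask & ConditionalSelect(sameSign, lessThanIndexMask, currentNegative), where sameSign compares the IsNegative masks of result and current.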
@@ -10112,10 +10112,10 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) private interface IIndexOfOperator where T : INumber { - static abstract int Invoke(ref T result, T current, int resultIndex, int curIndex); - static abstract void Invoke(ref Vector128 result, Vector128 current, ref Vector128 resultIndex, Vector128 curIndex); - static abstract void Invoke(ref Vector256 result, Vector256 current, ref Vector256 resultIndex, Vector256 curIndex); - static abstract void Invoke(ref Vector512 result, Vector512 current, ref Vector512 resultIndex, Vector512 curIndex); + static abstract int Invoke(ref T result, T current, int resultIndex, int currentIndex); + static abstract void Invoke(ref Vector128 result, Vector128 current, ref Vector128 resultIndex, Vector128 currentIndex); + static abstract void Invoke(ref Vector256 result, Vector256 current, ref Vector256 resultIndex, Vector256 currentIndex); + static abstract void Invoke(ref Vector512 result, Vector512 current, ref Vector512 resultIndex, Vector512 currentIndex); } [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -10228,114 +10228,124 @@ private static int IndexOfFinalAggregate(Vector512 resul return IndexOfFinalAggregate(resultLower, indexLower); } + [MethodImpl(MethodImplOptions.AggressiveInlining)] private static Vector128 IndexLessThan(Vector128 indices1, Vector128 indices2) => sizeof(T) == sizeof(long) ? Vector128.LessThan(indices1.AsInt64(), indices2.AsInt64()).As() : sizeof(T) == sizeof(int) ? Vector128.LessThan(indices1.AsInt32(), indices2.AsInt32()).As() : sizeof(T) == sizeof(short) ? Vector128.LessThan(indices1.AsInt16(), indices2.AsInt16()).As() : - sizeof(T) == sizeof(byte) ? Vector128.LessThan(indices1.AsByte(), indices2.AsByte()).As() : - throw new NotSupportedException(); + Vector128.LessThan(indices1.AsByte(), indices2.AsByte()).As(); + [MethodImpl(MethodImplOptions.AggressiveInlining)] private static Vector256 IndexLessThan(Vector256 indices1, Vector256 indices2) => sizeof(T) == sizeof(long) ? Vector256.LessThan(indices1.AsInt64(), indices2.AsInt64()).As() : sizeof(T) == sizeof(int) ? Vector256.LessThan(indices1.AsInt32(), indices2.AsInt32()).As() : sizeof(T) == sizeof(short) ? Vector256.LessThan(indices1.AsInt16(), indices2.AsInt16()).As() : - sizeof(T) == sizeof(byte) ? Vector256.LessThan(indices1.AsByte(), indices2.AsByte()).As() : - throw new NotSupportedException(); + Vector256.LessThan(indices1.AsByte(), indices2.AsByte()).As(); + [MethodImpl(MethodImplOptions.AggressiveInlining)] private static Vector512 IndexLessThan(Vector512 indices1, Vector512 indices2) => sizeof(T) == sizeof(long) ? Vector512.LessThan(indices1.AsInt64(), indices2.AsInt64()).As() : sizeof(T) == sizeof(int) ? Vector512.LessThan(indices1.AsInt32(), indices2.AsInt32()).As() : sizeof(T) == sizeof(short) ? Vector512.LessThan(indices1.AsInt16(), indices2.AsInt16()).As() : - sizeof(T) == sizeof(byte) ? 
Vector512.LessThan(indices1.AsByte(), indices2.AsByte()).As() : - throw new NotSupportedException(); + Vector512.LessThan(indices1.AsByte(), indices2.AsByte()).As(); /// Returns the index of MathF.Max(x, y) internal readonly struct IndexOfMaxOperator : IIndexOfOperator where T : INumber { [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void Invoke(ref Vector128 result, Vector128 current, ref Vector128 resultIndex, Vector128 curIndex) + public static void Invoke(ref Vector128 result, Vector128 current, ref Vector128 resultIndex, Vector128 currentIndex) { - Vector128 greaterThanMask = Vector128.GreaterThan(result, current); + Vector128 useResult = Vector128.GreaterThan(result, current); Vector128 equalMask = Vector128.Equals(result, current); if (equalMask != Vector128.Zero) { - Vector128 lessThanIndexMask = IndexLessThan(resultIndex, curIndex); + Vector128 lessThanIndexMask = IndexLessThan(resultIndex, currentIndex); if (typeof(T) == typeof(float) || typeof(T) == typeof(double)) { - greaterThanMask |= (equalMask & (~IsNegative(result) | IsNegative(current)) & lessThanIndexMask); + // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? (resultIndex < currentIndex) : IsNegative(current)); + Vector128 currentNegative = IsNegative(current); + Vector128 sameSign = Vector128.Equals(IsNegative(result).AsInt32(), currentNegative.AsInt32()).As(); + useResult |= equalMask & Vector128.ConditionalSelect(sameSign, lessThanIndexMask, currentNegative); } else { - greaterThanMask |= (equalMask & lessThanIndexMask); + useResult |= equalMask & lessThanIndexMask; } } - result = ElementWiseSelect(greaterThanMask, result, current); - resultIndex = ElementWiseSelect(greaterThanMask, resultIndex, curIndex); + result = ElementWiseSelect(useResult, result, current); + resultIndex = ElementWiseSelect(useResult, resultIndex, currentIndex); } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void Invoke(ref Vector256 result, Vector256 current, ref Vector256 resultIndex, Vector256 curIndex) + public static void Invoke(ref Vector256 result, Vector256 current, ref Vector256 resultIndex, Vector256 currentIndex) { - Vector256 greaterThanMask = Vector256.GreaterThan(result, current); + Vector256 useResult = Vector256.GreaterThan(result, current); Vector256 equalMask = Vector256.Equals(result, current); if (equalMask != Vector256.Zero) { - Vector256 lessThanIndexMask = IndexLessThan(resultIndex, curIndex); + Vector256 lessThanIndexMask = IndexLessThan(resultIndex, currentIndex); if (typeof(T) == typeof(float) || typeof(T) == typeof(double)) { - greaterThanMask |= (equalMask & (~IsNegative(result) | IsNegative(current)) & lessThanIndexMask); + // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? 
(resultIndex < currentIndex) : IsNegative(current)); + Vector256 currentNegative = IsNegative(current); + Vector256 sameSign = Vector256.Equals(IsNegative(result).AsInt32(), currentNegative.AsInt32()).As(); + useResult |= equalMask & Vector256.ConditionalSelect(sameSign, lessThanIndexMask, currentNegative); } else { - greaterThanMask |= (equalMask & lessThanIndexMask); + useResult |= equalMask & lessThanIndexMask; } } - result = ElementWiseSelect(greaterThanMask, result, current); - resultIndex = ElementWiseSelect(greaterThanMask, resultIndex, curIndex); + result = ElementWiseSelect(useResult, result, current); + resultIndex = ElementWiseSelect(useResult, resultIndex, currentIndex); } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void Invoke(ref Vector512 result, Vector512 current, ref Vector512 resultIndex, Vector512 curIndex) + public static void Invoke(ref Vector512 result, Vector512 current, ref Vector512 resultIndex, Vector512 currentIndex) { - Vector512 greaterThanMask = Vector512.GreaterThan(result, current); + Vector512 useResult = Vector512.GreaterThan(result, current); Vector512 equalMask = Vector512.Equals(result, current); if (equalMask != Vector512.Zero) { - Vector512 lessThanIndexMask = IndexLessThan(resultIndex, curIndex); + Vector512 lessThanIndexMask = IndexLessThan(resultIndex, currentIndex); if (typeof(T) == typeof(float) || typeof(T) == typeof(double)) { - greaterThanMask |= (equalMask & (~IsNegative(result) | IsNegative(current)) & lessThanIndexMask); + // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? (resultIndex < currentIndex) : IsNegative(current)); + Vector512 currentNegative = IsNegative(current); + Vector512 sameSign = Vector512.Equals(IsNegative(result).AsInt32(), currentNegative.AsInt32()).As(); + useResult |= equalMask & Vector512.ConditionalSelect(sameSign, lessThanIndexMask, currentNegative); } else { - greaterThanMask |= (equalMask & lessThanIndexMask); + useResult |= equalMask & lessThanIndexMask; } } - result = ElementWiseSelect(greaterThanMask, result, current); - resultIndex = ElementWiseSelect(greaterThanMask, resultIndex, curIndex); + result = ElementWiseSelect(useResult, result, current); + resultIndex = ElementWiseSelect(useResult, resultIndex, currentIndex); } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static int Invoke(ref T result, T current, int resultIndex, int curIndex) + public static int Invoke(ref T result, T current, int resultIndex, int currentIndex) { if (result == current) { - if (IsNegative(result) && !IsNegative(current)) + bool resultNegative = IsNegative(result); + if ((resultNegative == IsNegative(current)) ? 
(currentIndex < resultIndex) : resultNegative) { result = current; - return curIndex; + return currentIndex; } } else if (current > result) { result = current; - return curIndex; + return currentIndex; } return resultIndex; @@ -10345,95 +10355,105 @@ public static int Invoke(ref T result, T current, int resultIndex, int curIndex) internal readonly struct IndexOfMaxMagnitudeOperator : IIndexOfOperator where T : INumber { [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void Invoke(ref Vector128 result, Vector128 current, ref Vector128 resultIndex, Vector128 curIndex) + public static void Invoke(ref Vector128 result, Vector128 current, ref Vector128 resultIndex, Vector128 currentIndex) { Vector128 resultMag = Vector128.Abs(result), currentMag = Vector128.Abs(current); - Vector128 greaterThanMask = Vector128.GreaterThan(resultMag, currentMag); + Vector128 useResult = Vector128.GreaterThan(resultMag, currentMag); Vector128 equalMask = Vector128.Equals(resultMag, currentMag); if (equalMask != Vector128.Zero) { - Vector128 lessThanIndexMask = IndexLessThan(resultIndex, curIndex); + Vector128 lessThanIndexMask = IndexLessThan(resultIndex, currentIndex); if (typeof(T) == typeof(float) || typeof(T) == typeof(double)) { - greaterThanMask |= (equalMask & (~IsNegative(result) | IsNegative(current)) & lessThanIndexMask); + // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? (resultIndex < currentIndex) : IsNegative(current)); + Vector128 currentNegative = IsNegative(current); + Vector128 sameSign = Vector128.Equals(IsNegative(result).AsInt32(), currentNegative.AsInt32()).As(); + useResult |= equalMask & Vector128.ConditionalSelect(sameSign, lessThanIndexMask, currentNegative); } else { - greaterThanMask |= (equalMask & lessThanIndexMask); + useResult |= equalMask & lessThanIndexMask; } } - result = ElementWiseSelect(greaterThanMask, result, current); - resultIndex = ElementWiseSelect(greaterThanMask, resultIndex, curIndex); + result = ElementWiseSelect(useResult, result, current); + resultIndex = ElementWiseSelect(useResult, resultIndex, currentIndex); } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void Invoke(ref Vector256 result, Vector256 current, ref Vector256 resultIndex, Vector256 curIndex) + public static void Invoke(ref Vector256 result, Vector256 current, ref Vector256 resultIndex, Vector256 currentIndex) { Vector256 resultMag = Vector256.Abs(result), currentMag = Vector256.Abs(current); - Vector256 greaterThanMask = Vector256.GreaterThan(resultMag, currentMag); + Vector256 useResult = Vector256.GreaterThan(resultMag, currentMag); Vector256 equalMask = Vector256.Equals(resultMag, currentMag); if (equalMask != Vector256.Zero) { - Vector256 lessThanIndexMask = IndexLessThan(resultIndex, curIndex); + Vector256 lessThanIndexMask = IndexLessThan(resultIndex, currentIndex); if (typeof(T) == typeof(float) || typeof(T) == typeof(double)) { - greaterThanMask |= (equalMask & (~IsNegative(result) | IsNegative(current)) & lessThanIndexMask); + // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? 
(resultIndex < currentIndex) : IsNegative(current)); + Vector256 currentNegative = IsNegative(current); + Vector256 sameSign = Vector256.Equals(IsNegative(result).AsInt32(), currentNegative.AsInt32()).As(); + useResult |= equalMask & Vector256.ConditionalSelect(sameSign, lessThanIndexMask, currentNegative); } else { - greaterThanMask |= (equalMask & lessThanIndexMask); + useResult |= equalMask & lessThanIndexMask; } } - result = ElementWiseSelect(greaterThanMask, result, current); - resultIndex = ElementWiseSelect(greaterThanMask, resultIndex, curIndex); + result = ElementWiseSelect(useResult, result, current); + resultIndex = ElementWiseSelect(useResult, resultIndex, currentIndex); } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void Invoke(ref Vector512 result, Vector512 current, ref Vector512 resultIndex, Vector512 curIndex) + public static void Invoke(ref Vector512 result, Vector512 current, ref Vector512 resultIndex, Vector512 currentIndex) { Vector512 resultMag = Vector512.Abs(result), currentMag = Vector512.Abs(current); - Vector512 greaterThanMask = Vector512.GreaterThan(resultMag, currentMag); + Vector512 useResult = Vector512.GreaterThan(resultMag, currentMag); Vector512 equalMask = Vector512.Equals(resultMag, currentMag); if (equalMask != Vector512.Zero) { - Vector512 lessThanIndexMask = IndexLessThan(resultIndex, curIndex); + Vector512 lessThanIndexMask = IndexLessThan(resultIndex, currentIndex); if (typeof(T) == typeof(float) || typeof(T) == typeof(double)) { - greaterThanMask |= (equalMask & (~IsNegative(result) | IsNegative(current)) & lessThanIndexMask); + // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? (resultIndex < currentIndex) : IsNegative(current)); + Vector512 currentNegative = IsNegative(current); + Vector512 sameSign = Vector512.Equals(IsNegative(result).AsInt32(), currentNegative.AsInt32()).As(); + useResult |= equalMask & Vector512.ConditionalSelect(sameSign, lessThanIndexMask, currentNegative); } else { - greaterThanMask |= (equalMask & lessThanIndexMask); + useResult |= equalMask & lessThanIndexMask; } } - result = ElementWiseSelect(greaterThanMask, result, current); - resultIndex = ElementWiseSelect(greaterThanMask, resultIndex, curIndex); + result = ElementWiseSelect(useResult, result, current); + resultIndex = ElementWiseSelect(useResult, resultIndex, currentIndex); } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static int Invoke(ref T result, T current, int resultIndex, int curIndex) + public static int Invoke(ref T result, T current, int resultIndex, int currentIndex) { T resultMag = T.Abs(result); T currentMag = T.Abs(current); if (resultMag == currentMag) { - if (IsNegative(result) && !IsNegative(current)) + bool resultNegative = IsNegative(result); + if ((resultNegative == IsNegative(current)) ? 
(currentIndex < resultIndex) : resultNegative) { result = current; - return curIndex; + return currentIndex; } } else if (currentMag > resultMag) { result = current; - return curIndex; + return currentIndex; } return resultIndex; @@ -10444,89 +10464,99 @@ public static int Invoke(ref T result, T current, int resultIndex, int curIndex) internal readonly struct IndexOfMinOperator : IIndexOfOperator where T : INumber { [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void Invoke(ref Vector128 result, Vector128 current, ref Vector128 resultIndex, Vector128 curIndex) + public static void Invoke(ref Vector128 result, Vector128 current, ref Vector128 resultIndex, Vector128 currentIndex) { - Vector128 lessThanMask = Vector128.LessThan(result, current); + Vector128 useResult = Vector128.LessThan(result, current); Vector128 equalMask = Vector128.Equals(result, current); if (equalMask != Vector128.Zero) { - Vector128 lessThanIndexMask = IndexLessThan(resultIndex, curIndex); + Vector128 lessThanIndexMask = IndexLessThan(resultIndex, currentIndex); if (typeof(T) == typeof(float) || typeof(T) == typeof(double)) { - lessThanMask |= (equalMask & (IsNegative(result) | ~IsNegative(current)) & lessThanIndexMask); + // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? (resultIndex < currentIndex) : IsNegative(result)); + Vector128 resultNegative = IsNegative(result); + Vector128 sameSign = Vector128.Equals(resultNegative.AsInt32(), IsNegative(current).AsInt32()).As(); + useResult |= equalMask & Vector128.ConditionalSelect(sameSign, lessThanIndexMask, resultNegative); } else { - lessThanMask |= (equalMask & lessThanIndexMask); + useResult |= equalMask & lessThanIndexMask; } } - result = ElementWiseSelect(lessThanMask, result, current); - resultIndex = ElementWiseSelect(lessThanMask, resultIndex, curIndex); + result = ElementWiseSelect(useResult, result, current); + resultIndex = ElementWiseSelect(useResult, resultIndex, currentIndex); } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void Invoke(ref Vector256 result, Vector256 current, ref Vector256 resultIndex, Vector256 curIndex) + public static void Invoke(ref Vector256 result, Vector256 current, ref Vector256 resultIndex, Vector256 currentIndex) { - Vector256 lessThanMask = Vector256.LessThan(result, current); + Vector256 useResult = Vector256.LessThan(result, current); Vector256 equalMask = Vector256.Equals(result, current); if (equalMask != Vector256.Zero) { - Vector256 lessThanIndexMask = IndexLessThan(resultIndex, curIndex); + Vector256 lessThanIndexMask = IndexLessThan(resultIndex, currentIndex); if (typeof(T) == typeof(float) || typeof(T) == typeof(double)) { - lessThanMask |= (equalMask & (IsNegative(result) | ~IsNegative(current)) & lessThanIndexMask); + // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? 
(resultIndex < currentIndex) : IsNegative(result)); + Vector256 resultNegative = IsNegative(result); + Vector256 sameSign = Vector256.Equals(resultNegative.AsInt32(), IsNegative(current).AsInt32()).As(); + useResult |= equalMask & Vector256.ConditionalSelect(sameSign, lessThanIndexMask, resultNegative); } else { - lessThanMask |= (equalMask & lessThanIndexMask); + useResult |= equalMask & lessThanIndexMask; } } - result = ElementWiseSelect(lessThanMask, result, current); - resultIndex = ElementWiseSelect(lessThanMask, resultIndex, curIndex); + result = ElementWiseSelect(useResult, result, current); + resultIndex = ElementWiseSelect(useResult, resultIndex, currentIndex); } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void Invoke(ref Vector512 result, Vector512 current, ref Vector512 resultIndex, Vector512 curIndex) + public static void Invoke(ref Vector512 result, Vector512 current, ref Vector512 resultIndex, Vector512 currentIndex) { - Vector512 lessThanMask = Vector512.LessThan(result, current); + Vector512 useResult = Vector512.LessThan(result, current); Vector512 equalMask = Vector512.Equals(result, current); if (equalMask != Vector512.Zero) { - Vector512 lessThanIndexMask = IndexLessThan(resultIndex, curIndex); + Vector512 lessThanIndexMask = IndexLessThan(resultIndex, currentIndex); if (typeof(T) == typeof(float) || typeof(T) == typeof(double)) { - lessThanMask |= (equalMask & (IsNegative(result) | ~IsNegative(current)) & lessThanIndexMask); + // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? (resultIndex < currentIndex) : IsNegative(result)); + Vector512 resultNegative = IsNegative(result); + Vector512 sameSign = Vector512.Equals(resultNegative.AsInt32(), IsNegative(current).AsInt32()).As(); + useResult |= equalMask & Vector512.ConditionalSelect(sameSign, lessThanIndexMask, resultNegative); } else { - lessThanMask |= (equalMask & lessThanIndexMask); + useResult |= equalMask & lessThanIndexMask; } } - result = ElementWiseSelect(lessThanMask, result, current); - resultIndex = ElementWiseSelect(lessThanMask, resultIndex, curIndex); + result = ElementWiseSelect(useResult, result, current); + resultIndex = ElementWiseSelect(useResult, resultIndex, currentIndex); } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static int Invoke(ref T result, T current, int resultIndex, int curIndex) + public static int Invoke(ref T result, T current, int resultIndex, int currentIndex) { if (result == current) { - if (!IsNegative(result) && IsNegative(current)) + bool currentNegative = IsNegative(current); + if ((IsNegative(result) == currentNegative) ? 
(currentIndex < resultIndex) : currentNegative) { result = current; - return curIndex; + return currentIndex; } } else if (current < result) { result = current; - return curIndex; + return currentIndex; } return resultIndex; @@ -10536,95 +10566,105 @@ public static int Invoke(ref T result, T current, int resultIndex, int curIndex) internal readonly struct IndexOfMinMagnitudeOperator : IIndexOfOperator where T : INumber { [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void Invoke(ref Vector128 result, Vector128 current, ref Vector128 resultIndex, Vector128 curIndex) + public static void Invoke(ref Vector128 result, Vector128 current, ref Vector128 resultIndex, Vector128 currentIndex) { Vector128 resultMag = Vector128.Abs(result), currentMag = Vector128.Abs(current); - Vector128 lessThanMask = Vector128.LessThan(resultMag, currentMag); + Vector128 useResult = Vector128.LessThan(resultMag, currentMag); Vector128 equalMask = Vector128.Equals(resultMag, currentMag); if (equalMask != Vector128.Zero) { - Vector128 lessThanIndexMask = IndexLessThan(resultIndex, curIndex); + Vector128 lessThanIndexMask = IndexLessThan(resultIndex, currentIndex); if (typeof(T) == typeof(float) || typeof(T) == typeof(double)) { - lessThanMask |= (equalMask & (IsNegative(result) | ~IsNegative(current)) & lessThanIndexMask); + // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? (resultIndex < currentIndex) : IsNegative(result)); + Vector128 resultNegative = IsNegative(result); + Vector128 sameSign = Vector128.Equals(resultNegative.AsInt32(), IsNegative(current).AsInt32()).As(); + useResult |= equalMask & Vector128.ConditionalSelect(sameSign, lessThanIndexMask, resultNegative); } else { - lessThanMask |= (equalMask & lessThanIndexMask); + useResult |= equalMask & lessThanIndexMask; } } - result = ElementWiseSelect(lessThanMask, result, current); - resultIndex = ElementWiseSelect(lessThanMask, resultIndex, curIndex); + result = ElementWiseSelect(useResult, result, current); + resultIndex = ElementWiseSelect(useResult, resultIndex, currentIndex); } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void Invoke(ref Vector256 result, Vector256 current, ref Vector256 resultIndex, Vector256 curIndex) + public static void Invoke(ref Vector256 result, Vector256 current, ref Vector256 resultIndex, Vector256 currentIndex) { Vector256 resultMag = Vector256.Abs(result), currentMag = Vector256.Abs(current); - Vector256 lessThanMask = Vector256.LessThan(resultMag, currentMag); + Vector256 useResult = Vector256.LessThan(resultMag, currentMag); Vector256 equalMask = Vector256.Equals(resultMag, currentMag); if (equalMask != Vector256.Zero) { - Vector256 lessThanIndexMask = IndexLessThan(resultIndex, curIndex); + Vector256 lessThanIndexMask = IndexLessThan(resultIndex, currentIndex); if (typeof(T) == typeof(float) || typeof(T) == typeof(double)) { - lessThanMask |= (equalMask & (IsNegative(result) | ~IsNegative(current)) & lessThanIndexMask); + // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? 
(resultIndex < currentIndex) : IsNegative(result)); + Vector256 resultNegative = IsNegative(result); + Vector256 sameSign = Vector256.Equals(resultNegative.AsInt32(), IsNegative(current).AsInt32()).As(); + useResult |= equalMask & Vector256.ConditionalSelect(sameSign, lessThanIndexMask, resultNegative); } else { - lessThanMask |= (equalMask & lessThanIndexMask); + useResult |= equalMask & lessThanIndexMask; } } - result = ElementWiseSelect(lessThanMask, result, current); - resultIndex = ElementWiseSelect(lessThanMask, resultIndex, curIndex); + result = ElementWiseSelect(useResult, result, current); + resultIndex = ElementWiseSelect(useResult, resultIndex, currentIndex); } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void Invoke(ref Vector512 result, Vector512 current, ref Vector512 resultIndex, Vector512 curIndex) + public static void Invoke(ref Vector512 result, Vector512 current, ref Vector512 resultIndex, Vector512 currentIndex) { Vector512 resultMag = Vector512.Abs(result), currentMag = Vector512.Abs(current); - Vector512 lessThanMask = Vector512.LessThan(resultMag, currentMag); + Vector512 useResult = Vector512.LessThan(resultMag, currentMag); Vector512 equalMask = Vector512.Equals(resultMag, currentMag); if (equalMask != Vector512.Zero) { - Vector512 lessThanIndexMask = IndexLessThan(resultIndex, curIndex); + Vector512 lessThanIndexMask = IndexLessThan(resultIndex, currentIndex); if (typeof(T) == typeof(float) || typeof(T) == typeof(double)) { - lessThanMask |= (equalMask & (IsNegative(result) | ~IsNegative(current)) & lessThanIndexMask); + // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? (resultIndex < currentIndex) : IsNegative(result)); + Vector512 resultNegative = IsNegative(result); + Vector512 sameSign = Vector512.Equals(resultNegative.AsInt32(), IsNegative(current).AsInt32()).As(); + useResult |= equalMask & Vector512.ConditionalSelect(sameSign, lessThanIndexMask, resultNegative); } else { - lessThanMask |= (equalMask & lessThanIndexMask); + useResult |= equalMask & lessThanIndexMask; } } - result = ElementWiseSelect(lessThanMask, result, current); - resultIndex = ElementWiseSelect(lessThanMask, resultIndex, curIndex); + result = ElementWiseSelect(useResult, result, current); + resultIndex = ElementWiseSelect(useResult, resultIndex, currentIndex); } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static int Invoke(ref T result, T current, int resultIndex, int curIndex) + public static int Invoke(ref T result, T current, int resultIndex, int currentIndex) { T resultMag = T.Abs(result); T currentMag = T.Abs(current); if (resultMag == currentMag) { - if (!IsNegative(result) && IsNegative(current)) + bool currentNegative = IsNegative(current); + if ((IsNegative(result) == currentNegative) ? 
(currentIndex < resultIndex) : currentNegative) { result = current; - return curIndex; + return currentIndex; } } else if (currentMag < resultMag) { result = current; - return curIndex; + return currentIndex; } return resultIndex; @@ -13783,19 +13823,19 @@ public static Vector512 Invoke(Vector512 x) [MethodImpl(MethodImplOptions.AggressiveInlining)] private static Vector128 ElementWiseSelect(Vector128 mask, Vector128 left, Vector128 right) { - //if (Sse41.IsSupported) - //{ - // if (typeof(T) == typeof(byte)) return Sse41.BlendVariable(left.AsByte(), right.AsByte(), (~mask).AsByte()).As(); - // if (typeof(T) == typeof(sbyte)) return Sse41.BlendVariable(left.AsSByte(), right.AsSByte(), (~mask).AsSByte()).As(); - // if (typeof(T) == typeof(ushort)) return Sse41.BlendVariable(left.AsUInt16(), right.AsUInt16(), (~mask).AsUInt16()).As(); - // if (typeof(T) == typeof(short)) return Sse41.BlendVariable(left.AsInt16(), right.AsInt16(), (~mask).AsInt16()).As(); - // if (typeof(T) == typeof(uint)) return Sse41.BlendVariable(left.AsUInt32(), right.AsUInt32(), (~mask).AsUInt32()).As(); - // if (typeof(T) == typeof(int)) return Sse41.BlendVariable(left.AsInt32(), right.AsInt32(), (~mask).AsInt32()).As(); - // if (typeof(T) == typeof(ulong)) return Sse41.BlendVariable(left.AsUInt64(), right.AsUInt64(), (~mask).AsUInt64()).As(); - // if (typeof(T) == typeof(long)) return Sse41.BlendVariable(left.AsInt64(), right.AsInt64(), (~mask).AsInt64()).As(); - // if (typeof(T) == typeof(float)) return Sse41.BlendVariable(left.AsSingle(), right.AsSingle(), (~mask).AsSingle()).As(); - // if (typeof(T) == typeof(double)) return Sse41.BlendVariable(left.AsDouble(), right.AsDouble(), (~mask).AsDouble()).As(); - //} + if (Sse41.IsSupported) + { + if (typeof(T) == typeof(byte)) return Sse41.BlendVariable(left.AsByte(), right.AsByte(), (~mask).AsByte()).As(); + if (typeof(T) == typeof(sbyte)) return Sse41.BlendVariable(left.AsSByte(), right.AsSByte(), (~mask).AsSByte()).As(); + if (typeof(T) == typeof(ushort)) return Sse41.BlendVariable(left.AsUInt16(), right.AsUInt16(), (~mask).AsUInt16()).As(); + if (typeof(T) == typeof(short)) return Sse41.BlendVariable(left.AsInt16(), right.AsInt16(), (~mask).AsInt16()).As(); + if (typeof(T) == typeof(uint)) return Sse41.BlendVariable(left.AsUInt32(), right.AsUInt32(), (~mask).AsUInt32()).As(); + if (typeof(T) == typeof(int)) return Sse41.BlendVariable(left.AsInt32(), right.AsInt32(), (~mask).AsInt32()).As(); + if (typeof(T) == typeof(ulong)) return Sse41.BlendVariable(left.AsUInt64(), right.AsUInt64(), (~mask).AsUInt64()).As(); + if (typeof(T) == typeof(long)) return Sse41.BlendVariable(left.AsInt64(), right.AsInt64(), (~mask).AsInt64()).As(); + if (typeof(T) == typeof(float)) return Sse41.BlendVariable(left.AsSingle(), right.AsSingle(), (~mask).AsSingle()).As(); + if (typeof(T) == typeof(double)) return Sse41.BlendVariable(left.AsDouble(), right.AsDouble(), (~mask).AsDouble()).As(); + } return Vector128.ConditionalSelect(mask, left, right); } @@ -13803,19 +13843,19 @@ private static Vector128 ElementWiseSelect(Vector128 mask, Vector128 [MethodImpl(MethodImplOptions.AggressiveInlining)] private static Vector256 ElementWiseSelect(Vector256 mask, Vector256 left, Vector256 right) { - //if (Avx2.IsSupported) - //{ - // if (typeof(T) == typeof(byte)) return Avx2.BlendVariable(left.AsByte(), right.AsByte(), (~mask).AsByte()).As(); - // if (typeof(T) == typeof(sbyte)) return Avx2.BlendVariable(left.AsSByte(), right.AsSByte(), (~mask).AsSByte()).As(); - // if (typeof(T) == typeof(ushort)) 
return Avx2.BlendVariable(left.AsUInt16(), right.AsUInt16(), (~mask).AsUInt16()).As(); - // if (typeof(T) == typeof(short)) return Avx2.BlendVariable(left.AsInt16(), right.AsInt16(), (~mask).AsInt16()).As(); - // if (typeof(T) == typeof(uint)) return Avx2.BlendVariable(left.AsUInt32(), right.AsUInt32(), (~mask).AsUInt32()).As(); - // if (typeof(T) == typeof(int)) return Avx2.BlendVariable(left.AsInt32(), right.AsInt32(), (~mask).AsInt32()).As(); - // if (typeof(T) == typeof(ulong)) return Avx2.BlendVariable(left.AsUInt64(), right.AsUInt64(), (~mask).AsUInt64()).As(); - // if (typeof(T) == typeof(long)) return Avx2.BlendVariable(left.AsInt64(), right.AsInt64(), (~mask).AsInt64()).As(); - // if (typeof(T) == typeof(float)) return Avx2.BlendVariable(left.AsSingle(), right.AsSingle(), (~mask).AsSingle()).As(); - // if (typeof(T) == typeof(double)) return Avx2.BlendVariable(left.AsDouble(), right.AsDouble(), (~mask).AsDouble()).As(); - //} + if (Avx2.IsSupported) + { + if (typeof(T) == typeof(byte)) return Avx2.BlendVariable(left.AsByte(), right.AsByte(), (~mask).AsByte()).As(); + if (typeof(T) == typeof(sbyte)) return Avx2.BlendVariable(left.AsSByte(), right.AsSByte(), (~mask).AsSByte()).As(); + if (typeof(T) == typeof(ushort)) return Avx2.BlendVariable(left.AsUInt16(), right.AsUInt16(), (~mask).AsUInt16()).As(); + if (typeof(T) == typeof(short)) return Avx2.BlendVariable(left.AsInt16(), right.AsInt16(), (~mask).AsInt16()).As(); + if (typeof(T) == typeof(uint)) return Avx2.BlendVariable(left.AsUInt32(), right.AsUInt32(), (~mask).AsUInt32()).As(); + if (typeof(T) == typeof(int)) return Avx2.BlendVariable(left.AsInt32(), right.AsInt32(), (~mask).AsInt32()).As(); + if (typeof(T) == typeof(ulong)) return Avx2.BlendVariable(left.AsUInt64(), right.AsUInt64(), (~mask).AsUInt64()).As(); + if (typeof(T) == typeof(long)) return Avx2.BlendVariable(left.AsInt64(), right.AsInt64(), (~mask).AsInt64()).As(); + if (typeof(T) == typeof(float)) return Avx2.BlendVariable(left.AsSingle(), right.AsSingle(), (~mask).AsSingle()).As(); + if (typeof(T) == typeof(double)) return Avx2.BlendVariable(left.AsDouble(), right.AsDouble(), (~mask).AsDouble()).As(); + } return Vector256.ConditionalSelect(mask, left, right); } @@ -13823,15 +13863,15 @@ private static Vector256 ElementWiseSelect(Vector256 mask, Vector256 [MethodImpl(MethodImplOptions.AggressiveInlining)] private static Vector512 ElementWiseSelect(Vector512 mask, Vector512 left, Vector512 right) { - //if (Avx512F.IsSupported) - //{ - // if (typeof(T) == typeof(uint)) return Avx512F.BlendVariable(left.AsUInt32(), right.AsUInt32(), (~mask).AsUInt32()).As(); - // if (typeof(T) == typeof(int)) return Avx512F.BlendVariable(left.AsInt32(), right.AsInt32(), (~mask).AsInt32()).As(); - // if (typeof(T) == typeof(ulong)) return Avx512F.BlendVariable(left.AsUInt64(), right.AsUInt64(), (~mask).AsUInt64()).As(); - // if (typeof(T) == typeof(long)) return Avx512F.BlendVariable(left.AsInt64(), right.AsInt64(), (~mask).AsInt64()).As(); - // if (typeof(T) == typeof(float)) return Avx512F.BlendVariable(left.AsSingle(), right.AsSingle(), (~mask).AsSingle()).As(); - // if (typeof(T) == typeof(double)) return Avx512F.BlendVariable(left.AsDouble(), right.AsDouble(), (~mask).AsDouble()).As(); - //} + if (Avx512F.IsSupported) + { + if (typeof(T) == typeof(uint)) return Avx512F.BlendVariable(left.AsUInt32(), right.AsUInt32(), (~mask).AsUInt32()).As(); + if (typeof(T) == typeof(int)) return Avx512F.BlendVariable(left.AsInt32(), right.AsInt32(), (~mask).AsInt32()).As(); + if (typeof(T) == 
typeof(ulong)) return Avx512F.BlendVariable(left.AsUInt64(), right.AsUInt64(), (~mask).AsUInt64()).As(); + if (typeof(T) == typeof(long)) return Avx512F.BlendVariable(left.AsInt64(), right.AsInt64(), (~mask).AsInt64()).As(); + if (typeof(T) == typeof(float)) return Avx512F.BlendVariable(left.AsSingle(), right.AsSingle(), (~mask).AsSingle()).As(); + if (typeof(T) == typeof(double)) return Avx512F.BlendVariable(left.AsDouble(), right.AsDouble(), (~mask).AsDouble()).As(); + } return Vector512.ConditionalSelect(mask, left, right); } diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netstandard/TensorPrimitives.Single.netstandard.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netstandard/TensorPrimitives.Single.netstandard.cs index 6b6d22aba468e..c9474cb470fd7 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netstandard/TensorPrimitives.Single.netstandard.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netstandard/TensorPrimitives.Single.netstandard.cs @@ -831,7 +831,7 @@ private static int IndexOfMinMaxCore(ReadOnlySpan resultIndex = new Vector(s_0through7); - Vector curIndex = resultIndex; + Vector currentIndex = resultIndex; Vector increment = new Vector(Vector.Count); // Load the first vector as the initial set of results, and bail immediately @@ -847,21 +847,21 @@ private static int IndexOfMinMaxCore(ReadOnlySpan.Count; } // If any elements remain, handle them in one final vector. if (i != x.Length) { - curIndex = Vector.Add(curIndex, new Vector(x.Length - i)); + currentIndex = Vector.Add(currentIndex, new Vector(x.Length - i)); current = AsVector(ref xRef, x.Length - Vector.Count); if (!Vector.EqualsAll(current, current)) @@ -869,7 +869,7 @@ private static int IndexOfMinMaxCore(ReadOnlySpan Invoke(Vector x, Vector y) private interface IIndexOfOperator { - int Invoke(ref float result, float current, int resultIndex, int curIndex); + int Invoke(ref float result, float current, int resultIndex, int currentIndex); int Invoke(Vector result, Vector resultIndex); - void Invoke(ref Vector result, Vector current, ref Vector resultIndex, Vector curIndex); + void Invoke(ref Vector result, Vector current, ref Vector resultIndex, Vector currentIndex); } /// Returns the index of MathF.Max(x, y) private readonly struct IndexOfMaxOperator_Single : IIndexOfOperator { [MethodImpl(MethodImplOptions.AggressiveInlining)] - public int Invoke(Vector result, Vector resultIndex) + public int Invoke(ref float result, float current, int resultIndex, int currentIndex) { - float curMax = result[0]; - int curIn = resultIndex[0]; - for (int i = 1; i < Vector.Count; i++) + if (result == current) { - if (result[i] == curMax && IsNegative(curMax) && !IsNegative(result[i])) + bool resultNegative = IsNegative(result); + if ((resultNegative == IsNegative(current)) ? 
(currentIndex < resultIndex) : resultNegative) { - curMax = result[i]; - curIn = resultIndex[i]; - } - else if (result[i] > curMax) - { - curMax = result[i]; - curIn = resultIndex[i]; + result = current; + return currentIndex; } } + else if (current > result) + { + result = current; + return currentIndex; + } - return curIn; + return resultIndex; } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void Invoke(ref Vector result, Vector current, ref Vector resultIndex, Vector curIndex) + public int Invoke(Vector current, Vector currentIndex) { - Vector lessThanMask = Vector.GreaterThan(result, current); - - Vector equalMask = Vector.Equals(result, current); + float result = current[0]; + int resultIndex = currentIndex[0]; - if (equalMask != Vector.Zero) + for (int i = 1; i < Vector.Count; i++) { - Vector negativeMask = IsNegative(current); - Vector lessThanIndexMask = Vector.LessThan(resultIndex, curIndex); - - lessThanMask |= ((Vector)~negativeMask & equalMask) | ((Vector)IsNegative(result) & equalMask & lessThanIndexMask); + if (current[i] == result) + { + bool resultNegative = IsNegative(result); + if ((resultNegative == IsNegative(current[i])) ? (currentIndex[i] < resultIndex) : resultNegative) + { + result = current[i]; + resultIndex = currentIndex[i]; + } + } + else if (current[i] > result) + { + result = current[i]; + resultIndex = currentIndex[i]; + } } - result = Vector.ConditionalSelect(lessThanMask, result, current); - - resultIndex = Vector.ConditionalSelect(lessThanMask, resultIndex, curIndex); + return resultIndex; } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public int Invoke(ref float result, float current, int resultIndex, int curIndex) + public void Invoke(ref Vector result, Vector current, ref Vector resultIndex, Vector currentIndex) { - if (result == current) + Vector useResult = Vector.GreaterThan(result, current); + Vector equalMask = Vector.Equals(result, current); + + if (equalMask != Vector.Zero) { - if (IsNegative(result) && !IsNegative(current)) - { - result = current; - return curIndex; - } - } - else if (current > result) - { - result = current; - return curIndex; + // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? (resultIndex < currentIndex) : IsNegative(current)); + Vector currentNegative = IsNegative(current); + useResult |= + equalMask & + Vector.ConditionalSelect(Vector.Equals(IsNegative(result), currentNegative), + Vector.LessThan(resultIndex, currentIndex), + (Vector)currentNegative); } - return resultIndex; + result = Vector.ConditionalSelect(useResult, result, current); + resultIndex = Vector.ConditionalSelect(useResult, resultIndex, currentIndex); } } private readonly struct IndexOfMaxMagnitudeOperator_Single : IIndexOfOperator { [MethodImpl(MethodImplOptions.AggressiveInlining)] - public int Invoke(ref float result, float current, int resultIndex, int curIndex) + public int Invoke(ref float result, float current, int resultIndex, int currentIndex) { - float curMaxAbs = MathF.Abs(result); + float resultAbs = MathF.Abs(result); float currentAbs = MathF.Abs(current); - if (curMaxAbs == currentAbs) + if (resultAbs == currentAbs) { - if (IsNegative(result) && !IsNegative(current)) + bool resultNegative = IsNegative(result); + if ((resultNegative == IsNegative(current)) ? 
(currentIndex < resultIndex) : resultNegative) { result = current; - return curIndex; + return currentIndex; } } - else if (currentAbs > curMaxAbs) + else if (currentAbs > resultAbs) { result = current; - return curIndex; + return currentIndex; } return resultIndex; } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public int Invoke(Vector result, Vector maxIndex) + public int Invoke(Vector current, Vector currentIndex) { - float curMax = result[0]; - int curIn = maxIndex[0]; + float result = current[0]; + float resultAbs = MathF.Abs(result); + int resultIndex = currentIndex[0]; + for (int i = 1; i < Vector.Count; i++) { - if (MathF.Abs(result[i]) == MathF.Abs(curMax) && IsNegative(curMax) && !IsNegative(result[i])) + float currentAbs = MathF.Abs(current[i]); + + if (resultAbs == currentAbs) { - curMax = result[i]; - curIn = maxIndex[i]; + bool resultNegative = IsNegative(result); + if ((resultNegative == IsNegative(current[i])) ? (currentIndex[i] < resultIndex) : resultNegative) + { + result = current[i]; + resultAbs = currentAbs; + resultIndex = currentIndex[i]; + } } - else if (MathF.Abs(result[i]) > MathF.Abs(curMax)) + else if (currentAbs > resultAbs) { - curMax = result[i]; - curIn = maxIndex[i]; + result = current[i]; + resultAbs = currentAbs; + resultIndex = currentIndex[i]; } } - return curIn; + return resultIndex; } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void Invoke(ref Vector result, Vector current, ref Vector resultIndex, Vector curIndex) + public void Invoke(ref Vector result, Vector current, ref Vector resultIndex, Vector currentIndex) { - Vector maxMag = Vector.Abs(result), currentMag = Vector.Abs(current); - - Vector lessThanMask = Vector.GreaterThan(maxMag, currentMag); - - Vector equalMask = Vector.Equals(result, current); + Vector resultMag = Vector.Abs(result), currentMag = Vector.Abs(current); + Vector useResult = Vector.GreaterThan(resultMag, currentMag); + Vector equalMask = Vector.Equals(resultMag, currentMag); if (equalMask != Vector.Zero) { - Vector negativeMask = IsNegative(current); - Vector lessThanIndexMask = Vector.LessThan(resultIndex, curIndex); - - lessThanMask |= ((Vector)~negativeMask & equalMask) | ((Vector)IsNegative(result) & equalMask & lessThanIndexMask); + // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? (resultIndex < currentIndex) : IsNegative(current)); + Vector currentNegative = IsNegative(current); + useResult |= + equalMask & + Vector.ConditionalSelect(Vector.Equals(IsNegative(result), currentNegative), + Vector.LessThan(resultIndex, currentIndex), + (Vector)currentNegative); } - result = Vector.ConditionalSelect(lessThanMask, result, current); - - resultIndex = Vector.ConditionalSelect(lessThanMask, resultIndex, curIndex); + result = Vector.ConditionalSelect(useResult, result, current); + resultIndex = Vector.ConditionalSelect(useResult, resultIndex, currentIndex); } } private readonly struct IndexOfMinOperator_Single : IIndexOfOperator { [MethodImpl(MethodImplOptions.AggressiveInlining)] - public int Invoke(ref float result, float current, int resultIndex, int curIndex) + public int Invoke(ref float result, float current, int resultIndex, int currentIndex) { if (result == current) { - if (IsPositive(result) && !IsPositive(current)) + bool currentNegative = IsNegative(current); + if ((IsNegative(result) == currentNegative) ? 
(currentIndex < resultIndex) : currentNegative) { result = current; - return curIndex; + return currentIndex; } } else if (current < result) { result = current; - return curIndex; + return currentIndex; } return resultIndex; } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public int Invoke(Vector result, Vector resultIndex) + public int Invoke(Vector current, Vector currentIndex) { - float curMin = result[0]; - int curIn = resultIndex[0]; + float result = current[0]; + int resultIndex = currentIndex[0]; + for (int i = 1; i < Vector.Count; i++) { - if (result[i] == curMin && IsPositive(curMin) && !IsPositive(result[i])) + if (current[i] == result) { - curMin = result[i]; - curIn = resultIndex[i]; + bool currentNegative = IsNegative(current[i]); + if ((IsNegative(result) == currentNegative) ? (currentIndex[i] < resultIndex) : currentNegative) + { + result = current[i]; + resultIndex = currentIndex[i]; + } } - else if (result[i] < curMin) + else if (current[i] < result) { - curMin = result[i]; - curIn = resultIndex[i]; + result = current[i]; + resultIndex = currentIndex[i]; } } - return curIn; + return resultIndex; } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void Invoke(ref Vector result, Vector current, ref Vector resultIndex, Vector curIndex) + public void Invoke(ref Vector result, Vector current, ref Vector resultIndex, Vector currentIndex) { - Vector lessThanMask = Vector.LessThan(result, current); - + Vector useResult = Vector.LessThan(result, current); Vector equalMask = Vector.Equals(result, current); if (equalMask != Vector.Zero) { - Vector negativeMask = IsNegative(current); - Vector lessThanIndexMask = Vector.LessThan(resultIndex, curIndex); - - lessThanMask |= ((Vector)negativeMask & equalMask) | (~(Vector)IsNegative(result) & equalMask & lessThanIndexMask); + // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? (resultIndex < currentIndex) : IsNegative(result)); + Vector resultNegative = IsNegative(result); + useResult |= + equalMask & + Vector.ConditionalSelect(Vector.Equals(resultNegative, IsNegative(current)), + Vector.LessThan(resultIndex, currentIndex), + (Vector)resultNegative); } - result = Vector.ConditionalSelect(lessThanMask, result, current); - - resultIndex = Vector.ConditionalSelect(lessThanMask, resultIndex, curIndex); + result = Vector.ConditionalSelect(useResult, result, current); + resultIndex = Vector.ConditionalSelect(useResult, resultIndex, currentIndex); } } private readonly struct IndexOfMinMagnitudeOperator_Single : IIndexOfOperator { [MethodImpl(MethodImplOptions.AggressiveInlining)] - public int Invoke(ref float result, float current, int resultIndex, int curIndex) + public int Invoke(ref float result, float current, int resultIndex, int currentIndex) { - float curMinAbs = MathF.Abs(result); + float resultAbs = MathF.Abs(result); float currentAbs = MathF.Abs(current); - if (curMinAbs == currentAbs) + + if (resultAbs == currentAbs) { - if (IsPositive(result) && !IsPositive(current)) + bool currentNegative = IsNegative(current); + if ((IsNegative(result) == currentNegative) ? 
(currentIndex < resultIndex) : currentNegative) { result = current; - return curIndex; + return currentIndex; } } - else if (currentAbs < curMinAbs) + else if (currentAbs < resultAbs) { result = current; - return curIndex; + return currentIndex; } return resultIndex; } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public int Invoke(Vector result, Vector resultIndex) + public int Invoke(Vector current, Vector currentIndex) { - float curMin = result[0]; - int curIn = resultIndex[0]; + float result = current[0]; + float resultAbs = MathF.Abs(result); + int resultIndex = currentIndex[0]; + for (int i = 1; i < Vector.Count; i++) { - if (MathF.Abs(result[i]) == MathF.Abs(curMin) && IsPositive(curMin) && !IsPositive(result[i])) + float currentAbs = MathF.Abs(current[i]); + + if (resultAbs == currentAbs) { - curMin = result[i]; - curIn = resultIndex[i]; + bool currentNegative = IsNegative(current[i]); + if ((IsNegative(result) == currentNegative) ? (currentIndex[i] < resultIndex) : currentNegative) + { + result = current[i]; + resultAbs = currentAbs; + resultIndex = currentIndex[i]; + } } - else if (MathF.Abs(result[i]) < MathF.Abs(curMin)) + else if (currentAbs < resultAbs) { - curMin = result[i]; - curIn = resultIndex[i]; + result = current[i]; + resultAbs = currentAbs; + resultIndex = currentIndex[i]; } } - return curIn; + return resultIndex; } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void Invoke(ref Vector result, Vector current, ref Vector resultIndex, Vector curIndex) + public void Invoke(ref Vector result, Vector current, ref Vector resultIndex, Vector currentIndex) { - Vector minMag = Vector.Abs(result), currentMag = Vector.Abs(current); - - Vector lessThanMask = Vector.LessThan(minMag, currentMag); - - Vector equalMask = Vector.Equals(result, current); + Vector resultMag = Vector.Abs(result), currentMag = Vector.Abs(current); + Vector useResult = Vector.LessThan(resultMag, currentMag); + Vector equalMask = Vector.Equals(resultMag, currentMag); if (equalMask != Vector.Zero) { - Vector negativeMask = IsNegative(current); - Vector lessThanIndexMask = Vector.LessThan(resultIndex, curIndex); - - lessThanMask |= ((Vector)negativeMask & equalMask) | (~(Vector)IsNegative(result) & equalMask & lessThanIndexMask); + // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? 
(resultIndex < currentIndex) : IsNegative(result)); + Vector resultNegative = IsNegative(result); + useResult |= + equalMask & + Vector.ConditionalSelect(Vector.Equals(resultNegative, IsNegative(current)), + Vector.LessThan(resultIndex, currentIndex), + (Vector)resultNegative); } - result = Vector.ConditionalSelect(lessThanMask, result, current); - - resultIndex = Vector.ConditionalSelect(lessThanMask, resultIndex, curIndex); + result = Vector.ConditionalSelect(useResult, result, current); + resultIndex = Vector.ConditionalSelect(useResult, resultIndex, currentIndex); } } From 265b37a3402af7a991384a2c97df60df2bde56d2 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Fri, 19 Jan 2024 21:09:42 -0500 Subject: [PATCH 03/14] Rename MultiplyAddEstimate to FusedMultiplyAdd --- .../ref/System.Numerics.Tensors.netcore.cs | 6 +-- .../Tensors/netcore/TensorPrimitives.T.cs | 12 ++--- .../netcore/TensorPrimitives.netcore.cs | 2 +- .../tests/TensorPrimitives.Generic.cs | 48 +++++++++---------- 4 files changed, 34 insertions(+), 34 deletions(-) diff --git a/src/libraries/System.Numerics.Tensors/ref/System.Numerics.Tensors.netcore.cs b/src/libraries/System.Numerics.Tensors/ref/System.Numerics.Tensors.netcore.cs index 93da492b69dde..8ce0932b2bfcc 100644 --- a/src/libraries/System.Numerics.Tensors/ref/System.Numerics.Tensors.netcore.cs +++ b/src/libraries/System.Numerics.Tensors/ref/System.Numerics.Tensors.netcore.cs @@ -79,9 +79,9 @@ public static void Min(System.ReadOnlySpan x, System.ReadOnlySpan y, Sy public static void MultiplyAdd(System.ReadOnlySpan x, System.ReadOnlySpan y, System.ReadOnlySpan addend, System.Span destination) where T : System.Numerics.IAdditionOperators, System.Numerics.IMultiplyOperators { } public static void MultiplyAdd(System.ReadOnlySpan x, System.ReadOnlySpan y, T addend, System.Span destination) where T : System.Numerics.IAdditionOperators, System.Numerics.IMultiplyOperators { } public static void MultiplyAdd(System.ReadOnlySpan x, T y, System.ReadOnlySpan addend, System.Span destination) where T : System.Numerics.IAdditionOperators, System.Numerics.IMultiplyOperators { } - public static void MultiplyAddEstimate(System.ReadOnlySpan x, System.ReadOnlySpan y, System.ReadOnlySpan addend, System.Span destination) where T : System.Numerics.IFloatingPointIeee754 { } - public static void MultiplyAddEstimate(System.ReadOnlySpan x, System.ReadOnlySpan y, T addend, System.Span destination) where T : System.Numerics.IFloatingPointIeee754 { } - public static void MultiplyAddEstimate(System.ReadOnlySpan x, T y, System.ReadOnlySpan addend, System.Span destination) where T : System.Numerics.IFloatingPointIeee754 { } + public static void FusedMultiplyAdd(System.ReadOnlySpan x, System.ReadOnlySpan y, System.ReadOnlySpan addend, System.Span destination) where T : System.Numerics.IFloatingPointIeee754 { } + public static void FusedMultiplyAdd(System.ReadOnlySpan x, System.ReadOnlySpan y, T addend, System.Span destination) where T : System.Numerics.IFloatingPointIeee754 { } + public static void FusedMultiplyAdd(System.ReadOnlySpan x, T y, System.ReadOnlySpan addend, System.Span destination) where T : System.Numerics.IFloatingPointIeee754 { } public static void Multiply(System.ReadOnlySpan x, System.ReadOnlySpan y, System.Span destination) where T : System.Numerics.IMultiplyOperators, System.Numerics.IMultiplicativeIdentity { } public static void Multiply(System.ReadOnlySpan x, T y, System.Span destination) where T : System.Numerics.IMultiplyOperators, System.Numerics.IMultiplicativeIdentity { } 
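// [Editorial note, not part of the patch] A minimal usage sketch of the renamed generic
// FusedMultiplyAdd overload declared above. The array names are illustrative only; the call
// computes destination[i] = (x[i] * y[i]) + addend with a single rounding per element,
// matching float.FusedMultiplyAdd.
float[] x = { 1f, 2f, 3f };
float[] y = { 10f, 20f, 30f };
float[] destination = new float[3];
TensorPrimitives.FusedMultiplyAdd<float>(x, y, 0.5f, destination);
// destination is now { 10.5f, 40.5f, 90.5f }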
public static void Negate(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.IUnaryNegationOperators { } diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.T.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.T.cs index b97f57499a84d..8e4c3d7d589e8 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.T.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.T.cs @@ -1435,9 +1435,9 @@ public static void MultiplyAdd(ReadOnlySpan x, T y, ReadOnlySpan addend /// If either of the element-wise input values is equal to , the resulting element-wise value is also NaN. /// /// - public static void MultiplyAddEstimate(ReadOnlySpan x, ReadOnlySpan y, ReadOnlySpan addend, Span destination) + public static void FusedMultiplyAdd(ReadOnlySpan x, ReadOnlySpan y, ReadOnlySpan addend, Span destination) where T : IFloatingPointIeee754 => - InvokeSpanSpanSpanIntoSpan>(x, y, addend, destination); + InvokeSpanSpanSpanIntoSpan>(x, y, addend, destination); /// Computes the element-wise result of ( * ) * for the specified tensors of numbers. /// The first tensor, represented as a span. @@ -1457,9 +1457,9 @@ public static void MultiplyAddEstimate(ReadOnlySpan x, ReadOnlySpan y, /// If either of the element-wise input values is equal to , the resulting element-wise value is also NaN. /// /// - public static void MultiplyAddEstimate(ReadOnlySpan x, ReadOnlySpan y, T addend, Span destination) + public static void FusedMultiplyAdd(ReadOnlySpan x, ReadOnlySpan y, T addend, Span destination) where T : IFloatingPointIeee754 => - InvokeSpanSpanScalarIntoSpan>(x, y, addend, destination); + InvokeSpanSpanScalarIntoSpan>(x, y, addend, destination); /// Computes the element-wise result of ( * ) * for the specified tensors of numbers. /// The first tensor, represented as a span. @@ -1478,9 +1478,9 @@ public static void MultiplyAddEstimate(ReadOnlySpan x, ReadOnlySpan y, /// If either of the element-wise input values is equal to , the resulting element-wise value is also NaN. /// /// - public static void MultiplyAddEstimate(ReadOnlySpan x, T y, ReadOnlySpan addend, Span destination) + public static void FusedMultiplyAdd(ReadOnlySpan x, T y, ReadOnlySpan addend, Span destination) where T : IFloatingPointIeee754 => - InvokeSpanScalarSpanIntoSpan>(x, y, addend, destination); + InvokeSpanScalarSpanIntoSpan>(x, y, addend, destination); /// Computes the element-wise negation of each number in the specified tensor. /// The tensor, represented as a span. 
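// [Editorial note, not part of the patch] A scalar reference sketch of what the renamed
// overloads in TensorPrimitives.T.cs above compute, with the argument validation and
// vectorization provided by the Invoke* helpers omitted. "FusedMultiplyAddReference" is a
// hypothetical name used only for illustration.
static void FusedMultiplyAddReference<T>(ReadOnlySpan<T> x, T y, ReadOnlySpan<T> addend, Span<T> destination)
    where T : System.Numerics.IFloatingPointIeee754<T>
{
    for (int i = 0; i < x.Length; i++)
    {
        // Single-rounding multiply-add, as exposed by IFloatingPointIeee754<T>.
        destination[i] = T.FusedMultiplyAdd(x[i], y, addend[i]);
    }
}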
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs index cc9a6df72e159..abf79bea82860 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs @@ -11240,7 +11240,7 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) } /// (x * y) + z - internal readonly struct MultiplyAddEstimateOperator : ITernaryOperator where T : IFloatingPointIeee754 + internal readonly struct FusedMultiplyAddOperator : ITernaryOperator where T : IFloatingPointIeee754 { public static T Invoke(T x, T y, T z) => FusedMultiplyAdd(x, y, z); public static Vector128 Invoke(Vector128 x, Vector128 y, Vector128 z) => FusedMultiplyAdd(x, y, z); diff --git a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs index 766aedf91195f..ceddea02b9f2b 100644 --- a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs +++ b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs @@ -500,7 +500,7 @@ public void SpanIntDestination_ThrowsForOverlapppingInputsWithOutputs(SpanScalar public static IEnumerable SpanSpanSpanDestinationFunctionsToTest() { yield return new object[] { new SpanSpanSpanDestinationDelegate(TensorPrimitives.Lerp), new Func(T.Lerp) }; - yield return new object[] { new SpanSpanSpanDestinationDelegate(TensorPrimitives.MultiplyAddEstimate), new Func(T.FusedMultiplyAdd) }; + yield return new object[] { new SpanSpanSpanDestinationDelegate(TensorPrimitives.FusedMultiplyAdd), new Func(T.FusedMultiplyAdd) }; } [Theory] @@ -786,9 +786,9 @@ public void ILogB_ThrowsForTooShortDestination() } #endregion - #region MultiplyAddEstimate + #region FusedMultiplyAdd [Fact] - public void MultiplyAddEstimate_TensorTensorScalar_AllLengths() + public void FusedMultiplyAdd_TensorTensorScalar_AllLengths() { Assert.All(Helpers.TensorLengthsIncluding0, tensorLength => { @@ -797,7 +797,7 @@ public void MultiplyAddEstimate_TensorTensorScalar_AllLengths() T addend = NextRandom(); using BoundedMemory destination = CreateTensor(tensorLength); - TensorPrimitives.MultiplyAddEstimate(x, y, addend, destination); + TensorPrimitives.FusedMultiplyAdd(x, y, addend, destination); for (int i = 0; i < tensorLength; i++) { @@ -807,7 +807,7 @@ public void MultiplyAddEstimate_TensorTensorScalar_AllLengths() } [Fact] - public void MultiplyAddEstimate_TensorTensorScalar_InPlace() + public void FusedMultiplyAdd_TensorTensorScalar_InPlace() { Assert.All(Helpers.TensorLengthsIncluding0, tensorLength => { @@ -815,7 +815,7 @@ public void MultiplyAddEstimate_TensorTensorScalar_InPlace() T[] xOrig = x.Span.ToArray(); T addend = NextRandom(); - TensorPrimitives.MultiplyAddEstimate(x, x, addend, x); + TensorPrimitives.FusedMultiplyAdd(x, x, addend, x); for (int i = 0; i < tensorLength; i++) { @@ -825,7 +825,7 @@ public void MultiplyAddEstimate_TensorTensorScalar_InPlace() } [Fact] - public void MultiplyAddEstimate_TensorTensorScalar_ThrowsForTooShortDestination() + public void FusedMultiplyAdd_TensorTensorScalar_ThrowsForTooShortDestination() { Assert.All(Helpers.TensorLengths, tensorLength => { @@ -834,22 +834,22 @@ public void MultiplyAddEstimate_TensorTensorScalar_ThrowsForTooShortDestination( T addend = 
NextRandom(); using BoundedMemory destination = CreateTensor(tensorLength - 1); - AssertExtensions.Throws("destination", () => TensorPrimitives.MultiplyAddEstimate(x, y, addend, destination)); + AssertExtensions.Throws("destination", () => TensorPrimitives.FusedMultiplyAdd(x, y, addend, destination)); }); } [Fact] - public void MultiplyAddEstimate_TensorTensorScalar_ThrowsForOverlapppingInputsWithOutputs() + public void FusedMultiplyAdd_TensorTensorScalar_ThrowsForOverlapppingInputsWithOutputs() { T[] array = new T[10]; - AssertExtensions.Throws("destination", () => TensorPrimitives.MultiplyAddEstimate(array.AsSpan(1, 2), array.AsSpan(4, 2), default(T), array.AsSpan(0, 2))); - AssertExtensions.Throws("destination", () => TensorPrimitives.MultiplyAddEstimate(array.AsSpan(1, 2), array.AsSpan(4, 2), default(T), array.AsSpan(2, 2))); - AssertExtensions.Throws("destination", () => TensorPrimitives.MultiplyAddEstimate(array.AsSpan(1, 2), array.AsSpan(4, 2), default(T), array.AsSpan(3, 2))); - AssertExtensions.Throws("destination", () => TensorPrimitives.MultiplyAddEstimate(array.AsSpan(1, 2), array.AsSpan(4, 2), default(T), array.AsSpan(5, 2))); + AssertExtensions.Throws("destination", () => TensorPrimitives.FusedMultiplyAdd(array.AsSpan(1, 2), array.AsSpan(4, 2), default(T), array.AsSpan(0, 2))); + AssertExtensions.Throws("destination", () => TensorPrimitives.FusedMultiplyAdd(array.AsSpan(1, 2), array.AsSpan(4, 2), default(T), array.AsSpan(2, 2))); + AssertExtensions.Throws("destination", () => TensorPrimitives.FusedMultiplyAdd(array.AsSpan(1, 2), array.AsSpan(4, 2), default(T), array.AsSpan(3, 2))); + AssertExtensions.Throws("destination", () => TensorPrimitives.FusedMultiplyAdd(array.AsSpan(1, 2), array.AsSpan(4, 2), default(T), array.AsSpan(5, 2))); } [Fact] - public void MultiplyAddEstimate_TensorScalarTensor() + public void FusedMultiplyAdd_TensorScalarTensor() { Assert.All(Helpers.TensorLengthsIncluding0, tensorLength => { @@ -858,7 +858,7 @@ public void MultiplyAddEstimate_TensorScalarTensor() using BoundedMemory addend = CreateAndFillTensor(tensorLength); using BoundedMemory destination = CreateTensor(tensorLength); - TensorPrimitives.MultiplyAddEstimate(x, y, addend, destination); + TensorPrimitives.FusedMultiplyAdd(x, y, addend, destination); for (int i = 0; i < tensorLength; i++) { @@ -868,7 +868,7 @@ public void MultiplyAddEstimate_TensorScalarTensor() } [Fact] - public void MultiplyAddEstimate_TensorScalarTensor_InPlace() + public void FusedMultiplyAdd_TensorScalarTensor_InPlace() { Assert.All(Helpers.TensorLengthsIncluding0, tensorLength => { @@ -876,7 +876,7 @@ public void MultiplyAddEstimate_TensorScalarTensor_InPlace() T[] xOrig = x.Span.ToArray(); T y = NextRandom(); - TensorPrimitives.MultiplyAddEstimate(x, y, x, x); + TensorPrimitives.FusedMultiplyAdd(x, y, x, x); for (int i = 0; i < tensorLength; i++) { @@ -886,7 +886,7 @@ public void MultiplyAddEstimate_TensorScalarTensor_InPlace() } [Fact] - public void MultiplyAddEstimate_TensorScalarTensor_ThrowsForTooShortDestination() + public void FusedMultiplyAdd_TensorScalarTensor_ThrowsForTooShortDestination() { Assert.All(Helpers.TensorLengths, tensorLength => { @@ -895,18 +895,18 @@ public void MultiplyAddEstimate_TensorScalarTensor_ThrowsForTooShortDestination( using BoundedMemory addend = CreateAndFillTensor(tensorLength); using BoundedMemory destination = CreateTensor(tensorLength - 1); - AssertExtensions.Throws("destination", () => TensorPrimitives.MultiplyAddEstimate(x, y, addend, destination)); + 
AssertExtensions.Throws("destination", () => TensorPrimitives.FusedMultiplyAdd(x, y, addend, destination)); }); } [Fact] - public void MultiplyAddEstimate_TensorScalarTensor_ThrowsForOverlapppingInputsWithOutputs() + public void FusedMultiplyAdd_TensorScalarTensor_ThrowsForOverlapppingInputsWithOutputs() { T[] array = new T[10]; - AssertExtensions.Throws("destination", () => TensorPrimitives.MultiplyAddEstimate(array.AsSpan(1, 2), default(T), array.AsSpan(4, 2), array.AsSpan(0, 2))); - AssertExtensions.Throws("destination", () => TensorPrimitives.MultiplyAddEstimate(array.AsSpan(1, 2), default(T), array.AsSpan(4, 2), array.AsSpan(2, 2))); - AssertExtensions.Throws("destination", () => TensorPrimitives.MultiplyAddEstimate(array.AsSpan(1, 2), default(T), array.AsSpan(4, 2), array.AsSpan(3, 2))); - AssertExtensions.Throws("destination", () => TensorPrimitives.MultiplyAddEstimate(array.AsSpan(1, 2), default(T), array.AsSpan(4, 2), array.AsSpan(5, 2))); + AssertExtensions.Throws("destination", () => TensorPrimitives.FusedMultiplyAdd(array.AsSpan(1, 2), default(T), array.AsSpan(4, 2), array.AsSpan(0, 2))); + AssertExtensions.Throws("destination", () => TensorPrimitives.FusedMultiplyAdd(array.AsSpan(1, 2), default(T), array.AsSpan(4, 2), array.AsSpan(2, 2))); + AssertExtensions.Throws("destination", () => TensorPrimitives.FusedMultiplyAdd(array.AsSpan(1, 2), default(T), array.AsSpan(4, 2), array.AsSpan(3, 2))); + AssertExtensions.Throws("destination", () => TensorPrimitives.FusedMultiplyAdd(array.AsSpan(1, 2), default(T), array.AsSpan(4, 2), array.AsSpan(5, 2))); } #endregion From 01ad41f690b069f02dbd078c48a6cc04962bad4b Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Fri, 19 Jan 2024 21:26:16 -0500 Subject: [PATCH 04/14] Remove some NotSupportedException throws --- .../netcore/TensorPrimitives.netcore.cs | 49 ++++++++++--------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs index abf79bea82860..e7450e677097b 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs @@ -2739,21 +2739,21 @@ private static int IndexOfMinMaxCore(ReadOnlySpan x) if (Vector512.IsHardwareAccelerated && Vector512.IsSupported && x.Length >= Vector512.Count) { + Debug.Assert(sizeof(T) is 1 or 2 or 4 or 8); + [MethodImpl(MethodImplOptions.AggressiveInlining)] static Vector512 CreateVector512T(int i) => sizeof(T) == sizeof(long) ? Vector512.Create((long)i).As() : sizeof(T) == sizeof(int) ? Vector512.Create(i).As() : sizeof(T) == sizeof(short) ? Vector512.Create((short)i).As() : - sizeof(T) == sizeof(byte) ? Vector512.Create((byte)i).As() : - throw new NotSupportedException(); + Vector512.Create((byte)i).As(); ref T xRef = ref MemoryMarshal.GetReference(x); Vector512 resultIndex = sizeof(T) == sizeof(long) ? Vector512.Create(0L, 1, 2, 3, 4, 5, 6, 7).As() : sizeof(T) == sizeof(int) ? Vector512.Create(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15).As() : sizeof(T) == sizeof(short) ? Vector512.Create(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31).As() : - sizeof(T) == sizeof(byte) ? 
Vector512.Create((byte)0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63).As() : - throw new NotSupportedException(); + Vector512.Create((byte)0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63).As(); Vector512 currentIndex = resultIndex; Vector512 increment = CreateVector512T(Vector512.Count); @@ -2808,10 +2808,9 @@ static Vector512 CreateVector512T(int i) => if (nanMask != Vector512.Zero) { int indexInVectorOfFirstMatch = IndexOfFirstMatch(nanMask); - return - sizeof(T) == sizeof(long) ? (int)(long)(object)currentIndex.As()[indexInVectorOfFirstMatch] : - sizeof(T) == sizeof(int) ? (int)(object)currentIndex.As()[indexInVectorOfFirstMatch] : - throw new NotSupportedException(); + return typeof(T) == typeof(double) ? + (int)(long)(object)currentIndex.As()[indexInVectorOfFirstMatch] : + (int)(object)currentIndex.As()[indexInVectorOfFirstMatch]; } } @@ -2824,21 +2823,21 @@ static Vector512 CreateVector512T(int i) => if (Vector256.IsHardwareAccelerated && Vector256.IsSupported && x.Length >= Vector256.Count) { + Debug.Assert(sizeof(T) is 1 or 2 or 4 or 8); + [MethodImpl(MethodImplOptions.AggressiveInlining)] static Vector256 CreateVector256T(int i) => sizeof(T) == sizeof(long) ? Vector256.Create((long)i).As() : sizeof(T) == sizeof(int) ? Vector256.Create(i).As() : sizeof(T) == sizeof(short) ? Vector256.Create((short)i).As() : - sizeof(T) == sizeof(byte) ? Vector256.Create((byte)i).As() : - throw new NotSupportedException(); + Vector256.Create((byte)i).As(); ref T xRef = ref MemoryMarshal.GetReference(x); Vector256 resultIndex = sizeof(T) == sizeof(long) ? Vector256.Create(0L, 1, 2, 3).As() : sizeof(T) == sizeof(int) ? Vector256.Create(0, 1, 2, 3, 4, 5, 6, 7).As() : sizeof(T) == sizeof(short) ? Vector256.Create(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15).As() : - sizeof(T) == sizeof(byte) ? Vector256.Create((byte)0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31).As() : - throw new NotSupportedException(); + Vector256.Create((byte)0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31).As(); Vector256 currentIndex = resultIndex; Vector256 increment = CreateVector256T(Vector256.Count); @@ -2893,10 +2892,9 @@ static Vector256 CreateVector256T(int i) => if (nanMask != Vector256.Zero) { int indexInVectorOfFirstMatch = IndexOfFirstMatch(nanMask); - return - sizeof(T) == sizeof(long) ? (int)(long)(object)currentIndex.As()[indexInVectorOfFirstMatch] : - sizeof(T) == sizeof(int) ? (int)(object)currentIndex.As()[indexInVectorOfFirstMatch] : - throw new NotSupportedException(); + return typeof(T) == typeof(double) ? + (int)(long)(object)currentIndex.As()[indexInVectorOfFirstMatch] : + (int)(object)currentIndex.As()[indexInVectorOfFirstMatch]; } } @@ -2909,21 +2907,21 @@ static Vector256 CreateVector256T(int i) => if (Vector128.IsHardwareAccelerated && Vector128.IsSupported && x.Length >= Vector128.Count) { + Debug.Assert(sizeof(T) is 1 or 2 or 4 or 8); + [MethodImpl(MethodImplOptions.AggressiveInlining)] static Vector128 CreateVector128T(int i) => sizeof(T) == sizeof(long) ? 
Vector128.Create((long)i).As() : sizeof(T) == sizeof(int) ? Vector128.Create(i).As() : sizeof(T) == sizeof(short) ? Vector128.Create((short)i).As() : - sizeof(T) == sizeof(byte) ? Vector128.Create((byte)i).As() : - throw new NotSupportedException(); + Vector128.Create((byte)i).As(); ref T xRef = ref MemoryMarshal.GetReference(x); Vector128 resultIndex = sizeof(T) == sizeof(long) ? Vector128.Create(0L, 1).As() : sizeof(T) == sizeof(int) ? Vector128.Create(0, 1, 2, 3).As() : sizeof(T) == sizeof(short) ? Vector128.Create(0, 1, 2, 3, 4, 5, 6, 7).As() : - sizeof(T) == sizeof(byte) ? Vector128.Create((byte)0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15).As() : - throw new NotSupportedException(); + Vector128.Create((byte)0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15).As(); Vector128 currentIndex = resultIndex; Vector128 increment = CreateVector128T(Vector128.Count); @@ -2978,10 +2976,9 @@ static Vector128 CreateVector128T(int i) => if (nanMask != Vector128.Zero) { int indexInVectorOfFirstMatch = IndexOfFirstMatch(nanMask); - return - sizeof(T) == sizeof(long) ? (int)(long)(object)currentIndex.As()[indexInVectorOfFirstMatch] : - sizeof(T) == sizeof(int) ? (int)(object)currentIndex.As()[indexInVectorOfFirstMatch] : - throw new NotSupportedException(); + return typeof(T) == typeof(double) ? + (int)(long)(object)currentIndex.As()[indexInVectorOfFirstMatch] : + (int)(object)currentIndex.As()[indexInVectorOfFirstMatch]; } } @@ -9820,6 +9817,10 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) } /// y / x + /// + /// This exists only to enable reusing InvokeSpanScalarIntoDestination. If we ever add an InvokeScalarSpanIntoDestination, + /// this can be deleted and the relevant call site can switch to using . + /// internal readonly struct InvertedDivideOperator : IBinaryOperator where T : IDivisionOperators { public static bool Vectorizable => true; From 86301dffe8876bfaab63f2b72be009bd78a4fa28 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Fri, 19 Jan 2024 21:33:30 -0500 Subject: [PATCH 05/14] Simplify CopySignOperator --- .../netcore/TensorPrimitives.netcore.cs | 30 ++++--------------- 1 file changed, 6 insertions(+), 24 deletions(-) diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs index e7450e677097b..574a0626d1437 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs @@ -14122,12 +14122,6 @@ public static Vector512 Invoke(Vector512 x) private readonly struct CopySignOperator : IBinaryOperator where T : INumber { - // This method is required to work for all inputs, including NaN, so we operate on the raw bits. - // Remove the sign from x, and remove everything but the sign from y. - // Then OR them to get the correct sign. 
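// [Editorial note, not part of the patch] Why the simplified form below works: -0.0f and
// -0.0d are the floating-point values whose only set bit is the sign bit, so using them as
// the ConditionalSelect mask takes just the sign bit from y and every other bit from x.
// A scalar sketch of the same bit manipulation (names are illustrative only):
static float CopySignSketch(float x, float y)
{
    const int signMask = unchecked((int)0x8000_0000);        // bit pattern of -0.0f
    int bits = (BitConverter.SingleToInt32Bits(x) & ~signMask) |
               (BitConverter.SingleToInt32Bits(y) & signMask);
    return BitConverter.Int32BitsToSingle(bits);              // x's magnitude, y's sign (NaN-safe)
}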
- private const int SingleSignMask = 1 << 31; - private const long DoubleSignMask = 1L << 63; - public static bool Vectorizable => true; public static T Invoke(T x, T y) => T.CopySign(x, y); @@ -14136,16 +14130,12 @@ public static Vector128 Invoke(Vector128 x, Vector128 y) { if (typeof(T) == typeof(float)) { - return - ((x.AsInt32() & Vector128.Create(~SingleSignMask)) | - y.AsInt32() & Vector128.Create(SingleSignMask)).As(); + return Vector128.ConditionalSelect(Vector128.Create(-0.0f).As(), y, x); } if (typeof(T) == typeof(double)) { - return - ((x.AsInt64() & Vector128.Create(~DoubleSignMask)) | - y.AsInt64() & Vector128.Create(DoubleSignMask)).As(); + return Vector128.ConditionalSelect(Vector128.Create(-0.0d).As(), y, x); } if (typeof(T) == typeof(sbyte) || typeof(T) == typeof(short) || typeof(T) == typeof(int) || typeof(T) == typeof(long) || typeof(T) == typeof(nint)) @@ -14168,16 +14158,12 @@ public static Vector256 Invoke(Vector256 x, Vector256 y) { if (typeof(T) == typeof(float)) { - return - ((x.AsInt32() & Vector256.Create(~SingleSignMask)) | - y.AsInt32() & Vector256.Create(SingleSignMask)).As(); + return Vector256.ConditionalSelect(Vector256.Create(-0.0f).As(), y, x); } if (typeof(T) == typeof(double)) { - return - ((x.AsInt64() & Vector256.Create(~DoubleSignMask)) | - y.AsInt64() & Vector256.Create(DoubleSignMask)).As(); + return Vector256.ConditionalSelect(Vector256.Create(-0.0d).As(), y, x); } if (typeof(T) == typeof(sbyte) || typeof(T) == typeof(short) || typeof(T) == typeof(int) || typeof(T) == typeof(long) || typeof(T) == typeof(nint)) @@ -14200,16 +14186,12 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) { if (typeof(T) == typeof(float)) { - return - ((x.AsInt32() & Vector512.Create(~SingleSignMask)) | - y.AsInt32() & Vector512.Create(SingleSignMask)).As(); + return Vector512.ConditionalSelect(Vector512.Create(-0.0f).As(), y, x); } if (typeof(T) == typeof(double)) { - return - ((x.AsInt64() & Vector512.Create(~DoubleSignMask)) | - y.AsInt64() & Vector512.Create(DoubleSignMask)).As(); + return Vector512.ConditionalSelect(Vector512.Create(-0.0d).As(), y, x); } if (typeof(T) == typeof(sbyte) || typeof(T) == typeof(short) || typeof(T) == typeof(int) || typeof(T) == typeof(long) || typeof(T) == typeof(nint)) From 1291b7ef30b6339a6e11e768caa1df527d081bee Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Fri, 19 Jan 2024 21:38:40 -0500 Subject: [PATCH 06/14] Parameter renames --- .../ref/System.Numerics.Tensors.netcore.cs | 2 +- .../Numerics/Tensors/netcore/TensorPrimitives.T.cs | 12 ++++++------ .../Tensors/netcore/TensorPrimitives.netcore.cs | 8 ++++---- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/libraries/System.Numerics.Tensors/ref/System.Numerics.Tensors.netcore.cs b/src/libraries/System.Numerics.Tensors/ref/System.Numerics.Tensors.netcore.cs index 8ce0932b2bfcc..fbcc424a38bb1 100644 --- a/src/libraries/System.Numerics.Tensors/ref/System.Numerics.Tensors.netcore.cs +++ b/src/libraries/System.Numerics.Tensors/ref/System.Numerics.Tensors.netcore.cs @@ -33,7 +33,7 @@ public static void Cbrt(System.ReadOnlySpan x, System.Span destination) public static void Ceiling(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.IFloatingPoint { } public static void ConvertToHalf(System.ReadOnlySpan source, System.Span destination) { } public static void ConvertToSingle(System.ReadOnlySpan source, System.Span destination) { } - public static void CopySign(System.ReadOnlySpan x, System.ReadOnlySpan y, System.Span destination) where T 
: System.Numerics.INumber { } + public static void CopySign(System.ReadOnlySpan x, System.ReadOnlySpan sign, System.Span destination) where T : System.Numerics.INumber { } public static void CosPi(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.ITrigonometricFunctions { } public static void Cos(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.ITrigonometricFunctions { } public static void Cosh(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.IHyperbolicFunctions { } diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.T.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.T.cs index 8e4c3d7d589e8..b599e23e08aea 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.T.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.T.cs @@ -417,20 +417,20 @@ public static void Ceiling(ReadOnlySpan x, Span destination) /// Computes the element-wise result of copying the sign from one number to another number in the specified tensors. /// The first tensor, represented as a span. - /// The second tensor, represented as a span. + /// The second tensor, represented as a span. /// The destination tensor, represented as a span. - /// Length of must be same as length of . + /// Length of must be same as length of . /// Destination is too short. /// and reference overlapping memory locations and do not begin at the same location. - /// and reference overlapping memory locations and do not begin at the same location. + /// and reference overlapping memory locations and do not begin at the same location. /// /// - /// This method effectively computes [i] = T.CopySign([i], [i]). + /// This method effectively computes [i] = T.CopySign([i], [i]). /// /// - public static void CopySign(ReadOnlySpan x, ReadOnlySpan y, Span destination) + public static void CopySign(ReadOnlySpan x, ReadOnlySpan sign, Span destination) where T : INumber => - InvokeSpanSpanIntoSpan>(x, y, destination); + InvokeSpanSpanIntoSpan>(x, sign, destination); /// Computes the element-wise cosine of the value in the specified tensor. /// The tensor, represented as a span. 
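For reference, a minimal usage sketch of the generic CopySign overload with the renamed sign parameter as it appears after this patch; the element values, the sample class name, and the expected-output comment are illustrative assumptions, not part of the change:

// Illustrative only: exercises TensorPrimitives.CopySign with a 'sign' tensor.
using System;
using System.Numerics.Tensors;

internal static class CopySignSketch
{
    private static void Main()
    {
        ReadOnlySpan<float> x = stackalloc float[] { 1f, -2f, 3f, -4f };
        ReadOnlySpan<float> sign = stackalloc float[] { -1f, -1f, 1f, 1f };
        Span<float> destination = stackalloc float[4];

        // destination[i] = float.CopySign(x[i], sign[i])  =>  -1, -2, 3, 4
        TensorPrimitives.CopySign(x, sign, destination);

        Console.WriteLine(string.Join(", ", destination.ToArray()));
    }
}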
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs index 574a0626d1437..5f8b6ba692ca5 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs @@ -11252,10 +11252,10 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) /// (x * (1 - z)) + (y * z) internal readonly struct LerpOperator : ITernaryOperator where T : IFloatingPointIeee754 { - public static T Invoke(T x, T y, T z) => T.Lerp(x, y, z); - public static Vector128 Invoke(Vector128 x, Vector128 y, Vector128 z) => (x * (Vector128.One - z)) + (y * z); - public static Vector256 Invoke(Vector256 x, Vector256 y, Vector256 z) => (x * (Vector256.One - z)) + (y * z); - public static Vector512 Invoke(Vector512 x, Vector512 y, Vector512 z) => (x * (Vector512.One - z)) + (y * z); + public static T Invoke(T x, T y, T amount) => T.Lerp(x, y, amount); + public static Vector128 Invoke(Vector128 x, Vector128 y, Vector128 amount) => (x * (Vector128.One - amount)) + (y * amount); + public static Vector256 Invoke(Vector256 x, Vector256 y, Vector256 amount) => (x * (Vector256.One - amount)) + (y * amount); + public static Vector512 Invoke(Vector512 x, Vector512 y, Vector512 amount) => (x * (Vector512.One - amount)) + (y * amount); } /// x From eab13abcc61d4ae8440b845e237c246fad105833 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Fri, 19 Jan 2024 21:52:16 -0500 Subject: [PATCH 07/14] Add scalar overloads of Atan2 and Atan2Pi --- .../ref/System.Numerics.Tensors.netcore.cs | 4 + .../Tensors/netcore/TensorPrimitives.T.cs | 76 +++++++++++++++++++ .../netcore/TensorPrimitives.netcore.cs | 30 ++++++++ .../tests/TensorPrimitives.Generic.cs | 72 +++++++++++++++++- 4 files changed, 181 insertions(+), 1 deletion(-) diff --git a/src/libraries/System.Numerics.Tensors/ref/System.Numerics.Tensors.netcore.cs b/src/libraries/System.Numerics.Tensors/ref/System.Numerics.Tensors.netcore.cs index fbcc424a38bb1..c624b8d85b7bf 100644 --- a/src/libraries/System.Numerics.Tensors/ref/System.Numerics.Tensors.netcore.cs +++ b/src/libraries/System.Numerics.Tensors/ref/System.Numerics.Tensors.netcore.cs @@ -21,7 +21,11 @@ public static void Asinh(System.ReadOnlySpan x, System.Span destination public static void AsinPi(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.ITrigonometricFunctions { } public static void Asin(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.ITrigonometricFunctions { } public static void Atan2Pi(System.ReadOnlySpan y, System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.IFloatingPointIeee754 { } + public static void Atan2Pi(System.ReadOnlySpan y, T x, System.Span destination) where T : System.Numerics.IFloatingPointIeee754 { } + public static void Atan2Pi(T y, System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.IFloatingPointIeee754 { } public static void Atan2(System.ReadOnlySpan y, System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.IFloatingPointIeee754 { } + public static void Atan2(System.ReadOnlySpan y, T x, System.Span destination) where T : System.Numerics.IFloatingPointIeee754 { } + public static void Atan2(T y, System.ReadOnlySpan x, System.Span destination) where T : 
System.Numerics.IFloatingPointIeee754 { } public static void Atanh(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.IHyperbolicFunctions { } public static void AtanPi(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.ITrigonometricFunctions { } public static void Atan(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.ITrigonometricFunctions { } diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.T.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.T.cs index b599e23e08aea..8aae10dba371b 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.T.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.T.cs @@ -214,6 +214,44 @@ public static void Atan2(ReadOnlySpan y, ReadOnlySpan x, Span destin where T : IFloatingPointIeee754 => InvokeSpanSpanIntoSpan>(y, x, destination); + /// Computes the element-wise arc-tangent for the quotient of two values in the specified tensors. + /// The first tensor, represented as a span. + /// The second tensor, represented as a scalar. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = .Atan2([i], ). + /// + /// + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. + /// + /// + public static void Atan2(ReadOnlySpan y, T x, Span destination) + where T : IFloatingPointIeee754 => + InvokeSpanScalarIntoSpan>(y, x, destination); + + /// Computes the element-wise arc-tangent for the quotient of two values in the specified tensors. + /// The first tensor, represented as a scalar. + /// The second tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = .Atan2(, [i]). + /// + /// + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. + /// + /// + public static void Atan2(T y, ReadOnlySpan x, Span destination) + where T : IFloatingPointIeee754 => + InvokeSpanScalarIntoSpan>(x, y, destination); + /// Computes the element-wise arc-tangent for the quotient of two values in the specified tensors and divides the result by Pi. /// The first tensor, represented as a span. /// The second tensor, represented as a span. @@ -235,6 +273,44 @@ public static void Atan2Pi(ReadOnlySpan y, ReadOnlySpan x, Span dest where T : IFloatingPointIeee754 => InvokeSpanSpanIntoSpan>(y, x, destination); + /// Computes the element-wise arc-tangent for the quotient of two values in the specified tensors and divides the result by Pi. + /// The first tensor, represented as a span. + /// The second tensor, represented as a scalar. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = .Atan2([i], ). 
+ /// + /// + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. + /// + /// + public static void Atan2Pi(ReadOnlySpan y, T x, Span destination) + where T : IFloatingPointIeee754 => + InvokeSpanScalarIntoSpan>(y, x, destination); + + /// Computes the element-wise arc-tangent for the quotient of two values in the specified tensors and divides the result by Pi. + /// The first tensor, represented as a scalar. + /// The second tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = .Atan2(, [i]). + /// + /// + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. + /// + /// + public static void Atan2Pi(T y, ReadOnlySpan x, Span destination) + where T : IFloatingPointIeee754 => + InvokeSpanScalarIntoSpan>(x, y, destination); + /// Computes the element-wise addition of numbers in the specified tensors. /// The first tensor, represented as a span. /// The second tensor, represented as a span. diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs index 5f8b6ba692ca5..2de8329a17927 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs @@ -12158,6 +12158,21 @@ public static Vector512 Invoke(Vector512 x) public static Vector512 Invoke(Vector512 y, Vector512 x) => throw new NotSupportedException(); } + /// T.Atan2(x, y) + /// + /// This exists only to enable reusing InvokeSpanScalarIntoDestination. If we ever add an InvokeScalarSpanIntoDestination, + /// this can be deleted and the relevant call site can switch to using . + /// + internal readonly struct InvertedAtan2Operator : IBinaryOperator + where T : IFloatingPointIeee754 + { + public static bool Vectorizable => Atan2Operator.Vectorizable; + public static T Invoke(T y, T x) => T.Atan2(x, y); + public static Vector128 Invoke(Vector128 y, Vector128 x) => Atan2Operator.Invoke(x, y); + public static Vector256 Invoke(Vector256 y, Vector256 x) => Atan2Operator.Invoke(x, y); + public static Vector512 Invoke(Vector512 y, Vector512 x) => Atan2Operator.Invoke(x, y); + } + /// T.Atan2Pi(y, x) internal readonly struct Atan2PiOperator : IBinaryOperator where T : IFloatingPointIeee754 @@ -12169,6 +12184,21 @@ public static Vector512 Invoke(Vector512 x) public static Vector512 Invoke(Vector512 y, Vector512 x) => Atan2Operator.Invoke(y, x) / Vector512.Create(T.Pi); } + /// T.Atan2Pi(x, y) + /// + /// This exists only to enable reusing InvokeSpanScalarIntoDestination. If we ever add an InvokeScalarSpanIntoDestination, + /// this can be deleted and the relevant call site can switch to using . 
+ /// + internal readonly struct InvertedAtan2PiOperator : IBinaryOperator + where T : IFloatingPointIeee754 + { + public static bool Vectorizable => Atan2PiOperator.Vectorizable; + public static T Invoke(T y, T x) => T.Atan2Pi(x, y); + public static Vector128 Invoke(Vector128 y, Vector128 x) => Atan2PiOperator.Invoke(x, y); + public static Vector256 Invoke(Vector256 y, Vector256 x) => Atan2PiOperator.Invoke(x, y); + public static Vector512 Invoke(Vector512 y, Vector512 x) => Atan2PiOperator.Invoke(x, y); + } + /// T.Cos(x) internal readonly struct CosOperator : IUnaryOperator where T : ITrigonometricFunctions diff --git a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs index ceddea02b9f2b..e9387735bb132 100644 --- a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs +++ b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs @@ -363,6 +363,8 @@ public void SpanSpanDestination_ThrowsForOverlapppingInputsWithOutputs(SpanSpanD #region Span,Scalar -> Destination public static IEnumerable SpanScalarDestinationFunctionsToTest() { + yield return new object[] { new SpanScalarDestinationDelegate(TensorPrimitives.Atan2), new Func(T.Atan2) }; + yield return new object[] { new SpanScalarDestinationDelegate(TensorPrimitives.Atan2Pi), new Func(T.Atan2Pi) }; yield return new object[] { new SpanScalarDestinationDelegate(TensorPrimitives.Pow), new Func(T.Pow) }; yield return new object[] { new SpanScalarDestinationDelegate(TensorPrimitives.Log), new Func(T.Log) }; } @@ -428,6 +430,74 @@ public void SpanScalarDestination_ThrowsForOverlapppingInputsWithOutputs(SpanSca } #endregion + #region Scalar,Span -> Destination + public static IEnumerable ScalarSpanFloatDestinationFunctionsToTest() + { + yield return new object[] { new ScalarSpanDestinationDelegate(TensorPrimitives.Atan2), new Func(T.Atan2) }; + yield return new object[] { new ScalarSpanDestinationDelegate(TensorPrimitives.Atan2Pi), new Func(T.Atan2Pi) }; + } + + [Theory] + [MemberData(nameof(ScalarSpanFloatDestinationFunctionsToTest))] + public void SpanScalarFloatDestination_AllLengths(ScalarSpanDestinationDelegate tensorPrimitivesMethod, Func expectedMethod) + { + Assert.All(Helpers.TensorLengthsIncluding0, tensorLength => + { + T x = NextRandom(); + using BoundedMemory y = CreateAndFillTensor(tensorLength); + using BoundedMemory destination = CreateTensor(tensorLength); + + tensorPrimitivesMethod(x, y, destination); + for (int i = 0; i < tensorLength; i++) + { + AssertEqualTolerance(expectedMethod(x, y[i]), destination[i]); + } + }); + } + + [Theory] + [MemberData(nameof(ScalarSpanFloatDestinationFunctionsToTest))] + public void SpanScalarFloatDestination_InPlace(ScalarSpanDestinationDelegate tensorPrimitivesMethod, Func expectedMethod) + { + Assert.All(Helpers.TensorLengthsIncluding0, tensorLength => + { + T x = NextRandom(); + using BoundedMemory y = CreateAndFillTensor(tensorLength); + T[] yOrig = y.Span.ToArray(); + + tensorPrimitivesMethod(x, y, y); + + for (int i = 0; i < tensorLength; i++) + { + AssertEqualTolerance(expectedMethod(x, yOrig[i]), y[i]); + } + }); + } + + [Theory] + [MemberData(nameof(ScalarSpanFloatDestinationFunctionsToTest))] + public void SpanScalarFloatDestination_ThrowsForTooShortDestination(ScalarSpanDestinationDelegate tensorPrimitivesMethod, Func _) + { + Assert.All(Helpers.TensorLengths, tensorLength => + { + T x = NextRandom(); + using BoundedMemory y = 
CreateAndFillTensor(tensorLength); + using BoundedMemory destination = CreateTensor(tensorLength - 1); + + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(x, y, destination)); + }); + } + + [Theory] + [MemberData(nameof(ScalarSpanFloatDestinationFunctionsToTest))] + public void SpanScalarFloatDestination_ThrowsForOverlapppingInputsWithOutputs(ScalarSpanDestinationDelegate tensorPrimitivesMethod, Func _) + { + T[] array = new T[10]; + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(default, array.AsSpan(1, 2), array.AsSpan(0, 2))); + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(default, array.AsSpan(1, 2), array.AsSpan(2, 2))); + } + #endregion + #region Span,Int,Span -> Destination public static IEnumerable SpanIntDestinationFunctionsToTest() { @@ -588,7 +658,7 @@ public void SpanSpanSpanDestination_ThrowsForOverlapppingInputsWithOutputs(SpanS } #endregion - #region Span -> Destination, Destination + #region Span -> Destination,Destination public static IEnumerable SpanDestinationDestinationFunctionsToTest() { yield return new object[] { new SpanDestinationDestinationDelegate(TensorPrimitives.SinCos), new Func(T.SinCos) }; From 8b904b40088a05a81fcf97bf1b19637831f1e070 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Fri, 19 Jan 2024 21:58:43 -0500 Subject: [PATCH 08/14] Add CopySign scalar overload --- .../ref/System.Numerics.Tensors.netcore.cs | 1 + .../Tensors/netcore/TensorPrimitives.T.cs | 15 +++++++++++++++ .../tests/TensorPrimitives.Generic.cs | 15 +++++++++++++++ 3 files changed, 31 insertions(+) diff --git a/src/libraries/System.Numerics.Tensors/ref/System.Numerics.Tensors.netcore.cs b/src/libraries/System.Numerics.Tensors/ref/System.Numerics.Tensors.netcore.cs index c624b8d85b7bf..14509ab370330 100644 --- a/src/libraries/System.Numerics.Tensors/ref/System.Numerics.Tensors.netcore.cs +++ b/src/libraries/System.Numerics.Tensors/ref/System.Numerics.Tensors.netcore.cs @@ -38,6 +38,7 @@ public static void Ceiling(System.ReadOnlySpan x, System.Span destinati public static void ConvertToHalf(System.ReadOnlySpan source, System.Span destination) { } public static void ConvertToSingle(System.ReadOnlySpan source, System.Span destination) { } public static void CopySign(System.ReadOnlySpan x, System.ReadOnlySpan sign, System.Span destination) where T : System.Numerics.INumber { } + public static void CopySign(System.ReadOnlySpan x, T sign, System.Span destination) where T : System.Numerics.INumber { } public static void CosPi(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.ITrigonometricFunctions { } public static void Cos(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.ITrigonometricFunctions { } public static void Cosh(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.IHyperbolicFunctions { } diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.T.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.T.cs index 8aae10dba371b..0d0b5c98910aa 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.T.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.T.cs @@ -508,6 +508,21 @@ public static void CopySign(ReadOnlySpan x, ReadOnlySpan sign, Span where T : INumber => InvokeSpanSpanIntoSpan>(x, sign, destination); + /// Computes the element-wise result of copying the sign from 
one number to another number in the specified tensors. + /// The first tensor, represented as a span. + /// The second tensor, represented as a scalar. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = T.CopySign([i], [i]). + /// + /// + public static void CopySign(ReadOnlySpan x, T sign, Span destination) + where T : INumber => + InvokeSpanScalarIntoSpan>(x, sign, destination); + /// Computes the element-wise cosine of the value in the specified tensor. /// The tensor, represented as a span. /// The destination tensor, represented as a span. diff --git a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs index e9387735bb132..3c60d67d1fa53 100644 --- a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs +++ b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs @@ -365,6 +365,7 @@ public static IEnumerable SpanScalarDestinationFunctionsToTest() { yield return new object[] { new SpanScalarDestinationDelegate(TensorPrimitives.Atan2), new Func(T.Atan2) }; yield return new object[] { new SpanScalarDestinationDelegate(TensorPrimitives.Atan2Pi), new Func(T.Atan2Pi) }; + yield return new object[] { new SpanScalarDestinationDelegate(TensorPrimitives.CopySign), new Func(T.CopySign) }; yield return new object[] { new SpanScalarDestinationDelegate(TensorPrimitives.Pow), new Func(T.Pow) }; yield return new object[] { new SpanScalarDestinationDelegate(TensorPrimitives.Log), new Func(T.Log) }; } @@ -1435,6 +1436,15 @@ public void CopySign_AllLengths() { AssertEqualTolerance(T.CopySign(x[i], y[i]), destination[i]); } + + if (tensorLength > 0) + { + TensorPrimitives.CopySign(x, y[0], destination); + for (int i = 0; i < tensorLength; i++) + { + AssertEqualTolerance(T.CopySign(x[i], y[0]), destination[i]); + } + } }); } @@ -1481,6 +1491,7 @@ public void CopySign_ThrowsForTooShortDestination() using BoundedMemory destination = CreateTensor(tensorLength - 1); AssertExtensions.Throws("destination", () => TensorPrimitives.CopySign(x, y, destination)); + AssertExtensions.Throws("destination", () => TensorPrimitives.CopySign(x, y[0], destination)); }); } @@ -1488,10 +1499,14 @@ public void CopySign_ThrowsForTooShortDestination() public void CopySign_ThrowsForOverlapppingInputsWithOutputs() { T[] array = new T[10]; + AssertExtensions.Throws("destination", () => TensorPrimitives.CopySign(array.AsSpan(1, 2), array.AsSpan(5, 2), array.AsSpan(0, 2))); AssertExtensions.Throws("destination", () => TensorPrimitives.CopySign(array.AsSpan(1, 2), array.AsSpan(5, 2), array.AsSpan(2, 2))); AssertExtensions.Throws("destination", () => TensorPrimitives.CopySign(array.AsSpan(1, 2), array.AsSpan(5, 2), array.AsSpan(4, 2))); AssertExtensions.Throws("destination", () => TensorPrimitives.CopySign(array.AsSpan(1, 2), array.AsSpan(5, 2), array.AsSpan(6, 2))); + + AssertExtensions.Throws("destination", () => TensorPrimitives.CopySign(array.AsSpan(1, 2), default(T), array.AsSpan(0, 2))); + AssertExtensions.Throws("destination", () => TensorPrimitives.CopySign(array.AsSpan(1, 2), default(T), array.AsSpan(2, 2))); } #endregion } From 9b1ccbed75e6ff81bd0429f261aea32093fe4629 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Fri, 19 Jan 2024 22:04:58 -0500 Subject: [PATCH 09/14] Add Ieee754Remainder scalar overloads --- 
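An illustrative sketch of the two overload shapes this commit adds; the sample values and class name are assumptions used only to show the span/scalar argument order:

// Illustrative only: the span/scalar and scalar/span forms added by this patch.
using System;
using System.Numerics.Tensors;

internal static class Ieee754RemainderSketch
{
    private static void Main()
    {
        ReadOnlySpan<double> x = stackalloc double[] { 5.0, 6.5, -7.25 };
        Span<double> destination = stackalloc double[3];

        // destination[i] = double.Ieee754Remainder(x[i], 2.0)
        TensorPrimitives.Ieee754Remainder(x, 2.0, destination);

        // destination[i] = double.Ieee754Remainder(10.0, x[i])
        TensorPrimitives.Ieee754Remainder(10.0, x, destination);

        Console.WriteLine(string.Join(", ", destination.ToArray()));
    }
}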
.../ref/System.Numerics.Tensors.netcore.cs | 2 ++ .../Tensors/netcore/TensorPrimitives.T.cs | 33 ++++++++++++++++++- .../netcore/TensorPrimitives.netcore.cs | 14 ++++++++ .../tests/TensorPrimitives.Generic.cs | 2 ++ 4 files changed, 50 insertions(+), 1 deletion(-) diff --git a/src/libraries/System.Numerics.Tensors/ref/System.Numerics.Tensors.netcore.cs b/src/libraries/System.Numerics.Tensors/ref/System.Numerics.Tensors.netcore.cs index 14509ab370330..b539905c0ee3a 100644 --- a/src/libraries/System.Numerics.Tensors/ref/System.Numerics.Tensors.netcore.cs +++ b/src/libraries/System.Numerics.Tensors/ref/System.Numerics.Tensors.netcore.cs @@ -58,6 +58,8 @@ public static void ExpM1(System.ReadOnlySpan x, System.Span destination public static void Floor(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.IFloatingPoint { } public static void Hypot(System.ReadOnlySpan x, System.ReadOnlySpan y, System.Span destination) where T : System.Numerics.IRootFunctions { } public static void Ieee754Remainder(System.ReadOnlySpan x, System.ReadOnlySpan y, System.Span destination) where T : System.Numerics.IFloatingPointIeee754 { } + public static void Ieee754Remainder(System.ReadOnlySpan x, T y, System.Span destination) where T : System.Numerics.IFloatingPointIeee754 { } + public static void Ieee754Remainder(T x, System.ReadOnlySpan y, System.Span destination) where T : System.Numerics.IFloatingPointIeee754 { } public static void ILogB(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.IFloatingPointIeee754 { } public static int IndexOfMaxMagnitude(System.ReadOnlySpan x) where T : System.Numerics.INumber { throw null; } public static int IndexOfMax(System.ReadOnlySpan x) where T : System.Numerics.INumber { throw null; } diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.T.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.T.cs index 0d0b5c98910aa..02d075738fe22 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.T.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.T.cs @@ -915,13 +915,44 @@ public static void Hypot(ReadOnlySpan x, ReadOnlySpan y, Span destin /// and reference overlapping memory locations and do not begin at the same location. /// /// - /// This method effectively computes [i] = T.Ieee754Remainder([i], [i]). + /// This method effectively computes [i] = T.Ieee754Remainder([i], [i]). /// /// public static void Ieee754Remainder(ReadOnlySpan x, ReadOnlySpan y, Span destination) where T : IFloatingPointIeee754 => InvokeSpanSpanIntoSpan>(x, y, destination); + /// Computes the element-wise remainder of the numbers in the specified tensors. + /// The first tensor, represented as a span. + /// The second tensor, represented as a scalar. + /// The destination tensor, represented as a span. + /// Length of must be same as length of . + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = T.Ieee754Remainder([i], ). + /// + /// + public static void Ieee754Remainder(ReadOnlySpan x, T y, Span destination) + where T : IFloatingPointIeee754 => + InvokeSpanScalarIntoSpan>(x, y, destination); + + /// Computes the element-wise remainder of the numbers in the specified tensors. + /// The first tensor, represented as a scalar. 
+ /// The second tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = T.Ieee754Remainder(, [i]). + /// + /// + public static void Ieee754Remainder(T x, ReadOnlySpan y, Span destination) + where T : IFloatingPointIeee754 => + InvokeSpanScalarIntoSpan>(y, x, destination); + /// Computes the element-wise integer logarithm of numbers in the specified tensor. /// The tensor, represented as a span. /// The destination tensor, represented as a span. diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs index 2de8329a17927..c297630a90f6d 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs @@ -9840,6 +9840,20 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) public static Vector512 Invoke(Vector512 x, Vector512 y) => throw new NotSupportedException(); } + /// T.Ieee754Remainder(y, x) + /// + /// This exists only to enable reusing InvokeSpanScalarIntoDestination. If we ever add an InvokeScalarSpanIntoDestination, + /// this can be deleted and the relevant call site can switch to using . + /// + internal readonly struct InvertedIeee754RemainderOperator : IBinaryOperator where T : IFloatingPointIeee754 + { + public static bool Vectorizable => Ieee754RemainderOperator.Vectorizable; + public static T Invoke(T x, T y) => T.Ieee754Remainder(y, x); + public static Vector128 Invoke(Vector128 x, Vector128 y) => Ieee754RemainderOperator.Invoke(y, x); + public static Vector256 Invoke(Vector256 x, Vector256 y) => Ieee754RemainderOperator.Invoke(y, x); + public static Vector512 Invoke(Vector512 x, Vector512 y) => Ieee754RemainderOperator.Invoke(y, x); + } + // Ieee754Remainder internal readonly struct ReciprocalOperator : IUnaryOperator where T : IFloatingPoint diff --git a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs index 3c60d67d1fa53..0e0c151889f79 100644 --- a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs +++ b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs @@ -366,6 +366,7 @@ public static IEnumerable SpanScalarDestinationFunctionsToTest() yield return new object[] { new SpanScalarDestinationDelegate(TensorPrimitives.Atan2), new Func(T.Atan2) }; yield return new object[] { new SpanScalarDestinationDelegate(TensorPrimitives.Atan2Pi), new Func(T.Atan2Pi) }; yield return new object[] { new SpanScalarDestinationDelegate(TensorPrimitives.CopySign), new Func(T.CopySign) }; + yield return new object[] { new SpanScalarDestinationDelegate(TensorPrimitives.Ieee754Remainder), new Func(T.Ieee754Remainder) }; yield return new object[] { new SpanScalarDestinationDelegate(TensorPrimitives.Pow), new Func(T.Pow) }; yield return new object[] { new SpanScalarDestinationDelegate(TensorPrimitives.Log), new Func(T.Log) }; } @@ -436,6 +437,7 @@ public static IEnumerable ScalarSpanFloatDestinationFunctionsToTest() { yield return new object[] { new ScalarSpanDestinationDelegate(TensorPrimitives.Atan2), new Func(T.Atan2) }; yield return 
new object[] { new ScalarSpanDestinationDelegate(TensorPrimitives.Atan2Pi), new Func(T.Atan2Pi) }; + yield return new object[] { new ScalarSpanDestinationDelegate(TensorPrimitives.Ieee754Remainder), new Func(T.Ieee754Remainder) }; } [Theory] From 90c6a53540616bb1219e976d3c2ef41b45493f2b Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Fri, 19 Jan 2024 22:20:02 -0500 Subject: [PATCH 10/14] Add Lerp scalar overloads --- .../ref/System.Numerics.Tensors.netcore.cs | 2 + .../Tensors/netcore/TensorPrimitives.T.cs | 42 +++ .../tests/TensorPrimitives.Generic.cs | 270 ++++++++++-------- .../tests/TensorPrimitivesTests.cs | 2 + 4 files changed, 192 insertions(+), 124 deletions(-) diff --git a/src/libraries/System.Numerics.Tensors/ref/System.Numerics.Tensors.netcore.cs b/src/libraries/System.Numerics.Tensors/ref/System.Numerics.Tensors.netcore.cs index b539905c0ee3a..99b882e98a041 100644 --- a/src/libraries/System.Numerics.Tensors/ref/System.Numerics.Tensors.netcore.cs +++ b/src/libraries/System.Numerics.Tensors/ref/System.Numerics.Tensors.netcore.cs @@ -67,6 +67,8 @@ public static void ILogB(System.ReadOnlySpan x, System.Span destinati public static int IndexOfMin(System.ReadOnlySpan x) where T : System.Numerics.INumber { throw null; } public static void LeadingZeroCount(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.IBinaryInteger { } public static void Lerp(System.ReadOnlySpan x, System.ReadOnlySpan y, System.ReadOnlySpan amount, System.Span destination) where T : System.Numerics.IFloatingPointIeee754 { } + public static void Lerp(System.ReadOnlySpan x, System.ReadOnlySpan y, T amount, System.Span destination) where T : System.Numerics.IFloatingPointIeee754 { } + public static void Lerp(System.ReadOnlySpan x, T y, System.ReadOnlySpan amount, System.Span destination) where T : System.Numerics.IFloatingPointIeee754 { } public static void Log2(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.ILogarithmicFunctions { } public static void Log2P1(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.ILogarithmicFunctions { } public static void LogP1(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.ILogarithmicFunctions { } diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.T.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.T.cs index 02d075738fe22..749012a7dba09 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.T.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.T.cs @@ -1084,6 +1084,48 @@ public static void Lerp(ReadOnlySpan x, ReadOnlySpan y, ReadOnlySpan where T : IFloatingPointIeee754 => InvokeSpanSpanSpanIntoSpan>(x, y, amount, destination); + /// Computes the element-wise linear interpolation between two values based on the given weight in the specified tensors of numbers. + /// The first tensor, represented as a span. + /// The second tensor, represented as a span. + /// The third tensor, represented as a scalar. + /// The destination tensor, represented as a span. + /// Length of must be same as length of . + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = T.Lerp([i], [i], ). 
+ /// + /// + /// If either of the element-wise input values is equal to , the resulting element-wise value is also NaN. + /// + /// + public static void Lerp(ReadOnlySpan x, ReadOnlySpan y, T amount, Span destination) + where T : IFloatingPointIeee754 => + InvokeSpanSpanScalarIntoSpan>(x, y, amount, destination); + + /// Computes the element-wise linear interpolation between two values based on the given weight in the specified tensors of numbers. + /// The first tensor, represented as a span. + /// The second tensor, represented as a scalar. + /// The third tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Length of must be same as length of . + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = T.Lerp([i], , [i]). + /// + /// + /// If either of the element-wise input values is equal to , the resulting element-wise value is also NaN. + /// + /// + public static void Lerp(ReadOnlySpan x, T y, ReadOnlySpan amount, Span destination) + where T : IFloatingPointIeee754 => + InvokeSpanScalarSpanIntoSpan>(x, y, amount, destination); + /// Computes the element-wise natural (base e) logarithm of numbers in the specified tensor. /// The tensor, represented as a span. /// The destination tensor, represented as a span. diff --git a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs index 0e0c151889f79..3f0b238d4cff9 100644 --- a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs +++ b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs @@ -661,6 +661,152 @@ public void SpanSpanSpanDestination_ThrowsForOverlapppingInputsWithOutputs(SpanS } #endregion + #region Span,Span,Scalar -> Destination + public static IEnumerable SpanSpanScalarDestinationFunctionsToTest() + { + yield return new object[] { new SpanSpanScalarDestinationDelegate(TensorPrimitives.FusedMultiplyAdd), new Func(T.FusedMultiplyAdd) }; + yield return new object[] { new SpanSpanScalarDestinationDelegate(TensorPrimitives.Lerp), new Func(T.Lerp) }; + } + + [Theory] + [MemberData(nameof(SpanSpanScalarDestinationFunctionsToTest))] + public void SpanSpanScalarDestination_AllLengths(SpanSpanScalarDestinationDelegate tensorPrimitivesMethod, Func expectedMethod) + { + Assert.All(Helpers.TensorLengthsIncluding0, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + using BoundedMemory y = CreateAndFillTensor(tensorLength); + T z = NextRandom(); + using BoundedMemory destination = CreateTensor(tensorLength); + + tensorPrimitivesMethod(x, y, z, destination); + + for (int i = 0; i < tensorLength; i++) + { + AssertEqualTolerance(expectedMethod(x[i], y[i], z), destination[i]); + } + }); + } + + [Theory] + [MemberData(nameof(SpanSpanScalarDestinationFunctionsToTest))] + public void SpanSpanScalarDestination_InPlace(SpanSpanScalarDestinationDelegate tensorPrimitivesMethod, Func expectedMethod) + { + Assert.All(Helpers.TensorLengthsIncluding0, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + T[] xOrig = x.Span.ToArray(); + T z = NextRandom(); + + tensorPrimitivesMethod(x, x, z, x); + + for (int i = 0; i < tensorLength; i++) + { + AssertEqualTolerance(expectedMethod(xOrig[i], xOrig[i], z), x[i]); + } + }); + } + + 
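        // Aside (not part of this patch): a minimal sketch, with illustrative
        // values, of how the Lerp scalar overloads exercised by these tests are
        // called. Assumes the file's existing using System.Numerics.Tensors;.
        private static void LerpUsageSketch()
        {
            ReadOnlySpan<float> x = stackalloc float[] { 0f, 10f, 100f };
            ReadOnlySpan<float> y = stackalloc float[] { 1f, 20f, 200f };
            ReadOnlySpan<float> amount = stackalloc float[] { 0f, 0.25f, 1f };
            Span<float> destination = stackalloc float[3];

            // destination[i] = float.Lerp(x[i], y[i], 0.5f)
            TensorPrimitives.Lerp(x, y, 0.5f, destination);

            // destination[i] = float.Lerp(x[i], 5f, amount[i])
            TensorPrimitives.Lerp(x, 5f, amount, destination);
        }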
[Theory] + [MemberData(nameof(SpanSpanScalarDestinationFunctionsToTest))] + public void SpanSpanScalarDestination_ThrowsForTooShortDestination(SpanSpanScalarDestinationDelegate tensorPrimitivesMethod, Func _) + { + Assert.All(Helpers.TensorLengths, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + using BoundedMemory y = CreateAndFillTensor(tensorLength); + T z = NextRandom(); + using BoundedMemory destination = CreateTensor(tensorLength - 1); + + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(x, y, z, destination)); + }); + } + + [Theory] + [MemberData(nameof(SpanSpanScalarDestinationFunctionsToTest))] + public void SpanSpanScalarDestination_ThrowsForOverlapppingInputsWithOutputs(SpanSpanScalarDestinationDelegate tensorPrimitivesMethod, Func _) + { + T[] array = new T[10]; + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(array.AsSpan(1, 2), array.AsSpan(4, 2), default, array.AsSpan(0, 2))); + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(array.AsSpan(1, 2), array.AsSpan(4, 2), default, array.AsSpan(2, 2))); + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(array.AsSpan(1, 2), array.AsSpan(4, 2), default, array.AsSpan(3, 2))); + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(array.AsSpan(1, 2), array.AsSpan(4, 2), default, array.AsSpan(5, 2))); + } + #endregion + + #region Span,Scalar,Span -> Destination + public static IEnumerable SpanScalarSpanDestinationFunctionsToTest() + { + yield return new object[] { new SpanScalarSpanDestinationDelegate(TensorPrimitives.FusedMultiplyAdd), new Func(T.FusedMultiplyAdd) }; + yield return new object[] { new SpanScalarSpanDestinationDelegate(TensorPrimitives.Lerp), new Func(T.Lerp) }; + } + + [Theory] + [MemberData(nameof(SpanScalarSpanDestinationFunctionsToTest))] + public void SpanScalarSpanDestination_AllLengths(SpanScalarSpanDestinationDelegate tensorPrimitivesMethod, Func expectedMethod) + { + Assert.All(Helpers.TensorLengthsIncluding0, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + T y = NextRandom(); + using BoundedMemory z = CreateAndFillTensor(tensorLength); + using BoundedMemory destination = CreateTensor(tensorLength); + + tensorPrimitivesMethod(x, y, z, destination); + + for (int i = 0; i < tensorLength; i++) + { + AssertEqualTolerance(expectedMethod(x[i], y, z[i]), destination[i]); + } + }); + } + + [Theory] + [MemberData(nameof(SpanScalarSpanDestinationFunctionsToTest))] + public void SpanScalarSpanDestination_InPlace(SpanScalarSpanDestinationDelegate tensorPrimitivesMethod, Func expectedMethod) + { + Assert.All(Helpers.TensorLengthsIncluding0, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + T[] xOrig = x.Span.ToArray(); + T y = NextRandom(); + + tensorPrimitivesMethod(x, y, x, x); + + for (int i = 0; i < tensorLength; i++) + { + AssertEqualTolerance(expectedMethod(xOrig[i], y, xOrig[i]), x[i]); + } + }); + } + + [Theory] + [MemberData(nameof(SpanScalarSpanDestinationFunctionsToTest))] + public void SpanScalarSpanDestination_ThrowsForTooShortDestination(SpanScalarSpanDestinationDelegate tensorPrimitivesMethod, Func _) + { + Assert.All(Helpers.TensorLengths, tensorLength => + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + T y = NextRandom(); + using BoundedMemory z = CreateAndFillTensor(tensorLength); + using BoundedMemory destination = CreateTensor(tensorLength - 1); + + AssertExtensions.Throws("destination", () => 
tensorPrimitivesMethod(x, y, z, destination)); + }); + } + + [Theory] + [MemberData(nameof(SpanScalarSpanDestinationFunctionsToTest))] + public void SpanScalarSpanDestination_ThrowsForOverlapppingInputsWithOutputs(SpanScalarSpanDestinationDelegate tensorPrimitivesMethod, Func _) + { + T[] array = new T[10]; + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(array.AsSpan(1, 2), default, array.AsSpan(4, 2), array.AsSpan(0, 2))); + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(array.AsSpan(1, 2), default, array.AsSpan(4, 2), array.AsSpan(2, 2))); + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(array.AsSpan(1, 2), default, array.AsSpan(4, 2), array.AsSpan(3, 2))); + AssertExtensions.Throws("destination", () => tensorPrimitivesMethod(array.AsSpan(1, 2), default, array.AsSpan(4, 2), array.AsSpan(5, 2))); + } + #endregion + #region Span -> Destination,Destination public static IEnumerable SpanDestinationDestinationFunctionsToTest() { @@ -859,130 +1005,6 @@ public void ILogB_ThrowsForTooShortDestination() } #endregion - #region FusedMultiplyAdd - [Fact] - public void FusedMultiplyAdd_TensorTensorScalar_AllLengths() - { - Assert.All(Helpers.TensorLengthsIncluding0, tensorLength => - { - using BoundedMemory x = CreateAndFillTensor(tensorLength); - using BoundedMemory y = CreateAndFillTensor(tensorLength); - T addend = NextRandom(); - using BoundedMemory destination = CreateTensor(tensorLength); - - TensorPrimitives.FusedMultiplyAdd(x, y, addend, destination); - - for (int i = 0; i < tensorLength; i++) - { - AssertEqualTolerance((x[i] * y[i]) + addend, destination[i]); - } - }); - } - - [Fact] - public void FusedMultiplyAdd_TensorTensorScalar_InPlace() - { - Assert.All(Helpers.TensorLengthsIncluding0, tensorLength => - { - using BoundedMemory x = CreateAndFillTensor(tensorLength); - T[] xOrig = x.Span.ToArray(); - T addend = NextRandom(); - - TensorPrimitives.FusedMultiplyAdd(x, x, addend, x); - - for (int i = 0; i < tensorLength; i++) - { - AssertEqualTolerance((xOrig[i] * xOrig[i]) + addend, x[i]); - } - }); - } - - [Fact] - public void FusedMultiplyAdd_TensorTensorScalar_ThrowsForTooShortDestination() - { - Assert.All(Helpers.TensorLengths, tensorLength => - { - using BoundedMemory x = CreateAndFillTensor(tensorLength); - using BoundedMemory y = CreateAndFillTensor(tensorLength); - T addend = NextRandom(); - using BoundedMemory destination = CreateTensor(tensorLength - 1); - - AssertExtensions.Throws("destination", () => TensorPrimitives.FusedMultiplyAdd(x, y, addend, destination)); - }); - } - - [Fact] - public void FusedMultiplyAdd_TensorTensorScalar_ThrowsForOverlapppingInputsWithOutputs() - { - T[] array = new T[10]; - AssertExtensions.Throws("destination", () => TensorPrimitives.FusedMultiplyAdd(array.AsSpan(1, 2), array.AsSpan(4, 2), default(T), array.AsSpan(0, 2))); - AssertExtensions.Throws("destination", () => TensorPrimitives.FusedMultiplyAdd(array.AsSpan(1, 2), array.AsSpan(4, 2), default(T), array.AsSpan(2, 2))); - AssertExtensions.Throws("destination", () => TensorPrimitives.FusedMultiplyAdd(array.AsSpan(1, 2), array.AsSpan(4, 2), default(T), array.AsSpan(3, 2))); - AssertExtensions.Throws("destination", () => TensorPrimitives.FusedMultiplyAdd(array.AsSpan(1, 2), array.AsSpan(4, 2), default(T), array.AsSpan(5, 2))); - } - - [Fact] - public void FusedMultiplyAdd_TensorScalarTensor() - { - Assert.All(Helpers.TensorLengthsIncluding0, tensorLength => - { - using BoundedMemory x = CreateAndFillTensor(tensorLength); - T y = 
NextRandom(); - using BoundedMemory addend = CreateAndFillTensor(tensorLength); - using BoundedMemory destination = CreateTensor(tensorLength); - - TensorPrimitives.FusedMultiplyAdd(x, y, addend, destination); - - for (int i = 0; i < tensorLength; i++) - { - AssertEqualTolerance((x[i] * y) + addend[i], destination[i]); - } - }); - } - - [Fact] - public void FusedMultiplyAdd_TensorScalarTensor_InPlace() - { - Assert.All(Helpers.TensorLengthsIncluding0, tensorLength => - { - using BoundedMemory x = CreateAndFillTensor(tensorLength); - T[] xOrig = x.Span.ToArray(); - T y = NextRandom(); - - TensorPrimitives.FusedMultiplyAdd(x, y, x, x); - - for (int i = 0; i < tensorLength; i++) - { - AssertEqualTolerance((xOrig[i] * y) + xOrig[i], x[i]); - } - }); - } - - [Fact] - public void FusedMultiplyAdd_TensorScalarTensor_ThrowsForTooShortDestination() - { - Assert.All(Helpers.TensorLengths, tensorLength => - { - using BoundedMemory x = CreateAndFillTensor(tensorLength); - T y = NextRandom(); - using BoundedMemory addend = CreateAndFillTensor(tensorLength); - using BoundedMemory destination = CreateTensor(tensorLength - 1); - - AssertExtensions.Throws("destination", () => TensorPrimitives.FusedMultiplyAdd(x, y, addend, destination)); - }); - } - - [Fact] - public void FusedMultiplyAdd_TensorScalarTensor_ThrowsForOverlapppingInputsWithOutputs() - { - T[] array = new T[10]; - AssertExtensions.Throws("destination", () => TensorPrimitives.FusedMultiplyAdd(array.AsSpan(1, 2), default(T), array.AsSpan(4, 2), array.AsSpan(0, 2))); - AssertExtensions.Throws("destination", () => TensorPrimitives.FusedMultiplyAdd(array.AsSpan(1, 2), default(T), array.AsSpan(4, 2), array.AsSpan(2, 2))); - AssertExtensions.Throws("destination", () => TensorPrimitives.FusedMultiplyAdd(array.AsSpan(1, 2), default(T), array.AsSpan(4, 2), array.AsSpan(3, 2))); - AssertExtensions.Throws("destination", () => TensorPrimitives.FusedMultiplyAdd(array.AsSpan(1, 2), default(T), array.AsSpan(4, 2), array.AsSpan(5, 2))); - } - #endregion - #region Round public static IEnumerable RoundData() { diff --git a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitivesTests.cs b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitivesTests.cs index 747c67e915bea..8a9f9ff6b4de2 100644 --- a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitivesTests.cs +++ b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitivesTests.cs @@ -96,6 +96,8 @@ public abstract class TensorPrimitivesTests where T : unmanaged, IEquatable(ReadOnlySpan x, T2 y, Span destination); public delegate void ScalarSpanDestinationDelegate(T x, ReadOnlySpan y, Span destination); public delegate void SpanSpanSpanDestinationDelegate(ReadOnlySpan x, ReadOnlySpan y, ReadOnlySpan z, Span destination); + public delegate void SpanSpanScalarDestinationDelegate(ReadOnlySpan x, ReadOnlySpan y, T z, Span destination); + public delegate void SpanScalarSpanDestinationDelegate(ReadOnlySpan x, T y, ReadOnlySpan z, Span destination); public delegate void SpanDestinationDestinationDelegate(ReadOnlySpan x, Span destination1, Span destination2); protected virtual bool IsFloatingPoint => typeof(T) == typeof(float) || typeof(T) == typeof(double); From 70637c514f68a4bd87968002e47ceed079025dfa Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Fri, 19 Jan 2024 22:24:08 -0500 Subject: [PATCH 11/14] Add Pow scalar overload --- .../ref/System.Numerics.Tensors.netcore.cs | 1 + .../Tensors/netcore/TensorPrimitives.T.cs | 15 +++++++++++++++ .../Tensors/netcore/TensorPrimitives.netcore.cs | 15 
+++++++++++++++ .../tests/TensorPrimitives.Generic.cs | 1 + 4 files changed, 32 insertions(+) diff --git a/src/libraries/System.Numerics.Tensors/ref/System.Numerics.Tensors.netcore.cs b/src/libraries/System.Numerics.Tensors/ref/System.Numerics.Tensors.netcore.cs index 99b882e98a041..dd30502a8e82b 100644 --- a/src/libraries/System.Numerics.Tensors/ref/System.Numerics.Tensors.netcore.cs +++ b/src/libraries/System.Numerics.Tensors/ref/System.Numerics.Tensors.netcore.cs @@ -99,6 +99,7 @@ public static void OnesComplement(System.ReadOnlySpan x, System.Span de public static void PopCount(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.IBinaryInteger { } public static void Pow(System.ReadOnlySpan x, System.ReadOnlySpan y, System.Span destination) where T : System.Numerics.IPowerFunctions { } public static void Pow(System.ReadOnlySpan x, T y, System.Span destination) where T : System.Numerics.IPowerFunctions { } + public static void Pow(T x, System.ReadOnlySpan y, System.Span destination) where T : System.Numerics.IPowerFunctions { } public static T ProductOfDifferences(System.ReadOnlySpan x, System.ReadOnlySpan y) where T : System.Numerics.ISubtractionOperators, System.Numerics.IMultiplyOperators, System.Numerics.IMultiplicativeIdentity { throw null; } public static T ProductOfSums(System.ReadOnlySpan x, System.ReadOnlySpan y) where T : System.Numerics.IAdditionOperators, System.Numerics.IAdditiveIdentity, System.Numerics.IMultiplyOperators, System.Numerics.IMultiplicativeIdentity { throw null; } public static T Product(System.ReadOnlySpan x) where T : System.Numerics.IMultiplyOperators, System.Numerics.IMultiplicativeIdentity { throw null; } diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.T.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.T.cs index 749012a7dba09..b50729b4ae4c1 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.T.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.T.cs @@ -1744,6 +1744,21 @@ public static void Pow(ReadOnlySpan x, T y, Span destination) where T : IPowerFunctions => InvokeSpanScalarIntoSpan>(x, y, destination); + /// Computes the element-wise power of a number in a specified tensor raised to a number in another specified tensors. + /// The first tensor, represented as a scalar. + /// The second tensor, represented as a span. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = T.Pow(, [i]). + /// + /// + public static void Pow(T x, ReadOnlySpan y, Span destination) + where T : IPowerFunctions => + InvokeSpanScalarIntoSpan>(y, x, destination); + /// Computes the product of all elements in the specified non-empty tensor of numbers. /// The tensor, represented as a span. /// The result of multiplying all elements in . 
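An illustrative sketch of the scalar-base overload added above, alongside the existing span/scalar form; the sample values, class name, and result comments are assumptions, not part of the change:

// Illustrative only: scalar base with a span of exponents, and the reverse.
using System;
using System.Numerics.Tensors;

internal static class PowSketch
{
    private static void Main()
    {
        ReadOnlySpan<double> exponents = stackalloc double[] { 0, 1, 2, 3 };
        Span<double> destination = stackalloc double[4];

        // destination[i] = double.Pow(2.0, exponents[i])  =>  1, 2, 4, 8
        TensorPrimitives.Pow(2.0, exponents, destination);

        // destination[i] = double.Pow(exponents[i], 2.0)  =>  0, 1, 4, 9
        TensorPrimitives.Pow(exponents, 2.0, destination);

        Console.WriteLine(string.Join(", ", destination.ToArray()));
    }
}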
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs index c297630a90f6d..9e923af62ea95 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs @@ -12029,6 +12029,21 @@ public static Vector512 Invoke(Vector512 x) public static Vector512 Invoke(Vector512 x, Vector512 y) => throw new NotSupportedException(); } + /// T.Pow(y, x) + /// + /// This exists only to enable reusing InvokeSpanScalarIntoDestination. If we ever add an InvokeScalarSpanIntoDestination, + /// this can be deleted and the relevant call site can switch to using . + /// + internal readonly struct InvertedPowOperator : IBinaryOperator + where T : IPowerFunctions + { + public static bool Vectorizable => PowOperator.Vectorizable; + public static T Invoke(T x, T y) => T.Pow(y, x); + public static Vector128 Invoke(Vector128 x, Vector128 y) => PowOperator.Invoke(y, x); + public static Vector256 Invoke(Vector256 x, Vector256 y) => PowOperator.Invoke(y, x); + public static Vector512 Invoke(Vector512 x, Vector512 y) => PowOperator.Invoke(y, x); + } + /// T.Sqrt(x) internal readonly struct SqrtOperator : IUnaryOperator where T : IRootFunctions diff --git a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs index 3f0b238d4cff9..cd2ad8c039643 100644 --- a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs +++ b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs @@ -437,6 +437,7 @@ public static IEnumerable ScalarSpanFloatDestinationFunctionsToTest() { yield return new object[] { new ScalarSpanDestinationDelegate(TensorPrimitives.Atan2), new Func(T.Atan2) }; yield return new object[] { new ScalarSpanDestinationDelegate(TensorPrimitives.Atan2Pi), new Func(T.Atan2Pi) }; + yield return new object[] { new ScalarSpanDestinationDelegate(TensorPrimitives.Pow), new Func(T.Pow) }; yield return new object[] { new ScalarSpanDestinationDelegate(TensorPrimitives.Ieee754Remainder), new Func(T.Ieee754Remainder) }; } From 83a2b22f1748a37f5ef18400680be2719b96106f Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Fri, 19 Jan 2024 22:35:24 -0500 Subject: [PATCH 12/14] Consolidate inverted operators --- .../Tensors/netcore/TensorPrimitives.T.cs | 12 +- .../netcore/TensorPrimitives.netcore.cs | 106 ++++-------------- 2 files changed, 29 insertions(+), 89 deletions(-) diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.T.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.T.cs index b50729b4ae4c1..d582f40243310 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.T.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.T.cs @@ -250,7 +250,7 @@ public static void Atan2(ReadOnlySpan y, T x, Span destination) /// public static void Atan2(T y, ReadOnlySpan x, Span destination) where T : IFloatingPointIeee754 => - InvokeSpanScalarIntoSpan>(x, y, destination); + InvokeScalarSpanIntoSpan>(y, x, destination); /// Computes the element-wise arc-tangent for the quotient of two values in the specified tensors and divides 
the result by Pi. /// The first tensor, represented as a span. @@ -309,7 +309,7 @@ public static void Atan2Pi(ReadOnlySpan y, T x, Span destination) /// public static void Atan2Pi(T y, ReadOnlySpan x, Span destination) where T : IFloatingPointIeee754 => - InvokeSpanScalarIntoSpan>(x, y, destination); + InvokeScalarSpanIntoSpan>(y, x, destination); /// Computes the element-wise addition of numbers in the specified tensors. /// The first tensor, represented as a span. @@ -733,7 +733,7 @@ public static void Divide(ReadOnlySpan x, T y, Span destination) /// public static void Divide(T x, ReadOnlySpan y, Span destination) where T : IDivisionOperators => - InvokeSpanScalarIntoSpan>(y, x, destination); + InvokeScalarSpanIntoSpan>(x, y, destination); /// Computes the dot product of two tensors containing numbers. /// The first tensor, represented as a span. @@ -951,7 +951,7 @@ public static void Ieee754Remainder(ReadOnlySpan x, T y, Span destinati /// public static void Ieee754Remainder(T x, ReadOnlySpan y, Span destination) where T : IFloatingPointIeee754 => - InvokeSpanScalarIntoSpan>(y, x, destination); + InvokeScalarSpanIntoSpan>(x, y, destination); /// Computes the element-wise integer logarithm of numbers in the specified tensor. /// The tensor, represented as a span. @@ -1757,7 +1757,7 @@ public static void Pow(ReadOnlySpan x, T y, Span destination) /// public static void Pow(T x, ReadOnlySpan y, Span destination) where T : IPowerFunctions => - InvokeSpanScalarIntoSpan>(y, x, destination); + InvokeScalarSpanIntoSpan>(x, y, destination); /// Computes the product of all elements in the specified non-empty tensor of numbers. /// The tensor, represented as a span. @@ -2460,7 +2460,7 @@ public static void Subtract(ReadOnlySpan x, T y, Span destination) /// public static void Subtract(T x, ReadOnlySpan y, Span destination) where T : ISubtractionOperators => - InvokeSpanScalarIntoSpan>(y, x, destination); + InvokeScalarSpanIntoSpan>(x, y, destination); /// Computes the sum of all elements in the specified tensor of numbers. /// The tensor, represented as a span. diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs index 9e923af62ea95..85535d85f721d 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs @@ -4928,6 +4928,19 @@ static void VectorizedSmall8(ref T xRef, ref T yRef, ref T dRef, nuint remainder } } + /// + /// Performs an element-wise operation on and , + /// and writes the results to . + /// + /// The element type. + /// + /// Specifies the operation to perform on each element loaded from with . + /// + private static void InvokeScalarSpanIntoSpan( + T x, ReadOnlySpan y, Span destination) + where TBinaryOperator : struct, IBinaryOperator => + InvokeSpanScalarIntoSpan, InvertedBinaryOperator>(y, x, destination); + /// /// Performs an element-wise operation on and , /// and writes the results to . 
@@ -9739,6 +9752,16 @@ ref Unsafe.As(ref MemoryMarshal.GetReference(RemainderUInt64Mask_8x9)) public static T IdentityValue => T.AdditiveIdentity; } + private readonly struct InvertedBinaryOperator : IBinaryOperator + where TOperator : IBinaryOperator + { + public static bool Vectorizable => TOperator.Vectorizable; + public static T Invoke(T x, T y) => TOperator.Invoke(y, x); + public static Vector128 Invoke(Vector128 x, Vector128 y) => TOperator.Invoke(y, x); + public static Vector256 Invoke(Vector256 x, Vector256 y) => TOperator.Invoke(y, x); + public static Vector512 Invoke(Vector512 x, Vector512 y) => TOperator.Invoke(y, x); + } + /// x - y internal readonly struct SubtractOperator : IBinaryOperator where T : ISubtractionOperators { @@ -9749,16 +9772,6 @@ ref Unsafe.As(ref MemoryMarshal.GetReference(RemainderUInt64Mask_8x9)) public static Vector512 Invoke(Vector512 x, Vector512 y) => x - y; } - /// y - x - internal readonly struct InvertedSubtractOperator : IBinaryOperator where T : ISubtractionOperators - { - public static bool Vectorizable => true; - public static T Invoke(T x, T y) => y - x; - public static Vector128 Invoke(Vector128 x, Vector128 y) => y - x; - public static Vector256 Invoke(Vector256 x, Vector256 y) => y - x; - public static Vector512 Invoke(Vector512 x, Vector512 y) => y - x; - } - /// (x - y) * (x - y) internal readonly struct SubtractSquaredOperator : IBinaryOperator where T : ISubtractionOperators, IMultiplyOperators { @@ -9816,20 +9829,6 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) public static Vector512 Invoke(Vector512 x, Vector512 y) => x / y; } - /// y / x - /// - /// This exists only to enable reusing InvokeSpanScalarIntoDestination. If we ever add an InvokeScalarSpanIntoDestination, - /// this can be deleted and the relevant call site can switch to using . - /// - internal readonly struct InvertedDivideOperator : IBinaryOperator where T : IDivisionOperators - { - public static bool Vectorizable => true; - public static T Invoke(T x, T y) => y / x; - public static Vector128 Invoke(Vector128 x, Vector128 y) => y / x; - public static Vector256 Invoke(Vector256 x, Vector256 y) => y / x; - public static Vector512 Invoke(Vector512 x, Vector512 y) => y / x; - } - /// T.Ieee754Remainder(x, y) internal readonly struct Ieee754RemainderOperator : IBinaryOperator where T : IFloatingPointIeee754 { @@ -9840,20 +9839,6 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) public static Vector512 Invoke(Vector512 x, Vector512 y) => throw new NotSupportedException(); } - /// T.Ieee754Remainder(y, x) - /// - /// This exists only to enable reusing InvokeSpanScalarIntoDestination. If we ever add an InvokeScalarSpanIntoDestination, - /// this can be deleted and the relevant call site can switch to using . 
- /// - internal readonly struct InvertedIeee754RemainderOperator : IBinaryOperator where T : IFloatingPointIeee754 - { - public static bool Vectorizable => Ieee754RemainderOperator.Vectorizable; - public static T Invoke(T x, T y) => T.Ieee754Remainder(y, x); - public static Vector128 Invoke(Vector128 x, Vector128 y) => Ieee754RemainderOperator.Invoke(y, x); - public static Vector256 Invoke(Vector256 x, Vector256 y) => Ieee754RemainderOperator.Invoke(y, x); - public static Vector512 Invoke(Vector512 x, Vector512 y) => Ieee754RemainderOperator.Invoke(y, x); - } - // Ieee754Remainder internal readonly struct ReciprocalOperator : IUnaryOperator where T : IFloatingPoint @@ -12029,21 +12014,6 @@ public static Vector512 Invoke(Vector512 x) public static Vector512 Invoke(Vector512 x, Vector512 y) => throw new NotSupportedException(); } - /// T.Pow(y, x) - /// - /// This exists only to enable reusing InvokeSpanScalarIntoDestination. If we ever add an InvokeScalarSpanIntoDestination, - /// this can be deleted and the relevant call site can switch to using . - /// - internal readonly struct InvertedPowOperator : IBinaryOperator - where T : IPowerFunctions - { - public static bool Vectorizable => PowOperator.Vectorizable; - public static T Invoke(T x, T y) => T.Pow(y, x); - public static Vector128 Invoke(Vector128 x, Vector128 y) => PowOperator.Invoke(y, x); - public static Vector256 Invoke(Vector256 x, Vector256 y) => PowOperator.Invoke(y, x); - public static Vector512 Invoke(Vector512 x, Vector512 y) => PowOperator.Invoke(y, x); - } - /// T.Sqrt(x) internal readonly struct SqrtOperator : IUnaryOperator where T : IRootFunctions @@ -12187,21 +12157,6 @@ public static Vector512 Invoke(Vector512 x) public static Vector512 Invoke(Vector512 y, Vector512 x) => throw new NotSupportedException(); } - /// T.Atan2(x, y) - /// - /// This exists only to enable reusing InvokeSpanScalarIntoDestination. If we ever add an InvokeScalarSpanIntoDestination, - /// this can be deleted and the relevant call site can switch to using . - /// - internal readonly struct InvertedAtan2Operator : IBinaryOperator - where T : IFloatingPointIeee754 - { - public static bool Vectorizable => Atan2Operator.Vectorizable; - public static T Invoke(T y, T x) => T.Atan2(x, y); - public static Vector128 Invoke(Vector128 y, Vector128 x) => Atan2Operator.Invoke(x, y); - public static Vector256 Invoke(Vector256 y, Vector256 x) => Atan2Operator.Invoke(x, y); - public static Vector512 Invoke(Vector512 y, Vector512 x) => Atan2Operator.Invoke(x, y); - } - /// T.Atan2Pi(y, x) internal readonly struct Atan2PiOperator : IBinaryOperator where T : IFloatingPointIeee754 @@ -12213,21 +12168,6 @@ public static Vector512 Invoke(Vector512 x) public static Vector512 Invoke(Vector512 y, Vector512 x) => Atan2Operator.Invoke(y, x) / Vector512.Create(T.Pi); } - /// T.Atan2Pi(x, y) - /// - /// This exists only to enable reusing InvokeSpanScalarIntoDestination. If we ever add an InvokeScalarSpanIntoDestination, - /// this can be deleted and the relevant call site can switch to using . 
- /// - internal readonly struct InvertedAtan2PiOperator : IBinaryOperator - where T : IFloatingPointIeee754 - { - public static bool Vectorizable => Atan2PiOperator.Vectorizable; - public static T Invoke(T y, T x) => T.Atan2Pi(x, y); - public static Vector128 Invoke(Vector128 y, Vector128 x) => Atan2PiOperator.Invoke(x, y); - public static Vector256 Invoke(Vector256 y, Vector256 x) => Atan2PiOperator.Invoke(x, y); - public static Vector512 Invoke(Vector512 y, Vector512 x) => Atan2PiOperator.Invoke(x, y); - } - /// T.Cos(x) internal readonly struct CosOperator : IUnaryOperator where T : ITrigonometricFunctions From e2174aa4ab6ae2bebc2709a79006b413a66401d3 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Sat, 20 Jan 2024 09:56:14 -0500 Subject: [PATCH 13/14] Add missing Max/Min scalar overloads --- .../ref/System.Numerics.Tensors.netcore.cs | 4 + .../Tensors/netcore/TensorPrimitives.T.cs | 85 +++++++++++++++++++ .../tests/TensorPrimitives.Generic.cs | 8 ++ 3 files changed, 97 insertions(+) diff --git a/src/libraries/System.Numerics.Tensors/ref/System.Numerics.Tensors.netcore.cs b/src/libraries/System.Numerics.Tensors/ref/System.Numerics.Tensors.netcore.cs index dd30502a8e82b..031ba109ba6b5 100644 --- a/src/libraries/System.Numerics.Tensors/ref/System.Numerics.Tensors.netcore.cs +++ b/src/libraries/System.Numerics.Tensors/ref/System.Numerics.Tensors.netcore.cs @@ -79,12 +79,16 @@ public static void Log10P1(System.ReadOnlySpan x, System.Span destinati public static void Log10(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.ILogarithmicFunctions { } public static T MaxMagnitude(System.ReadOnlySpan x) where T : System.Numerics.INumberBase { throw null; } public static void MaxMagnitude(System.ReadOnlySpan x, System.ReadOnlySpan y, System.Span destination) where T : System.Numerics.INumberBase { } + public static void MaxMagnitude(System.ReadOnlySpan x, T y, System.Span destination) where T : System.Numerics.INumberBase { } public static T Max(System.ReadOnlySpan x) where T : System.Numerics.INumber { throw null; } public static void Max(System.ReadOnlySpan x, System.ReadOnlySpan y, System.Span destination) where T : System.Numerics.INumber { } + public static void Max(System.ReadOnlySpan x, T y, System.Span destination) where T : System.Numerics.INumber { } public static T MinMagnitude(System.ReadOnlySpan x) where T : System.Numerics.INumberBase { throw null; } public static void MinMagnitude(System.ReadOnlySpan x, System.ReadOnlySpan y, System.Span destination) where T : System.Numerics.INumberBase { } + public static void MinMagnitude(System.ReadOnlySpan x, T y, System.Span destination) where T : System.Numerics.INumberBase { } public static T Min(System.ReadOnlySpan x) where T : System.Numerics.INumber { throw null; } public static void Min(System.ReadOnlySpan x, System.ReadOnlySpan y, System.Span destination) where T : System.Numerics.INumber { } + public static void Min(System.ReadOnlySpan x, T y, System.Span destination) where T : System.Numerics.INumber { } public static void MultiplyAdd(System.ReadOnlySpan x, System.ReadOnlySpan y, System.ReadOnlySpan addend, System.Span destination) where T : System.Numerics.IAdditionOperators, System.Numerics.IMultiplyOperators { } public static void MultiplyAdd(System.ReadOnlySpan x, System.ReadOnlySpan y, T addend, System.Span destination) where T : System.Numerics.IAdditionOperators, System.Numerics.IMultiplyOperators { } public static void MultiplyAdd(System.ReadOnlySpan x, T y, System.ReadOnlySpan addend, System.Span 
destination) where T : System.Numerics.IAdditionOperators, System.Numerics.IMultiplyOperators { } diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.T.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.T.cs index d582f40243310..fdf8e37fb06e5 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.T.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.T.cs @@ -1353,6 +1353,29 @@ public static void Max(ReadOnlySpan x, ReadOnlySpan y, Span destinat where T : INumber => InvokeSpanSpanIntoSpan>(x, y, destination); + /// Computes the element-wise maximum of the numbers in the specified tensors. + /// The first tensor, represented as a span. + /// The second tensor, represented as a scalar. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// + /// + /// This method effectively computes [i] = .Max([i], ). + /// + /// + /// The determination of the maximum element matches the IEEE 754:2019 `maximum` function. If either value is equal to , + /// that value is stored as the result. Positive 0 is considered greater than negative 0. + /// + /// + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. + /// + /// + public static void Max(ReadOnlySpan x, T y, Span destination) + where T : INumber => + InvokeSpanScalarIntoSpan>(x, y, destination); + /// Searches for the number with the largest magnitude in the specified tensor. /// The tensor, represented as a span. /// The element in with the largest magnitude (absolute value). @@ -1391,6 +1414,23 @@ public static void MaxMagnitude(ReadOnlySpan x, ReadOnlySpan y, Span where T : INumberBase => InvokeSpanSpanIntoSpan>(x, y, destination); + /// Computes the element-wise number with the largest magnitude in the specified tensors. + /// The first tensor, represented as a span. + /// The second tensor, represented as a scalar. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. + /// This method effectively computes [i] = .MaxMagnitude([i], ). + /// + /// + /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different + /// operating systems or architectures. + /// + /// + public static void MaxMagnitude(ReadOnlySpan x, T y, Span destination) + where T : INumberBase => + InvokeSpanScalarIntoSpan>(x, y, destination); + /// Searches for the smallest number in the specified tensor. /// The tensor, represented as a span. /// The minimum element in . @@ -1434,6 +1474,29 @@ public static void Min(ReadOnlySpan x, ReadOnlySpan y, Span destinat where T : INumber => InvokeSpanSpanIntoSpan>(x, y, destination); + /// Computes the element-wise minimum of the numbers in the specified tensors. + /// The first tensor, represented as a span. + /// The second tensor, represented as a scalar. + /// The destination tensor, represented as a span. + /// Destination is too short. + /// and reference overlapping memory locations and do not begin at the same location. 
+ ///
+ /// This method effectively computes [i] = .Min([i], ).
+ ///
+ /// The determination of the minimum element matches the IEEE 754:2019 `minimum` function. If either value is equal to ,
+ /// that value is stored as the result. Negative 0 is considered less than positive 0.
+ ///
+ ///
+ /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+ /// operating systems or architectures.
+ ///
+ ///
+ public static void Min(ReadOnlySpan x, T y, Span destination)
+ where T : INumber =>
+ InvokeSpanScalarIntoSpan>(x, y, destination);
+
 /// Searches for the number with the smallest magnitude in the specified tensor.
 /// The tensor, represented as a span.
 /// The element in with the smallest magnitude (absolute value).
@@ -1477,6 +1540,28 @@ public static void MinMagnitude(ReadOnlySpan x, ReadOnlySpan y, Span
 where T : INumberBase =>
 InvokeSpanSpanIntoSpan>(x, y, destination);

+ /// Computes the element-wise number with the smallest magnitude in the specified tensors.
+ /// The first tensor, represented as a span.
+ /// The second tensor, represented as a scalar.
+ /// The destination tensor, represented as a span.
+ /// Destination is too short.
+ /// and reference overlapping memory locations and do not begin at the same location.
+ /// This method effectively computes [i] = .MinMagnitude([i], ).
+ ///
+ ///
+ /// The determination of the minimum magnitude matches the IEEE 754:2019 `minimumMagnitude` function. If either value is equal to ,
+ /// that value is stored as the result. If the two values have the same magnitude and one is positive and the other is negative,
+ /// the negative value is considered to have the smaller magnitude.
+ ///
+ ///
+ /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+ /// operating systems or architectures.
+ ///
+ ///
+ public static void MinMagnitude(ReadOnlySpan x, T y, Span destination)
+ where T : INumberBase =>
+ InvokeSpanScalarIntoSpan>(x, y, destination);
+
 /// Computes the element-wise product of numbers in the specified tensors.
 /// The first tensor, represented as a span.
 /// The second tensor, represented as a span.
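With the span-scalar overloads added above, clamping a tensor against constant bounds becomes two calls. The following is a hypothetical usage sketch: the data values and class name are invented, the method shapes follow the signatures added in this commit, and writing into the same destination span that was just produced is allowed because source and destination begin at the same location.

using System;
using System.Numerics.Tensors;

internal static class ClampDemo
{
    public static void Main()
    {
        ReadOnlySpan<float> x = stackalloc float[] { -3f, 0.5f, 2f, 8f };
        Span<float> destination = stackalloc float[4];

        // Element-wise minimum against the scalar upper bound...
        TensorPrimitives.Min(x, 4f, destination);
        // ...then element-wise maximum against the scalar lower bound, in place.
        TensorPrimitives.Max(destination, -1f, destination);

        // destination now holds each x[i] clamped to [-1, 4]: -1, 0.5, 2, 4
        Console.WriteLine(string.Join(", ", destination.ToArray()));
    }
}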
diff --git a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs index cd2ad8c039643..65c45de4c4255 100644 --- a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs +++ b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs @@ -369,6 +369,10 @@ public static IEnumerable SpanScalarDestinationFunctionsToTest() yield return new object[] { new SpanScalarDestinationDelegate(TensorPrimitives.Ieee754Remainder), new Func(T.Ieee754Remainder) }; yield return new object[] { new SpanScalarDestinationDelegate(TensorPrimitives.Pow), new Func(T.Pow) }; yield return new object[] { new SpanScalarDestinationDelegate(TensorPrimitives.Log), new Func(T.Log) }; + yield return new object[] { new SpanScalarDestinationDelegate(TensorPrimitives.Max), new Func(T.Max) }; + yield return new object[] { new SpanScalarDestinationDelegate(TensorPrimitives.MaxMagnitude), new Func(T.MaxMagnitude) }; + yield return new object[] { new SpanScalarDestinationDelegate(TensorPrimitives.Min), new Func(T.Min) }; + yield return new object[] { new SpanScalarDestinationDelegate(TensorPrimitives.MinMagnitude), new Func(T.MinMagnitude) }; } [Theory] @@ -1299,6 +1303,10 @@ public static IEnumerable SpanScalarDestinationFunctionsToTest() { yield return new object[] { new SpanScalarDestinationDelegate(TensorPrimitives.BitwiseAnd), new Func((x, y) => x & y) }; yield return new object[] { new SpanScalarDestinationDelegate(TensorPrimitives.BitwiseOr), new Func((x, y) => x | y) }; + yield return new object[] { new SpanScalarDestinationDelegate(TensorPrimitives.Max), new Func(T.Max) }; + yield return new object[] { new SpanScalarDestinationDelegate(TensorPrimitives.MaxMagnitude), new Func(T.MaxMagnitude) }; + yield return new object[] { new SpanScalarDestinationDelegate(TensorPrimitives.Min), new Func(T.Min) }; + yield return new object[] { new SpanScalarDestinationDelegate(TensorPrimitives.MinMagnitude), new Func(T.MinMagnitude) }; yield return new object[] { new SpanScalarDestinationDelegate(TensorPrimitives.Xor), new Func((x, y) => x ^ y) }; } From 14785e3ef047f2854e1dc62f4413150e44538151 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Sun, 21 Jan 2024 17:39:20 -0500 Subject: [PATCH 14/14] Use ElementWiseSelect --- .../netcore/TensorPrimitives.netcore.cs | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs index 85535d85f721d..1550e3d6bf7d4 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs @@ -10266,7 +10266,7 @@ public static void Invoke(ref Vector128 result, Vector128 current, ref Vec // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? 
(resultIndex < currentIndex) : IsNegative(current)); Vector128 currentNegative = IsNegative(current); Vector128 sameSign = Vector128.Equals(IsNegative(result).AsInt32(), currentNegative.AsInt32()).As(); - useResult |= equalMask & Vector128.ConditionalSelect(sameSign, lessThanIndexMask, currentNegative); + useResult |= equalMask & ElementWiseSelect(sameSign, lessThanIndexMask, currentNegative); } else { @@ -10292,7 +10292,7 @@ public static void Invoke(ref Vector256 result, Vector256 current, ref Vec // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? (resultIndex < currentIndex) : IsNegative(current)); Vector256 currentNegative = IsNegative(current); Vector256 sameSign = Vector256.Equals(IsNegative(result).AsInt32(), currentNegative.AsInt32()).As(); - useResult |= equalMask & Vector256.ConditionalSelect(sameSign, lessThanIndexMask, currentNegative); + useResult |= equalMask & ElementWiseSelect(sameSign, lessThanIndexMask, currentNegative); } else { @@ -10318,7 +10318,7 @@ public static void Invoke(ref Vector512 result, Vector512 current, ref Vec // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? (resultIndex < currentIndex) : IsNegative(current)); Vector512 currentNegative = IsNegative(current); Vector512 sameSign = Vector512.Equals(IsNegative(result).AsInt32(), currentNegative.AsInt32()).As(); - useResult |= equalMask & Vector512.ConditionalSelect(sameSign, lessThanIndexMask, currentNegative); + useResult |= equalMask & ElementWiseSelect(sameSign, lessThanIndexMask, currentNegative); } else { @@ -10369,7 +10369,7 @@ public static void Invoke(ref Vector128 result, Vector128 current, ref Vec // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? (resultIndex < currentIndex) : IsNegative(current)); Vector128 currentNegative = IsNegative(current); Vector128 sameSign = Vector128.Equals(IsNegative(result).AsInt32(), currentNegative.AsInt32()).As(); - useResult |= equalMask & Vector128.ConditionalSelect(sameSign, lessThanIndexMask, currentNegative); + useResult |= equalMask & ElementWiseSelect(sameSign, lessThanIndexMask, currentNegative); } else { @@ -10396,7 +10396,7 @@ public static void Invoke(ref Vector256 result, Vector256 current, ref Vec // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? (resultIndex < currentIndex) : IsNegative(current)); Vector256 currentNegative = IsNegative(current); Vector256 sameSign = Vector256.Equals(IsNegative(result).AsInt32(), currentNegative.AsInt32()).As(); - useResult |= equalMask & Vector256.ConditionalSelect(sameSign, lessThanIndexMask, currentNegative); + useResult |= equalMask & ElementWiseSelect(sameSign, lessThanIndexMask, currentNegative); } else { @@ -10423,7 +10423,7 @@ public static void Invoke(ref Vector512 result, Vector512 current, ref Vec // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? (resultIndex < currentIndex) : IsNegative(current)); Vector512 currentNegative = IsNegative(current); Vector512 sameSign = Vector512.Equals(IsNegative(result).AsInt32(), currentNegative.AsInt32()).As(); - useResult |= equalMask & Vector512.ConditionalSelect(sameSign, lessThanIndexMask, currentNegative); + useResult |= equalMask & ElementWiseSelect(sameSign, lessThanIndexMask, currentNegative); } else { @@ -10477,7 +10477,7 @@ public static void Invoke(ref Vector128 result, Vector128 current, ref Vec // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? 
(resultIndex < currentIndex) : IsNegative(result)); Vector128 resultNegative = IsNegative(result); Vector128 sameSign = Vector128.Equals(resultNegative.AsInt32(), IsNegative(current).AsInt32()).As(); - useResult |= equalMask & Vector128.ConditionalSelect(sameSign, lessThanIndexMask, resultNegative); + useResult |= equalMask & ElementWiseSelect(sameSign, lessThanIndexMask, resultNegative); } else { @@ -10503,7 +10503,7 @@ public static void Invoke(ref Vector256 result, Vector256 current, ref Vec // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? (resultIndex < currentIndex) : IsNegative(result)); Vector256 resultNegative = IsNegative(result); Vector256 sameSign = Vector256.Equals(resultNegative.AsInt32(), IsNegative(current).AsInt32()).As(); - useResult |= equalMask & Vector256.ConditionalSelect(sameSign, lessThanIndexMask, resultNegative); + useResult |= equalMask & ElementWiseSelect(sameSign, lessThanIndexMask, resultNegative); } else { @@ -10529,7 +10529,7 @@ public static void Invoke(ref Vector512 result, Vector512 current, ref Vec // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? (resultIndex < currentIndex) : IsNegative(result)); Vector512 resultNegative = IsNegative(result); Vector512 sameSign = Vector512.Equals(resultNegative.AsInt32(), IsNegative(current).AsInt32()).As(); - useResult |= equalMask & Vector512.ConditionalSelect(sameSign, lessThanIndexMask, resultNegative); + useResult |= equalMask & ElementWiseSelect(sameSign, lessThanIndexMask, resultNegative); } else { @@ -10580,7 +10580,7 @@ public static void Invoke(ref Vector128 result, Vector128 current, ref Vec // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? (resultIndex < currentIndex) : IsNegative(result)); Vector128 resultNegative = IsNegative(result); Vector128 sameSign = Vector128.Equals(resultNegative.AsInt32(), IsNegative(current).AsInt32()).As(); - useResult |= equalMask & Vector128.ConditionalSelect(sameSign, lessThanIndexMask, resultNegative); + useResult |= equalMask & ElementWiseSelect(sameSign, lessThanIndexMask, resultNegative); } else { @@ -10607,7 +10607,7 @@ public static void Invoke(ref Vector256 result, Vector256 current, ref Vec // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? (resultIndex < currentIndex) : IsNegative(result)); Vector256 resultNegative = IsNegative(result); Vector256 sameSign = Vector256.Equals(resultNegative.AsInt32(), IsNegative(current).AsInt32()).As(); - useResult |= equalMask & Vector256.ConditionalSelect(sameSign, lessThanIndexMask, resultNegative); + useResult |= equalMask & ElementWiseSelect(sameSign, lessThanIndexMask, resultNegative); } else { @@ -10634,7 +10634,7 @@ public static void Invoke(ref Vector512 result, Vector512 current, ref Vec // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? (resultIndex < currentIndex) : IsNegative(result)); Vector512 resultNegative = IsNegative(result); Vector512 sameSign = Vector512.Equals(resultNegative.AsInt32(), IsNegative(current).AsInt32()).As(); - useResult |= equalMask & Vector512.ConditionalSelect(sameSign, lessThanIndexMask, resultNegative); + useResult |= equalMask & ElementWiseSelect(sameSign, lessThanIndexMask, resultNegative); } else {
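For reference, at these call sites the masks come from full-lane comparisons, so the replacement must behave like Vector128.ConditionalSelect there. The sketch below shows only that bitwise semantic model; the actual ElementWiseSelect helper is not shown in this patch and presumably also routes to hardware blend instructions where available, which is an assumption here.

using System.Runtime.Intrinsics;

internal static class SelectSketch
{
    // Per-bit blend: take 'left' where 'mask' bits are set, 'right' where they are clear.
    // With the full-lane masks produced by Vector128.Equals/IsNegative in the code above,
    // this selects whole elements, matching what the useResult computation needs.
    public static Vector128<T> Select<T>(Vector128<T> mask, Vector128<T> left, Vector128<T> right)
        where T : struct =>
        (mask & left) | (~mask & right);
}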