From 46a3c1278437d8de23aae8bca054e4eb1275739e Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Mon, 2 Oct 2023 10:21:03 -0700 Subject: [PATCH 1/7] Add a way to support operations that can't be vectorized on netstandard --- .../Numerics/Tensors/TensorPrimitives.netstandard.cs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netstandard.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netstandard.cs index ac4ea2dfe9bef..5c2ecb0b547d7 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netstandard.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netstandard.cs @@ -97,7 +97,7 @@ private static float Aggregate( float result; - if (Vector.IsHardwareAccelerated && x.Length >= Vector.Count) + if (Vector.IsHardwareAccelerated && load.CanVectorize && x.Length >= Vector.Count) { ref float xRef = ref MemoryMarshal.GetReference(x); @@ -304,7 +304,7 @@ private static void InvokeSpanIntoSpan( ref float dRef = ref MemoryMarshal.GetReference(destination); int i = 0, oneVectorFromEnd; - if (Vector.IsHardwareAccelerated) + if (Vector.IsHardwareAccelerated && op.CanVectorize) { oneVectorFromEnd = x.Length - Vector.Count; if (oneVectorFromEnd >= 0) @@ -885,6 +885,7 @@ public Vector Invoke(Vector x, Vector y) private readonly struct NegateOperator : IUnaryOperator { + public bool CanVectorize => true; public float Invoke(float x) => -x; public Vector Invoke(Vector x) => -x; } @@ -903,24 +904,28 @@ public Vector Invoke(Vector x, Vector y) private readonly struct IdentityOperator : IUnaryOperator { + public bool CanVectorize => true; public float Invoke(float x) => x; public Vector Invoke(Vector x) => x; } private readonly struct SquaredOperator : IUnaryOperator { + public bool CanVectorize => true; public float Invoke(float x) => x * x; public Vector 
Invoke(Vector x) => x * x; } private readonly struct AbsoluteOperator : IUnaryOperator { + public bool CanVectorize => true; public float Invoke(float x) => MathF.Abs(x); public Vector Invoke(Vector x) => Vector.Abs(x); } private interface IUnaryOperator { + bool CanVectorize { get; } float Invoke(float x); Vector Invoke(Vector x); } From 1f3b10970e819d870cf5f4bc7bd215630634b968 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Mon, 2 Oct 2023 11:01:58 -0700 Subject: [PATCH 2/7] Updating TensorPrimitives.Log2 to be vectorized on .NET Core --- .../Numerics/Tensors/TensorPrimitives.cs | 16 +- .../Tensors/TensorPrimitives.netcore.cs | 151 ++++++++++++++++++ .../Tensors/TensorPrimitives.netstandard.cs | 12 ++ 3 files changed, 165 insertions(+), 14 deletions(-) diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.cs index cd4a33f8d60a9..aaafb74ec2a8c 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.cs @@ -598,20 +598,8 @@ public static void Log(ReadOnlySpan x, Span destination) /// operating systems or architectures. /// /// - public static void Log2(ReadOnlySpan x, Span destination) - { - if (x.Length > destination.Length) - { - ThrowHelper.ThrowArgument_DestinationTooShort(); - } - - ValidateInputOutputSpanNonOverlapping(x, destination); - - for (int i = 0; i < x.Length; i++) - { - destination[i] = Log2(x[i]); - } - } + public static void Log2(ReadOnlySpan x, Span destination) => + InvokeSpanIntoSpan(x, destination); /// Searches for the largest single-precision floating-point number in the specified tensor. /// The tensor, represented as a span. 
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netcore.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netcore.cs index 6773771602212..b9652a5104c6c 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netcore.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netcore.cs @@ -2579,6 +2579,157 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) #endif } + private readonly struct Log2Operator : IUnaryOperator + { + // This code is based on `vrs4_log2f` from amd/aocl-libm-ose + // Copyright (C) 2021-2022 Advanced Micro Devices, Inc. All rights reserved. + // + // Licensed under the BSD 3-Clause "New" or "Revised" License + // See THIRD-PARTY-NOTICES.TXT for the full license text + + // Spec: + // log2f(x) + // = log2f(x) if x ∈ F and x > 0 + // = x if x = qNaN + // = 0 if x = 1 + // = -inf if x = {-0, 0} + // = NaN otherwise + // + // Assumptions/Expectations + // - Maximum ULP is observed to be at 4 + // - Some FPU Exceptions may not be available + // - Performance is at least 3x + // + // Implementation Notes: + // 1. Range Reduction: + // x = 2^n*(1+f) .... (1) + // where n is exponent and is an integer + // (1+f) is mantissa ∈ [1,2). i.e., 1 ≤ 1+f < 2 .... (2) + // + // From (1), taking log on both sides + // log2(x) = log2(2^n * (1+f)) + // = n + log2(1+f) .... (3) + // + // let z = 1 + f + // log2(z) = log2(k) + log2(z) - log2(k) + // log2(z) = log2(kz) - log2(k) + // + // From (2), range of z is [1, 2) + // by simply dividing range by 'k', z is in [1/k, 2/k) .... (4) + // Best choice of k is the one which gives equal and opposite values + // at extrema +- -+ + // 1 | 2 | + // --- - 1 = - |--- - 1 | + // k | k | .... (5) + // +- -+ + // + // Solving for k, k = 3/2, + // From (4), using 'k' value, range is therefore [-0.3333, 0.3333] + // + // 2. 
Polynomial Approximation: + // More information refer to tools/sollya/vrs4_logf.sollya + // + // 7th Deg - Error abs: 0x1.04c4ac98p-22 rel: 0x1.2216e6f8p-19 + + private const uint V_MIN = 0x00800000; + private const uint V_MAX = 0x7F800000; + private const uint V_MASK = 0x007FFFFF; + private const uint V_OFF = 0x3F2AAAAB; + + private const float C0 = 0.0f; + private const float C1 = 1.4426951f; + private const float C2 = -0.72134554f; + private const float C3 = 0.48089063f; + private const float C4 = -0.36084408f; + private const float C5 = 0.2888971f; + private const float C6 = -0.23594281f; + private const float C7 = 0.19948183f; + private const float C8 = -0.22616665f; + private const float C9 = 0.21228963f; + + public static float Invoke(float x) => MathF.Log2(x); + + public static Vector128 Invoke(Vector128 x) + { + Vector128 vx = x.AsUInt32() - Vector128.Create(V_OFF); + Vector128 n = (vx >> 23).AsSingle(); + + vx = (vx & Vector128.Create(V_MASK)) + Vector128.Create(V_OFF); + + Vector128 r = vx.AsSingle() - Vector128.One; + + Vector128 r2 = r * r; + Vector128 r4 = r2 * r2; + Vector128 r8 = r4 * r4; + + Vector128 poly = (Vector128.Create(C9) * r + Vector128.Create(C8)) * r8 + + (((Vector128.Create(C7) * r + Vector128.Create(C6)) * r2 + + (Vector128.Create(C5) * r + Vector128.Create(C4))) * r4 + + ((Vector128.Create(C3) * r + Vector128.Create(C2)) * r2 + + (Vector128.Create(C1) * r + Vector128.Create(C0)))); + + return Vector128.ConditionalSelect( + Vector128.GreaterThanOrEqual(x.AsUInt32() - Vector128.Create(V_MIN), Vector128.Create(V_MAX - V_MIN)).AsSingle(), + x, + n + poly + ); + } + + public static Vector256 Invoke(Vector256 x) + { + Vector256 vx = x.AsUInt32() - Vector256.Create(V_OFF); + Vector256 n = (vx >> 23).AsSingle(); + + vx = (vx & Vector256.Create(V_MASK)) + Vector256.Create(V_OFF); + + Vector256 r = vx.AsSingle() - Vector256.One; + + Vector256 r2 = r * r; + Vector256 r4 = r2 * r2; + Vector256 r8 = r4 * r4; + + Vector256 poly = 
(Vector256.Create(C9) * r + Vector256.Create(C8)) * r8 + + (((Vector256.Create(C7) * r + Vector256.Create(C6)) * r2 + + (Vector256.Create(C5) * r + Vector256.Create(C4))) * r4 + + ((Vector256.Create(C3) * r + Vector256.Create(C2)) * r2 + + (Vector256.Create(C1) * r + Vector256.Create(C0)))); + + return Vector256.ConditionalSelect( + Vector256.GreaterThanOrEqual(x.AsUInt32() - Vector256.Create(V_MIN), Vector256.Create(V_MAX - V_MIN)).AsSingle(), + x, + n + poly + ); + } + +#if NET8_0_OR_GREATER + public static Vector512 Invoke(Vector512 x) + { + Vector512 vx = x.AsUInt32() - Vector512.Create(V_OFF); + Vector512 n = (vx >> 23).AsSingle(); + + vx = (vx & Vector512.Create(V_MASK)) + Vector512.Create(V_OFF); + + Vector512 r = vx.AsSingle() - Vector512.One; + + Vector512 r2 = r * r; + Vector512 r4 = r2 * r2; + Vector512 r8 = r4 * r4; + + Vector512 poly = (Vector512.Create(C9) * r + Vector512.Create(C8)) * r8 + + (((Vector512.Create(C7) * r + Vector512.Create(C6)) * r2 + + (Vector512.Create(C5) * r + Vector512.Create(C4))) * r4 + + ((Vector512.Create(C3) * r + Vector512.Create(C2)) * r2 + + (Vector512.Create(C1) * r + Vector512.Create(C0)))); + + return Vector512.ConditionalSelect( + Vector512.GreaterThanOrEqual(x.AsUInt32() - Vector512.Create(V_MIN), Vector512.Create(V_MAX - V_MIN)).AsSingle(), + x, + n + poly + ); + } +#endif + } + private interface IUnaryOperator { static abstract float Invoke(float x); diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netstandard.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netstandard.cs index 5c2ecb0b547d7..e68c19497c92d 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netstandard.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netstandard.cs @@ -923,6 +923,18 @@ public Vector Invoke(Vector x, Vector y) public Vector Invoke(Vector x) => Vector.Abs(x); } + 
private readonly struct Log2Operator : IUnaryOperator + { + public bool CanVectorize => false; + public float Invoke(float x) => Log2(x); + + public Vector Invoke(Vector x) + { + // Vectorizing requires shift right support, which is .NET 7 or later + throw new NotImplementedException(); + } + } + private interface IUnaryOperator { bool CanVectorize { get; } From d73764c3ab8f6c169a5d768d6793e5056b39a594 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Mon, 2 Oct 2023 11:14:36 -0700 Subject: [PATCH 3/7] Update src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netstandard.cs Co-authored-by: Stephen Toub --- .../src/System/Numerics/Tensors/TensorPrimitives.netstandard.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netstandard.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netstandard.cs index e68c19497c92d..3d2b89da452bc 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netstandard.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netstandard.cs @@ -926,6 +926,7 @@ public Vector Invoke(Vector x, Vector y) private readonly struct Log2Operator : IUnaryOperator { public bool CanVectorize => false; + public float Invoke(float x) => Log2(x); public Vector Invoke(Vector x) From 57f84a89daf2334277ddc2bd3b8499595b8b0ffe Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Mon, 2 Oct 2023 12:35:27 -0700 Subject: [PATCH 4/7] Ensure we do an arithmetic right shift in the Log2 vectorization --- .../src/System/Numerics/Tensors/TensorPrimitives.netcore.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netcore.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netcore.cs index 
b9652a5104c6c..707dfad6f2df1 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netcore.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netcore.cs @@ -2652,7 +2652,7 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) public static Vector128 Invoke(Vector128 x) { Vector128 vx = x.AsUInt32() - Vector128.Create(V_OFF); - Vector128 n = (vx >> 23).AsSingle(); + Vector128 n = (vx.AsInt32() >> 23).AsSingle(); vx = (vx & Vector128.Create(V_MASK)) + Vector128.Create(V_OFF); @@ -2678,7 +2678,7 @@ public static Vector128 Invoke(Vector128 x) public static Vector256 Invoke(Vector256 x) { Vector256 vx = x.AsUInt32() - Vector256.Create(V_OFF); - Vector256 n = (vx >> 23).AsSingle(); + Vector256 n = (vx.AsInt32() >> 23).AsSingle(); vx = (vx & Vector256.Create(V_MASK)) + Vector256.Create(V_OFF); @@ -2705,7 +2705,7 @@ public static Vector256 Invoke(Vector256 x) public static Vector512 Invoke(Vector512 x) { Vector512 vx = x.AsUInt32() - Vector512.Create(V_OFF); - Vector512 n = (vx >> 23).AsSingle(); + Vector512 n = (vx.AsInt32() >> 23).AsSingle(); vx = (vx & Vector512.Create(V_MASK)) + Vector512.Create(V_OFF); From ab7b2bd131f463141db9ff94030b96b63cea1308 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Mon, 2 Oct 2023 12:38:13 -0700 Subject: [PATCH 5/7] Ensure the code can compile on .NET 7 --- .../Numerics/Tensors/TensorPrimitives.netcore.cs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netcore.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netcore.cs index 707dfad6f2df1..45022eedc44d0 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netcore.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netcore.cs @@ -2652,11 +2652,11 @@ public static 
Vector512 Invoke(Vector512 x, Vector512 y) public static Vector128 Invoke(Vector128 x) { Vector128 vx = x.AsUInt32() - Vector128.Create(V_OFF); - Vector128 n = (vx.AsInt32() >> 23).AsSingle(); + Vector128 n = (Vector128.ShiftRightArithmetic(vx.AsInt32(), 23)).AsSingle(); vx = (vx & Vector128.Create(V_MASK)) + Vector128.Create(V_OFF); - Vector128 r = vx.AsSingle() - Vector128.One; + Vector128 r = vx.AsSingle() - Vector128.Create(1.0f); Vector128 r2 = r * r; Vector128 r4 = r2 * r2; @@ -2678,11 +2678,11 @@ public static Vector128 Invoke(Vector128 x) public static Vector256 Invoke(Vector256 x) { Vector256 vx = x.AsUInt32() - Vector256.Create(V_OFF); - Vector256 n = (vx.AsInt32() >> 23).AsSingle(); + Vector256 n = (Vector256.ShiftRightArithmetic(vx.AsInt32(), 23)).AsSingle(); vx = (vx & Vector256.Create(V_MASK)) + Vector256.Create(V_OFF); - Vector256 r = vx.AsSingle() - Vector256.One; + Vector256 r = vx.AsSingle() - Vector256.Create(1.0f); Vector256 r2 = r * r; Vector256 r4 = r2 * r2; @@ -2705,11 +2705,11 @@ public static Vector256 Invoke(Vector256 x) public static Vector512 Invoke(Vector512 x) { Vector512 vx = x.AsUInt32() - Vector512.Create(V_OFF); - Vector512 n = (vx.AsInt32() >> 23).AsSingle(); + Vector512 n = (Vector512.ShiftRightArithmetic(vx.AsInt32(), 23)).AsSingle(); vx = (vx & Vector512.Create(V_MASK)) + Vector512.Create(V_OFF); - Vector512 r = vx.AsSingle() - Vector512.One; + Vector512 r = vx.AsSingle() - Vector512.Create(1.0f); Vector512 r2 = r * r; Vector512 r4 = r2 * r2; From 408df3e65a7e2d7d027a614fe88a1e81ebeee5b8 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Mon, 2 Oct 2023 14:07:18 -0700 Subject: [PATCH 6/7] Ensure that edge cases are properly handled and don't resolve to `x` --- .../Tensors/TensorPrimitives.netcore.cs | 120 +++++++++++++++--- 1 file changed, 105 insertions(+), 15 deletions(-) diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netcore.cs 
b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netcore.cs index 45022eedc44d0..6b867745b062d 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netcore.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netcore.cs @@ -2668,11 +2668,41 @@ public static Vector128 Invoke(Vector128 x) + ((Vector128.Create(C3) * r + Vector128.Create(C2)) * r2 + (Vector128.Create(C1) * r + Vector128.Create(C0)))); - return Vector128.ConditionalSelect( - Vector128.GreaterThanOrEqual(x.AsUInt32() - Vector128.Create(V_MIN), Vector128.Create(V_MAX - V_MIN)).AsSingle(), - x, - n + poly - ); + Vector128 result = n + poly; + + // x < 1.1754944E-38 or x = INF or x = NAN + if (Vector128.GreaterThanOrEqualAny(x.AsUInt32() - Vector128.Create(V_MIN), Vector128.Create(V_MAX - V_MIN))) + { + // log2(+0.0) == log2(-0.0) == -Infinity + result = Vector128.ConditionalSelect( + Vector128.Equals(Vector128.ShiftLeft(x.AsUInt32(), 1), Vector128.Zero).AsSingle(), + Vector128.Create(float.NegativeInfinity), + result + ); + + // log2(NaN) == NaN + result = Vector128.ConditionalSelect( + Vector128.Equals(x, x), + result, + x + ); + + // log2(-x) == NaN + result = Vector128.ConditionalSelect( + Vector128.LessThan(x.AsInt32(), Vector128.Zero).AsSingle(), + Vector128.Create(float.NaN), + result + ); + + // log2(+Infinity) == log2(-Infinity) == +Infinity + result = Vector128.ConditionalSelect( + Vector128.Equals(x.AsUInt32() & Vector128.Create(V_MAX), Vector128.Create(V_MAX)).AsSingle(), + Vector128.Create(float.PositiveInfinity), + result + ); + } + + return result; } public static Vector256 Invoke(Vector256 x) @@ -2694,11 +2724,41 @@ public static Vector256 Invoke(Vector256 x) + ((Vector256.Create(C3) * r + Vector256.Create(C2)) * r2 + (Vector256.Create(C1) * r + Vector256.Create(C0)))); - return Vector256.ConditionalSelect( - Vector256.GreaterThanOrEqual(x.AsUInt32() - 
Vector256.Create(V_MIN), Vector256.Create(V_MAX - V_MIN)).AsSingle(), - x, - n + poly - ); + Vector256 result = n + poly; + + // x < 1.1754944E-38 or x = INF or x = NAN + if (Vector256.GreaterThanOrEqualAny(x.AsUInt32() - Vector256.Create(V_MIN), Vector256.Create(V_MAX - V_MIN))) + { + // log2(+0.0) == log2(-0.0) == -Infinity + result = Vector256.ConditionalSelect( + Vector256.Equals(Vector256.ShiftLeft(x.AsUInt32(), 1), Vector256.Zero).AsSingle(), + Vector256.Create(float.NegativeInfinity), + result + ); + + // log2(NaN) == NaN + result = Vector256.ConditionalSelect( + Vector256.Equals(x, x), + result, + x + ); + + // log2(-x) == NaN + result = Vector256.ConditionalSelect( + Vector256.LessThan(x.AsInt32(), Vector256.Zero).AsSingle(), + Vector256.Create(float.NaN), + result + ); + + // log2(+Infinity) == log2(-Infinity) == +Infinity + result = Vector256.ConditionalSelect( + Vector256.Equals(x.AsUInt32() & Vector256.Create(V_MAX), Vector256.Create(V_MAX)).AsSingle(), + Vector256.Create(float.PositiveInfinity), + result + ); + } + + return result; } #if NET8_0_OR_GREATER @@ -2721,11 +2781,41 @@ public static Vector512 Invoke(Vector512 x) + ((Vector512.Create(C3) * r + Vector512.Create(C2)) * r2 + (Vector512.Create(C1) * r + Vector512.Create(C0)))); - return Vector512.ConditionalSelect( - Vector512.GreaterThanOrEqual(x.AsUInt32() - Vector512.Create(V_MIN), Vector512.Create(V_MAX - V_MIN)).AsSingle(), - x, - n + poly - ); + Vector512 result = n + poly; + + // x < 1.1754944E-38 or x = INF or x = NAN + if (Vector512.GreaterThanOrEqualAny(x.AsUInt32() - Vector512.Create(V_MIN), Vector512.Create(V_MAX - V_MIN))) + { + // log2(+0.0) == log2(-0.0) == -Infinity + result = Vector512.ConditionalSelect( + Vector512.Equals(Vector512.ShiftLeft(x.AsUInt32(), 1), Vector512.Zero).AsSingle(), + Vector512.Create(float.NegativeInfinity), + result + ); + + // log2(NaN) == NaN + result = Vector512.ConditionalSelect( + Vector512.Equals(x, x), + result, + x + ); + + // log2(-x) == NaN + 
result = Vector512.ConditionalSelect( + Vector512.LessThan(x.AsInt32(), Vector512.Zero).AsSingle(), + Vector512.Create(float.NaN), + result + ); + + // log2(+Infinity) == log2(-Infinity) == +Infinity + result = Vector512.ConditionalSelect( + Vector512.Equals(x.AsUInt32() & Vector512.Create(V_MAX), Vector512.Create(V_MAX)).AsSingle(), + Vector512.Create(float.PositiveInfinity), + result + ); + } + + return result; } #endif } From dc7c0de617f08058c2b69beb13d3d47857f1cfc7 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Mon, 2 Oct 2023 14:51:55 -0700 Subject: [PATCH 7/7] Ensure that Log2 special results are explicitly handled. --- .../Tensors/TensorPrimitives.netcore.cs | 233 ++++++++++-------- 1 file changed, 136 insertions(+), 97 deletions(-) diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netcore.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netcore.cs index 6b867745b062d..18244a42cd296 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netcore.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netcore.cs @@ -2651,8 +2651,51 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) public static Vector128 Invoke(Vector128 x) { + Vector128 specialResult = x; + + // x is zero, subnormal, negative, infinity, or NaN + Vector128 specialMask = Vector128.GreaterThanOrEqual(x.AsUInt32() - Vector128.Create(V_MIN), Vector128.Create(V_MAX - V_MIN)); + + if (specialMask != Vector128.Zero) + { + // float.IsZero(x) ? float.NegativeInfinity : x + Vector128 zeroMask = Vector128.Equals(x, Vector128.Zero); + + specialResult = Vector128.ConditionalSelect( + zeroMask, + Vector128.Create(float.NegativeInfinity), + specialResult + ); + + // (x < 0) ? 
float.NaN : x + Vector128 lessThanZeroMask = Vector128.LessThan(x, Vector128.Zero); + + specialResult = Vector128.ConditionalSelect( + lessThanZeroMask, + Vector128.Create(float.NaN), + specialResult + ); + + // float.IsZero(x) | (x < 0) | float.IsNaN(x) | float.IsPositiveInfinity(x) + Vector128 temp = zeroMask + | lessThanZeroMask + | ~Vector128.Equals(x, x) + | Vector128.Equals(x, Vector128.Create(float.PositiveInfinity)); + + // subnormal + Vector128 subnormalMask = Vector128.AndNot(specialMask.AsSingle(), temp); + + x = Vector128.ConditionalSelect( + subnormalMask, + ((x * 8388608.0f).AsUInt32() - Vector128.Create(23u << 23)).AsSingle(), + x + ); + + specialMask = temp.AsUInt32(); + } + Vector128 vx = x.AsUInt32() - Vector128.Create(V_OFF); - Vector128 n = (Vector128.ShiftRightArithmetic(vx.AsInt32(), 23)).AsSingle(); + Vector128 n = Vector128.ConvertToSingle(Vector128.ShiftRightArithmetic(vx.AsInt32(), 23)); vx = (vx & Vector128.Create(V_MASK)) + Vector128.Create(V_OFF); @@ -2668,47 +2711,60 @@ public static Vector128 Invoke(Vector128 x) + ((Vector128.Create(C3) * r + Vector128.Create(C2)) * r2 + (Vector128.Create(C1) * r + Vector128.Create(C0)))); - Vector128 result = n + poly; + return Vector128.ConditionalSelect( + specialMask.AsSingle(), + specialResult, + n + poly + ); + } + + public static Vector256 Invoke(Vector256 x) + { + Vector256 specialResult = x; + + // x is zero, subnormal, negative, infinity, or NaN + Vector256 specialMask = Vector256.GreaterThanOrEqual(x.AsUInt32() - Vector256.Create(V_MIN), Vector256.Create(V_MAX - V_MIN)); - // x < 1.1754944E-38 or x = INF or x = NAN - if (Vector128.GreaterThanOrEqualAny(x.AsUInt32() - Vector128.Create(V_MIN), Vector128.Create(V_MAX - V_MIN))) + if (specialMask != Vector256.Zero) { - // log2(+0.0) == log2(-0.0) == -Infinity - result = Vector128.ConditionalSelect( - Vector128.Equals(Vector128.ShiftLeft(x.AsUInt32(), 1), Vector128.Zero).AsSingle(), - Vector128.Create(float.NegativeInfinity), - result - ); + // float.IsZero(x) 
? float.NegativeInfinity : x + Vector256 zeroMask = Vector256.Equals(x, Vector256.Zero); - // log2(NaN) == NaN - result = Vector128.ConditionalSelect( - Vector128.Equals(x, x), - result, - x + specialResult = Vector256.ConditionalSelect( + zeroMask, + Vector256.Create(float.NegativeInfinity), + specialResult ); - // log2(-x) == NaN - result = Vector128.ConditionalSelect( - Vector128.LessThan(x.AsInt32(), Vector128.Zero).AsSingle(), - Vector128.Create(float.NaN), - result + // (x < 0) ? float.NaN : x + Vector256 lessThanZeroMask = Vector256.LessThan(x, Vector256.Zero); + + specialResult = Vector256.ConditionalSelect( + lessThanZeroMask, + Vector256.Create(float.NaN), + specialResult ); - // log2(+Infinity) == log2(-Infinity) == +Infinity - result = Vector128.ConditionalSelect( - Vector128.Equals(x.AsUInt32() & Vector128.Create(V_MAX), Vector128.Create(V_MAX)).AsSingle(), - Vector128.Create(float.PositiveInfinity), - result + // float.IsZero(x) | (x < 0) | float.IsNaN(x) | float.IsPositiveInfinity(x) + Vector256 temp = zeroMask + | lessThanZeroMask + | ~Vector256.Equals(x, x) + | Vector256.Equals(x, Vector256.Create(float.PositiveInfinity)); + + // subnormal + Vector256 subnormalMask = Vector256.AndNot(specialMask.AsSingle(), temp); + + x = Vector256.ConditionalSelect( + subnormalMask, + ((x * 8388608.0f).AsUInt32() - Vector256.Create(23u << 23)).AsSingle(), + x ); - } - return result; - } + specialMask = temp.AsUInt32(); + } - public static Vector256 Invoke(Vector256 x) - { Vector256 vx = x.AsUInt32() - Vector256.Create(V_OFF); - Vector256 n = (Vector256.ShiftRightArithmetic(vx.AsInt32(), 23)).AsSingle(); + Vector256 n = Vector256.ConvertToSingle(Vector256.ShiftRightArithmetic(vx.AsInt32(), 23)); vx = (vx & Vector256.Create(V_MASK)) + Vector256.Create(V_OFF); @@ -2724,48 +2780,61 @@ public static Vector256 Invoke(Vector256 x) + ((Vector256.Create(C3) * r + Vector256.Create(C2)) * r2 + (Vector256.Create(C1) * r + Vector256.Create(C0)))); - Vector256 result = n + 
poly; + return Vector256.ConditionalSelect( + specialMask.AsSingle(), + specialResult, + n + poly + ); + } + +#if NET8_0_OR_GREATER + public static Vector512 Invoke(Vector512 x) + { + Vector512 specialResult = x; - // x < 1.1754944E-38 or x = INF or x = NAN - if (Vector256.GreaterThanOrEqualAny(x.AsUInt32() - Vector256.Create(V_MIN), Vector256.Create(V_MAX - V_MIN))) + // x is zero, subnormal, negative, infinity, or NaN + Vector512 specialMask = Vector512.GreaterThanOrEqual(x.AsUInt32() - Vector512.Create(V_MIN), Vector512.Create(V_MAX - V_MIN)); + + if (specialMask != Vector512.Zero) { - // log2(+0.0) == log2(-0.0) == -Infinity - result = Vector256.ConditionalSelect( - Vector256.Equals(Vector256.ShiftLeft(x.AsUInt32(), 1), Vector256.Zero).AsSingle(), - Vector256.Create(float.NegativeInfinity), - result - ); + // float.IsZero(x) ? float.NegativeInfinity : x + Vector512 zeroMask = Vector512.Equals(x, Vector512.Zero); - // log2(NaN) == NaN - result = Vector256.ConditionalSelect( - Vector256.Equals(x, x), - result, - x + specialResult = Vector512.ConditionalSelect( + zeroMask, + Vector512.Create(float.NegativeInfinity), + specialResult ); - // log2(-x) == NaN - result = Vector256.ConditionalSelect( - Vector256.LessThan(x.AsInt32(), Vector256.Zero).AsSingle(), - Vector256.Create(float.NaN), - result + // (x < 0) ? 
float.NaN : x + Vector512 lessThanZeroMask = Vector512.LessThan(x, Vector512.Zero); + + specialResult = Vector512.ConditionalSelect( + lessThanZeroMask, + Vector512.Create(float.NaN), + specialResult ); - // log2(+Infinity) == log2(-Infinity) == +Infinity - result = Vector256.ConditionalSelect( - Vector256.Equals(x.AsUInt32() & Vector256.Create(V_MAX), Vector256.Create(V_MAX)).AsSingle(), - Vector256.Create(float.PositiveInfinity), - result + // float.IsZero(x) | (x < 0) | float.IsNaN(x) | float.IsPositiveInfinity(x) + Vector512 temp = zeroMask + | lessThanZeroMask + | ~Vector512.Equals(x, x) + | Vector512.Equals(x, Vector512.Create(float.PositiveInfinity)); + + // subnormal + Vector512 subnormalMask = Vector512.AndNot(specialMask.AsSingle(), temp); + + x = Vector512.ConditionalSelect( + subnormalMask, + ((x * 8388608.0f).AsUInt32() - Vector512.Create(23u << 23)).AsSingle(), + x ); - } - return result; - } + specialMask = temp.AsUInt32(); + } -#if NET8_0_OR_GREATER - public static Vector512 Invoke(Vector512 x) - { Vector512 vx = x.AsUInt32() - Vector512.Create(V_OFF); - Vector512 n = (Vector512.ShiftRightArithmetic(vx.AsInt32(), 23)).AsSingle(); + Vector512 n = Vector512.ConvertToSingle(Vector512.ShiftRightArithmetic(vx.AsInt32(), 23)); vx = (vx & Vector512.Create(V_MASK)) + Vector512.Create(V_OFF); @@ -2781,41 +2850,11 @@ public static Vector512 Invoke(Vector512 x) + ((Vector512.Create(C3) * r + Vector512.Create(C2)) * r2 + (Vector512.Create(C1) * r + Vector512.Create(C0)))); - Vector512 result = n + poly; - - // x < 1.1754944E-38 or x = INF or x = NAN - if (Vector512.GreaterThanOrEqualAny(x.AsUInt32() - Vector512.Create(V_MIN), Vector512.Create(V_MAX - V_MIN))) - { - // log2(+0.0) == log2(-0.0) == -Infinity - result = Vector512.ConditionalSelect( - Vector512.Equals(Vector512.ShiftLeft(x.AsUInt32(), 1), Vector512.Zero).AsSingle(), - Vector512.Create(float.NegativeInfinity), - result - ); - - // log2(NaN) == NaN - result = Vector512.ConditionalSelect( - 
Vector512.Equals(x, x), - result, - x - ); - - // log2(-x) == NaN - result = Vector512.ConditionalSelect( - Vector512.LessThan(x.AsInt32(), Vector512.Zero).AsSingle(), - Vector512.Create(float.NaN), - result - ); - - // log2(+Infinity) == log2(-Infinity) == +Infinity - result = Vector512.ConditionalSelect( - Vector512.Equals(x.AsUInt32() & Vector512.Create(V_MAX), Vector512.Create(V_MAX)).AsSingle(), - Vector512.Create(float.PositiveInfinity), - result - ); - } - - return result; + return Vector512.ConditionalSelect( + specialMask.AsSingle(), + specialResult, + n + poly + ); } #endif }