From 210d8f7740ab79207440eb42d03a2a07b7e8c0d0 Mon Sep 17 00:00:00 2001 From: Nicolas Portmann Date: Tue, 3 Nov 2020 20:25:21 +0100 Subject: [PATCH 01/10] Add initial vectorized color converter implementation --- ...cs => JpegColorConverter.FromCmykBasic.cs} | 16 +- .../JpegColorConverter.FromCmykSimdAvx2.cs | 145 +++++++++++++ ... JpegColorConverter.FromGrayScaleBasic.cs} | 13 +- ...pegColorConverter.FromGrayScaleSimdAvx2.cs | 109 ++++++++++ ....cs => JpegColorConverter.FromRgbBasic.cs} | 14 +- .../JpegColorConverter.FromRgbSimdAvx2.cs | 132 ++++++++++++ .../JpegColorConverter.FromYCbCrSimdAvx2.cs | 5 +- ...cs => JpegColorConverter.FromYccKBasic.cs} | 25 ++- .../JpegColorConverter.FromYccKSimdAvx2.cs | 193 ++++++++++++++++++ .../ColorConverters/JpegColorConverter.cs | 86 +++++++- .../Formats/Jpg/JpegColorConverterTests.cs | 2 +- 11 files changed, 704 insertions(+), 36 deletions(-) rename src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/{JpegColorConverter.FromCmyk.cs => JpegColorConverter.FromCmykBasic.cs} (75%) create mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykSimdAvx2.cs rename src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/{JpegColorConverter.FromGrayScale.cs => JpegColorConverter.FromGrayScaleBasic.cs} (74%) create mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleSimdAvx2.cs rename src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/{JpegColorConverter.FromRgb.cs => JpegColorConverter.FromRgbBasic.cs} (76%) create mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbSimdAvx2.cs rename src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/{JpegColorConverter.FromYccK.cs => JpegColorConverter.FromYccKBasic.cs} (57%) create mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKSimdAvx2.cs diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmyk.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykBasic.cs similarity index 75% rename from src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmyk.cs rename to src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykBasic.cs index 7b257b37da..117bb2c3fd 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmyk.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykBasic.cs @@ -8,16 +8,20 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters { internal abstract partial class JpegColorConverter { - internal sealed class FromCmyk : JpegColorConverter + internal sealed class FromCmykBasic : JpegColorConverter { - public FromCmyk(int precision) + public FromCmykBasic(int precision) : base(JpegColorSpace.Cmyk, precision) { } public override void ConvertToRgba(in ComponentValues values, Span result) { - // TODO: We can optimize a lot here with Vector and SRCS.Unsafe()! + ConvertCore(values, result, this.MaximumValue); + } + + internal static void ConvertCore(in ComponentValues values, Span result, float maxValue) + { ReadOnlySpan cVals = values.Component0; ReadOnlySpan mVals = values.Component1; ReadOnlySpan yVals = values.Component2; @@ -25,7 +29,7 @@ public override void ConvertToRgba(in ComponentValues values, Span resu var v = new Vector4(0, 0, 0, 1F); - var maximum = 1 / this.MaximumValue; + var maximum = 1 / maxValue; var scale = new Vector4(maximum, maximum, maximum, 1F); for (int i = 0; i < result.Length; i++) @@ -33,7 +37,7 @@ public override void ConvertToRgba(in ComponentValues values, Span resu float c = cVals[i]; float m = mVals[i]; float y = yVals[i]; - float k = kVals[i] / this.MaximumValue; + float k = kVals[i] / maxValue; v.X = c * k; v.Y = m * k; @@ -47,4 +51,4 @@ public override void ConvertToRgba(in ComponentValues values, Span resu } } } -} \ No newline at end of file +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykSimdAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykSimdAvx2.cs new file mode 100644 index 0000000000..afd36073ab --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykSimdAvx2.cs @@ -0,0 +1,145 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using static SixLabors.ImageSharp.SimdUtils; +#else +using SixLabors.ImageSharp.Tuples; +#endif + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromCmykVector8 : JpegColorConverter + { + public FromCmykVector8(int precision) + : base(JpegColorSpace.Cmyk, precision) + { + } + + public static bool IsAvailable => Vector.IsHardwareAccelerated && SimdUtils.HasVector8; + + public override void ConvertToRgba(in ComponentValues values, Span result) + { + int remainder = result.Length % 8; + int simdCount = result.Length - remainder; + if (simdCount > 0) + { + ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue); + } + + FromCmykBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue); + } + + internal static void ConvertCore(in ComponentValues values, Span result, float maxValue) + { + // This implementation is actually AVX specific. + // An AVX register is capable of storing 8 float-s. + if (!IsAvailable) + { + throw new InvalidOperationException( + "JpegColorConverter.FromGrayscaleVector8 can be used only on architecture having 256 byte floating point SIMD registers!"); + } + +#if SUPPORTS_RUNTIME_INTRINSICS + ref Vector256 cBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector256 mBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector256 yBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + ref Vector256 kBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component3)); + + ref Vector256 resultBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); + + // Used for the color conversion + var scale = Vector256.Create(1 / maxValue); + var one = Vector256.Create(1F); + + // Used for packing + ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); + Vector256 vcontrol = Unsafe.As>(ref control); + + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + Vector256 k = Avx2.PermuteVar8x32(Unsafe.Add(ref kBase, i), vcontrol); + Vector256 c = Avx2.PermuteVar8x32(Unsafe.Add(ref cBase, i), vcontrol); + Vector256 m = Avx2.PermuteVar8x32(Unsafe.Add(ref mBase, i), vcontrol); + Vector256 y = Avx2.PermuteVar8x32(Unsafe.Add(ref yBase, i), vcontrol); + + k = Avx.Multiply(k, scale); + + c = Avx.Multiply(Avx.Multiply(c, k), scale); + m = Avx.Multiply(Avx.Multiply(m, k), scale); + y = Avx.Multiply(Avx.Multiply(y, k), scale); + + Vector256 cmLo = Avx.UnpackLow(c, m); + Vector256 yoLo = Avx.UnpackLow(y, one); + Vector256 cmHi = Avx.UnpackHigh(c, m); + Vector256 yoHi = Avx.UnpackHigh(y, one); + + ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); + + destination = Avx.Shuffle(cmLo, yoLo, 0b01_00_01_00); + Unsafe.Add(ref destination, 1) = Avx.Shuffle(cmLo, yoLo, 0b11_10_11_10); + Unsafe.Add(ref destination, 2) = Avx.Shuffle(cmHi, yoHi, 0b01_00_01_00); + Unsafe.Add(ref destination, 3) = Avx.Shuffle(cmHi, yoHi, 0b11_10_11_10); + } +#else + ref Vector cBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector mBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector yBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + ref Vector kBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component3)); + + ref Vector4Octet resultBase = + ref Unsafe.As(ref MemoryMarshal.GetReference(result)); + + Vector4Pair cc = default; + Vector4Pair mm = default; + Vector4Pair yy = default; + ref Vector ccRefAsVector = ref Unsafe.As>(ref cc); + ref Vector mmRefAsVector = ref Unsafe.As>(ref mm); + ref Vector yyRefAsVector = ref Unsafe.As>(ref yy); + + var scale = new Vector(1 / maxValue); + + // Walking 8 elements at one step: + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + Vector c = Unsafe.Add(ref cBase, i); + Vector m = Unsafe.Add(ref mBase, i); + Vector y = Unsafe.Add(ref yBase, i); + Vector k = Unsafe.Add(ref kBase, i) * scale; + + c = (c * k) * scale; + m = (m * k) * scale; + y = (y * k) * scale; + + ccRefAsVector = c; + mmRefAsVector = m; + yyRefAsVector = y; + + // Collect (c0,c1...c8) (m0,m1...m8) (y0,y1...y8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: + ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); + destination.Pack(ref cc, ref mm, ref yy); + } +#endif + } + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScale.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleBasic.cs similarity index 74% rename from src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScale.cs rename to src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleBasic.cs index cf0bc2c920..459a33a966 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScale.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleBasic.cs @@ -10,16 +10,21 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters { internal abstract partial class JpegColorConverter { - internal sealed class FromGrayscale : JpegColorConverter + internal sealed class FromGrayscaleBasic : JpegColorConverter { - public FromGrayscale(int precision) + public FromGrayscaleBasic(int precision) : base(JpegColorSpace.Grayscale, precision) { } public override void ConvertToRgba(in ComponentValues values, Span result) { - var maximum = 1 / this.MaximumValue; + ConvertCore(values, result, this.MaximumValue); + } + + internal static void ConvertCore(in ComponentValues values, Span result, float maxValue) + { + var maximum = 1 / maxValue; var scale = new Vector4(maximum, maximum, maximum, 1F); ref float sBase = ref MemoryMarshal.GetReference(values.Component0); @@ -35,4 +40,4 @@ public override void ConvertToRgba(in ComponentValues values, Span resu } } } -} \ No newline at end of file +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleSimdAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleSimdAvx2.cs new file mode 100644 index 0000000000..c25057c6f5 --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleSimdAvx2.cs @@ -0,0 +1,109 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using static SixLabors.ImageSharp.SimdUtils; +#else +using SixLabors.ImageSharp.Tuples; +#endif + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromGrayscaleVector8 : JpegColorConverter + { + public FromGrayscaleVector8(int precision) + : base(JpegColorSpace.Grayscale, precision) + { + } + + public static bool IsAvailable => Vector.IsHardwareAccelerated && SimdUtils.HasVector8; + + public override void ConvertToRgba(in ComponentValues values, Span result) + { + int remainder = result.Length % 8; + int simdCount = result.Length - remainder; + if (simdCount > 0) + { + ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue); + } + + FromGrayscaleBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue); + } + + internal static void ConvertCore(in ComponentValues values, Span result, float maxValue) + { + // This implementation is actually AVX specific. + // An AVX register is capable of storing 8 float-s. + if (!IsAvailable) + { + throw new InvalidOperationException( + "JpegColorConverter.FromGrayscaleVector8 can be used only on architecture having 256 byte floating point SIMD registers!"); + } + +#if SUPPORTS_RUNTIME_INTRINSICS + ref Vector256 gBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + + ref Vector256 resultBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); + + // Used for the color conversion + var scale = Vector256.Create(1 / maxValue); + var one = Vector256.Create(1F); + + // Used for packing + ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); + Vector256 vcontrol = Unsafe.As>(ref control); + + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + Vector256 g = Avx2.PermuteVar8x32(Unsafe.Add(ref gBase, i), vcontrol); + + g = Avx.Multiply(g, scale); + + ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); + + destination = Avx.Blend(Avx.Permute(g, 0b00_00_00_00), one, 0b1000_1000); + Unsafe.Add(ref destination, 1) = Avx.Blend(Avx.Permute(g, 0b01_01_01_01), one, 0b1000_1000); + Unsafe.Add(ref destination, 2) = Avx.Blend(Avx.Permute(g, 0b10_10_10_10), one, 0b1000_1000); + Unsafe.Add(ref destination, 3) = Avx.Blend(Avx.Permute(g, 0b11_11_11_11), one, 0b1000_1000); + } +#else + ref Vector gBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + + ref Vector4Octet resultBase = + ref Unsafe.As(ref MemoryMarshal.GetReference(result)); + + Vector4Pair gg = default; + ref Vector ggRefAsVector = ref Unsafe.As>(ref gg); + + var scale = new Vector(1 / maxValue); + + // Walking 8 elements at one step: + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + Vector g = Unsafe.Add(ref gBase, i); + g *= scale; + + ggRefAsVector = g; + + // Collect (g0,g1...g7) vector values in the expected (g0,g0,g0,1), (g1,g1,g1,1) ... order: + ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); + destination.Pack(ref gg); + } +#endif + } + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgb.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbBasic.cs similarity index 76% rename from src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgb.cs rename to src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbBasic.cs index 25889a6dfc..f9faf14353 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgb.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbBasic.cs @@ -8,23 +8,27 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters { internal abstract partial class JpegColorConverter { - internal sealed class FromRgb : JpegColorConverter + internal sealed class FromRgbBasic : JpegColorConverter { - public FromRgb(int precision) + public FromRgbBasic(int precision) : base(JpegColorSpace.RGB, precision) { } public override void ConvertToRgba(in ComponentValues values, Span result) { - // TODO: We can optimize a lot here with Vector and SRCS.Unsafe()! + ConvertCore(values, result, this.MaximumValue); + } + + internal static void ConvertCore(in ComponentValues values, Span result, float maxValue) + { ReadOnlySpan rVals = values.Component0; ReadOnlySpan gVals = values.Component1; ReadOnlySpan bVals = values.Component2; var v = new Vector4(0, 0, 0, 1); - var maximum = 1 / this.MaximumValue; + var maximum = 1 / maxValue; var scale = new Vector4(maximum, maximum, maximum, 1F); for (int i = 0; i < result.Length; i++) @@ -44,4 +48,4 @@ public override void ConvertToRgba(in ComponentValues values, Span resu } } } -} \ No newline at end of file +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbSimdAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbSimdAvx2.cs new file mode 100644 index 0000000000..ebaa512119 --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbSimdAvx2.cs @@ -0,0 +1,132 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using static SixLabors.ImageSharp.SimdUtils; +#else +using SixLabors.ImageSharp.Tuples; +#endif + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromRgbVector8 : JpegColorConverter + { + public FromRgbVector8(int precision) + : base(JpegColorSpace.RGB, precision) + { + } + + public static bool IsAvailable => Vector.IsHardwareAccelerated && SimdUtils.HasVector8; + + public override void ConvertToRgba(in ComponentValues values, Span result) + { + int remainder = result.Length % 8; + int simdCount = result.Length - remainder; + if (simdCount > 0) + { + ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue); + } + + FromRgbBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue); + } + + internal static void ConvertCore(in ComponentValues values, Span result, float maxValue) + { + // This implementation is actually AVX specific. + // An AVX register is capable of storing 8 float-s. + if (!IsAvailable) + { + throw new InvalidOperationException( + "JpegColorConverter.FromGrayscaleVector8 can be used only on architecture having 256 byte floating point SIMD registers!"); + } + +#if SUPPORTS_RUNTIME_INTRINSICS + ref Vector256 rBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector256 gBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector256 bBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + + ref Vector256 resultBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); + + // Used for the color conversion + var scale = Vector256.Create(1 / maxValue); + var one = Vector256.Create(1F); + + // Used for packing + ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); + Vector256 vcontrol = Unsafe.As>(ref control); + + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + Vector256 r = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref rBase, i), vcontrol), scale); + Vector256 g = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref gBase, i), vcontrol), scale); + Vector256 b = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref bBase, i), vcontrol), scale); + + Vector256 rgLo = Avx.UnpackLow(r, g); + Vector256 boLo = Avx.UnpackLow(b, one); + Vector256 rgHi = Avx.UnpackHigh(r, g); + Vector256 boHi = Avx.UnpackHigh(b, one); + + ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); + + destination = Avx.Shuffle(rgLo, boLo, 0b01_00_01_00); + Unsafe.Add(ref destination, 1) = Avx.Shuffle(rgLo, boLo, 0b11_10_11_10); + Unsafe.Add(ref destination, 2) = Avx.Shuffle(rgHi, boHi, 0b01_00_01_00); + Unsafe.Add(ref destination, 3) = Avx.Shuffle(rgHi, boHi, 0b11_10_11_10); + } +#else + ref Vector rBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector gBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector bBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + + ref Vector4Octet resultBase = + ref Unsafe.As(ref MemoryMarshal.GetReference(result)); + + Vector4Pair rr = default; + Vector4Pair gg = default; + Vector4Pair bb = default; + ref Vector rrRefAsVector = ref Unsafe.As>(ref rr); + ref Vector ggRefAsVector = ref Unsafe.As>(ref gg); + ref Vector bbRefAsVector = ref Unsafe.As>(ref bb); + + var scale = new Vector(1 / maxValue); + + // Walking 8 elements at one step: + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + Vector r = Unsafe.Add(ref rBase, i); + Vector g = Unsafe.Add(ref gBase, i); + Vector b = Unsafe.Add(ref bBase, i); + r *= scale; + g *= scale; + b *= scale; + + rrRefAsVector = r; + ggRefAsVector = g; + bbRefAsVector = b; + + // Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: + ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); + destination.Pack(ref rr, ref gg, ref bb); + } +#endif + } + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs index 1319b56ee0..05258fb9b9 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs @@ -9,8 +9,9 @@ using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; using static SixLabors.ImageSharp.SimdUtils; -#endif +#else using SixLabors.ImageSharp.Tuples; +#endif // ReSharper disable ImpureMethodCallOnReadonlyValueField namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters @@ -98,7 +99,7 @@ internal static void ConvertCore(in ComponentValues values, Span result Vector256 g = HwIntrinsics.MultiplyAdd(HwIntrinsics.MultiplyAdd(y, cb, gCbMult), cr, gCrMult); Vector256 b = HwIntrinsics.MultiplyAdd(y, cb, bCbMult); - // TODO: We should be savving to RGBA not Vector4 + // TODO: We should be saving to RGBA not Vector4 r = Avx.Multiply(Avx.RoundToNearestInteger(r), scale); g = Avx.Multiply(Avx.RoundToNearestInteger(g), scale); b = Avx.Multiply(Avx.RoundToNearestInteger(b), scale); diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccK.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKBasic.cs similarity index 57% rename from src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccK.cs rename to src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKBasic.cs index 1137cdc0ec..1008889566 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccK.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKBasic.cs @@ -8,14 +8,19 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters { internal abstract partial class JpegColorConverter { - internal sealed class FromYccK : JpegColorConverter + internal sealed class FromYccKBasic : JpegColorConverter { - public FromYccK(int precision) + public FromYccKBasic(int precision) : base(JpegColorSpace.Ycck, precision) { } public override void ConvertToRgba(in ComponentValues values, Span result) + { + ConvertCore(values, result, this.MaximumValue, this.HalfValue); + } + + internal static void ConvertCore(in ComponentValues values, Span result, float maxValue, float halfValue) { // TODO: We can optimize a lot here with Vector and SRCS.Unsafe()! ReadOnlySpan yVals = values.Component0; @@ -25,19 +30,19 @@ public override void ConvertToRgba(in ComponentValues values, Span resu var v = new Vector4(0, 0, 0, 1F); - var maximum = 1 / this.MaximumValue; + var maximum = 1 / maxValue; var scale = new Vector4(maximum, maximum, maximum, 1F); for (int i = 0; i < result.Length; i++) { float y = yVals[i]; - float cb = cbVals[i] - this.HalfValue; - float cr = crVals[i] - this.HalfValue; - float k = kVals[i] / this.MaximumValue; + float cb = cbVals[i] - halfValue; + float cr = crVals[i] - halfValue; + float k = kVals[i] / maxValue; - v.X = (this.MaximumValue - MathF.Round(y + (1.402F * cr), MidpointRounding.AwayFromZero)) * k; - v.Y = (this.MaximumValue - MathF.Round(y - (0.344136F * cb) - (0.714136F * cr), MidpointRounding.AwayFromZero)) * k; - v.Z = (this.MaximumValue - MathF.Round(y + (1.772F * cb), MidpointRounding.AwayFromZero)) * k; + v.X = (maxValue - MathF.Round(y + (1.402F * cr), MidpointRounding.AwayFromZero)) * k; + v.Y = (maxValue - MathF.Round(y - (0.344136F * cb) - (0.714136F * cr), MidpointRounding.AwayFromZero)) * k; + v.Z = (maxValue - MathF.Round(y + (1.772F * cb), MidpointRounding.AwayFromZero)) * k; v.W = 1F; v *= scale; @@ -47,4 +52,4 @@ public override void ConvertToRgba(in ComponentValues values, Span resu } } } -} \ No newline at end of file +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKSimdAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKSimdAvx2.cs new file mode 100644 index 0000000000..8645fb8fa4 --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKSimdAvx2.cs @@ -0,0 +1,193 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using static SixLabors.ImageSharp.SimdUtils; +#else +using SixLabors.ImageSharp.Tuples; +#endif + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromYccKVector8 : JpegColorConverter + { + public FromYccKVector8(int precision) + : base(JpegColorSpace.Ycck, precision) + { + } + + public static bool IsAvailable => Vector.IsHardwareAccelerated && SimdUtils.HasVector8; + + public override void ConvertToRgba(in ComponentValues values, Span result) + { + int remainder = result.Length % 8; + int simdCount = result.Length - remainder; + if (simdCount > 0) + { + ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue, this.HalfValue); + } + + FromYccKBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue, this.HalfValue); + } + + internal static void ConvertCore(in ComponentValues values, Span result, float maxValue, float halfValue) + { + // This implementation is actually AVX specific. + // An AVX register is capable of storing 8 float-s. + if (!IsAvailable) + { + throw new InvalidOperationException( + "JpegColorConverter.FromYCbCrSimd256 can be used only on architecture having 256 byte floating point SIMD registers!"); + } + +#if SUPPORTS_RUNTIME_INTRINSICS + ref Vector256 yBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector256 cbBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector256 crBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + ref Vector256 kBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component3)); + + ref Vector256 resultBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); + + // Used for the color conversion + var chromaOffset = Vector256.Create(-halfValue); + var scale = Vector256.Create(1 / maxValue); + var max = Vector256.Create(maxValue); + var rCrMult = Vector256.Create(1.402F); + var gCbMult = Vector256.Create(-0.344136F); + var gCrMult = Vector256.Create(-0.714136F); + var bCbMult = Vector256.Create(1.772F); + + // Used for packing. + var va = Vector256.Create(1F); + ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); + Vector256 vcontrol = Unsafe.As>(ref control); + + // Walking 8 elements at one step: + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + // y = yVals[i]; + // cb = cbVals[i] - 128F; + // cr = crVals[i] - 128F; + // k = kVals[i] / 256F; + Vector256 y = Unsafe.Add(ref yBase, i); + Vector256 cb = Avx.Add(Unsafe.Add(ref cbBase, i), chromaOffset); + Vector256 cr = Avx.Add(Unsafe.Add(ref crBase, i), chromaOffset); + Vector256 k = Avx.Divide(Unsafe.Add(ref kBase, i), max); + + y = Avx2.PermuteVar8x32(y, vcontrol); + cb = Avx2.PermuteVar8x32(cb, vcontrol); + cr = Avx2.PermuteVar8x32(cr, vcontrol); + k = Avx2.PermuteVar8x32(k, vcontrol); + + // r = y + (1.402F * cr); + // g = y - (0.344136F * cb) - (0.714136F * cr); + // b = y + (1.772F * cb); + // Adding & multiplying 8 elements at one time: + Vector256 r = HwIntrinsics.MultiplyAdd(y, cr, rCrMult); + Vector256 g = HwIntrinsics.MultiplyAdd(HwIntrinsics.MultiplyAdd(y, cb, gCbMult), cr, gCrMult); + Vector256 b = HwIntrinsics.MultiplyAdd(y, cb, bCbMult); + + r = Avx.Subtract(max, Avx.RoundToNearestInteger(r)); + g = Avx.Subtract(max, Avx.RoundToNearestInteger(g)); + b = Avx.Subtract(max, Avx.RoundToNearestInteger(b)); + + r = Avx.Multiply(Avx.Multiply(r, k), scale); + g = Avx.Multiply(Avx.Multiply(g, k), scale); + b = Avx.Multiply(Avx.Multiply(b, k), scale); + + Vector256 vte = Avx.UnpackLow(r, b); + Vector256 vto = Avx.UnpackLow(g, va); + + ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); + + destination = Avx.UnpackLow(vte, vto); + Unsafe.Add(ref destination, 1) = Avx.UnpackHigh(vte, vto); + + vte = Avx.UnpackHigh(r, b); + vto = Avx.UnpackHigh(g, va); + + Unsafe.Add(ref destination, 2) = Avx.UnpackLow(vte, vto); + Unsafe.Add(ref destination, 3) = Avx.UnpackHigh(vte, vto); + } +#else + ref Vector yBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector cbBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector crBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + ref Vector kBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component3)); + + ref Vector4Octet resultBase = + ref Unsafe.As(ref MemoryMarshal.GetReference(result)); + + var chromaOffset = new Vector(-halfValue); + + // Walking 8 elements at one step: + int n = result.Length / 8; + + Vector4Pair rr = default; + Vector4Pair gg = default; + Vector4Pair bb = default; + + ref Vector rrRefAsVector = ref Unsafe.As>(ref rr); + ref Vector ggRefAsVector = ref Unsafe.As>(ref gg); + ref Vector bbRefAsVector = ref Unsafe.As>(ref bb); + + var scale = new Vector(1 / maxValue); + var max = new Vector(maxValue); + + for (int i = 0; i < n; i++) + { + // y = yVals[i]; + // cb = cbVals[i] - 128F; + // cr = crVals[i] - 128F; + // k = kVals[i] / 256F; + Vector y = Unsafe.Add(ref yBase, i); + Vector cb = Unsafe.Add(ref cbBase, i) + chromaOffset; + Vector cr = Unsafe.Add(ref crBase, i) + chromaOffset; + Vector k = Unsafe.Add(ref kBase, i) / max; + + // r = y + (1.402F * cr); + // g = y - (0.344136F * cb) - (0.714136F * cr); + // b = y + (1.772F * cb); + // Adding & multiplying 8 elements at one time: + Vector r = y + (cr * new Vector(1.402F)); + Vector g = y - (cb * new Vector(0.344136F)) - (cr * new Vector(0.714136F)); + Vector b = y + (cb * new Vector(1.772F)); + + r = (max - r.FastRound()) * k; + g = (max - g.FastRound()) * k; + b = (max - b.FastRound()) * k; + r *= scale; + g *= scale; + b *= scale; + + rrRefAsVector = r; + ggRefAsVector = g; + bbRefAsVector = b; + + // Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: + ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); + destination.Pack(ref rr, ref gg, ref bb); + } +#endif + } + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs index 7c780700c9..c49ec7e387 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs @@ -21,17 +21,17 @@ internal abstract partial class JpegColorConverter { // 8-bit converters GetYCbCrConverter(8), - new FromYccK(8), - new FromCmyk(8), - new FromGrayscale(8), - new FromRgb(8), + GetYccKConverter(8), + GetCmykConverter(8), + GetGrayScaleConverter(8), + GetRgbConverter(8), // 12-bit converters GetYCbCrConverter(12), - new FromYccK(12), - new FromCmyk(12), - new FromGrayscale(12), - new FromRgb(12), + GetYccKConverter(12), + GetCmykConverter(12), + GetGrayScaleConverter(12), + GetRgbConverter(12), }; /// @@ -94,6 +94,30 @@ public static JpegColorConverter GetConverter(JpegColorSpace colorSpace, int pre private static JpegColorConverter GetYCbCrConverter(int precision) => FromYCbCrSimdVector8.IsAvailable ? (JpegColorConverter)new FromYCbCrSimdVector8(precision) : new FromYCbCrSimd(precision); + /// + /// Returns the for the YccK colorspace that matches the current CPU architecture. + /// + private static JpegColorConverter GetYccKConverter(int precision) => + FromYccKVector8.IsAvailable ? (JpegColorConverter)new FromYccKVector8(precision) : new FromYccKBasic(precision); + + /// + /// Returns the for the CMYK colorspace that matches the current CPU architecture. + /// + private static JpegColorConverter GetCmykConverter(int precision) => + FromCmykVector8.IsAvailable ? (JpegColorConverter)new FromCmykVector8(precision) : new FromCmykBasic(precision); + + /// + /// Returns the for the gray scale colorspace that matches the current CPU architecture. + /// + private static JpegColorConverter GetGrayScaleConverter(int precision) => + FromGrayscaleVector8.IsAvailable ? (JpegColorConverter)new FromGrayscaleVector8(precision) : new FromGrayscaleBasic(precision); + + /// + /// Returns the for the RGB colorspace that matches the current CPU architecture. + /// + private static JpegColorConverter GetRgbConverter(int precision) => + FromRgbVector8.IsAvailable ? (JpegColorConverter)new FromRgbVector8(precision) : new FromRgbBasic(precision); + /// /// A stack-only struct to reference the input buffers using -s. /// @@ -229,6 +253,52 @@ public void Pack(ref Vector4Pair r, ref Vector4Pair g, ref Vector4Pair b) this.V7.Z = b.B.W; this.V7.W = 1f; } + + /// + /// Pack (g0,g1...g7) vector values as (g0,g0,g0,1), (g1,g1,g1,1) ... + /// + public void Pack(ref Vector4Pair g) + { + this.V0.X = g.A.X; + this.V0.Y = g.A.X; + this.V0.Z = g.A.X; + this.V0.W = 1f; + + this.V1.X = g.A.Y; + this.V1.Y = g.A.Y; + this.V1.Z = g.A.Y; + this.V1.W = 1f; + + this.V2.X = g.A.Z; + this.V2.Y = g.A.Z; + this.V2.Z = g.A.Z; + this.V2.W = 1f; + + this.V3.X = g.A.W; + this.V3.Y = g.A.W; + this.V3.Z = g.A.W; + this.V3.W = 1f; + + this.V4.X = g.B.X; + this.V4.Y = g.B.X; + this.V4.Z = g.B.X; + this.V4.W = 1f; + + this.V5.X = g.B.Y; + this.V5.Y = g.B.Y; + this.V5.Z = g.B.Y; + this.V5.W = 1f; + + this.V6.X = g.B.Z; + this.V6.Y = g.B.Z; + this.V6.Z = g.B.Z; + this.V6.W = 1f; + + this.V7.X = g.B.W; + this.V7.Y = g.B.W; + this.V7.Z = g.B.W; + this.V7.W = 1f; + } } } } diff --git a/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs index 860f9c396c..9abe318846 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs @@ -183,7 +183,7 @@ public void ConvertFromCmyk(int inputBufferLength, int resultBufferLength, int s var actual = new Rgb(rgba.X, rgba.Y, rgba.Z); var expected = new Rgb(v.X, v.Y, v.Z); - Assert.Equal(expected, actual); + Assert.Equal(expected, actual, ColorSpaceComparer); Assert.Equal(1, rgba.W); } } From f421be21211a0b659ac251adbb1954bced2bfac0 Mon Sep 17 00:00:00 2001 From: Nicolas Portmann Date: Wed, 4 Nov 2020 18:03:36 +0100 Subject: [PATCH 02/10] Refactor JpegColorConverters --- src/ImageSharp/Common/Helpers/SimdUtils.cs | 12 ++ ...egColorConverter.Avx2JpegColorConverter.cs | 22 ++ ...gColorConverter.BasicJpegColorConverter.cs | 18 ++ .../JpegColorConverter.FromCmykAvx2.cs | 81 ++++++++ .../JpegColorConverter.FromCmykBasic.cs | 2 +- .../JpegColorConverter.FromCmykSimdAvx2.cs | 145 ------------- .../JpegColorConverter.FromCmykVector8.cs | 71 +++++++ .../JpegColorConverter.FromGrayScaleAvx2.cs | 63 ++++++ .../JpegColorConverter.FromGrayScaleBasic.cs | 2 +- ...pegColorConverter.FromGrayScaleSimdAvx2.cs | 109 ---------- ...JpegColorConverter.FromGrayScaleVector8.cs | 53 +++++ .../JpegColorConverter.FromRgbAvx2.cs | 72 +++++++ .../JpegColorConverter.FromRgbBasic.cs | 2 +- .../JpegColorConverter.FromRgbSimdAvx2.cs | 132 ------------ .../JpegColorConverter.FromRgbVector8.cs | 67 ++++++ .../JpegColorConverter.FromYCbCrAvx2.cs | 101 +++++++++ .../JpegColorConverter.FromYCbCrBasic.cs | 4 +- .../JpegColorConverter.FromYCbCrSimdAvx2.cs | 183 ----------------- ... => JpegColorConverter.FromYCbCrVector.cs} | 30 +-- .../JpegColorConverter.FromYCbCrVector8.cs | 87 ++++++++ .../JpegColorConverter.FromYccKAvx2.cs | 110 ++++++++++ .../JpegColorConverter.FromYccKBasic.cs | 2 +- .../JpegColorConverter.FromYccKSimdAvx2.cs | 193 ------------------ .../JpegColorConverter.FromYccKVector8.cs | 91 +++++++++ ...olorConverter.Vector8JpegColorConverter.cs | 18 ++ ...rConverter.VectorizedJpegColorConverter.cs | 46 +++++ .../ColorConverters/JpegColorConverter.cs | 99 ++++++--- .../Codecs/Jpeg/YCbCrColorConversion.cs | 14 +- .../Formats/Jpg/JpegColorConverterTests.cs | 68 +++--- 29 files changed, 1042 insertions(+), 855 deletions(-) create mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Avx2JpegColorConverter.cs create mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.BasicJpegColorConverter.cs create mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykAvx2.cs delete mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykSimdAvx2.cs create mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykVector8.cs create mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleAvx2.cs delete mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleSimdAvx2.cs create mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleVector8.cs create mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbAvx2.cs delete mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbSimdAvx2.cs create mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbVector8.cs create mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrAvx2.cs delete mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs rename src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/{JpegColorConverter.FromYCbCrSimd.cs => JpegColorConverter.FromYCbCrVector.cs} (78%) create mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector8.cs create mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKAvx2.cs delete mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKSimdAvx2.cs create mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKVector8.cs create mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Vector8JpegColorConverter.cs create mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.VectorizedJpegColorConverter.cs diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.cs b/src/ImageSharp/Common/Helpers/SimdUtils.cs index df533cedf1..073a5a9f67 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.cs @@ -25,6 +25,18 @@ internal static partial class SimdUtils public static bool HasVector8 { get; } = Vector.IsHardwareAccelerated && Vector.Count == 8 && Vector.Count == 8; + public static bool HasAvx2 + { + get + { +#if SUPPORTS_RUNTIME_INTRINSICS + return Avx2.IsSupported; +#else + return false; +#endif + } + } + /// /// Transform all scalars in 'v' in a way that converting them to would have rounding semantics. /// diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Avx2JpegColorConverter.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Avx2JpegColorConverter.cs new file mode 100644 index 0000000000..ef144fc1c7 --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Avx2JpegColorConverter.cs @@ -0,0 +1,22 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics.X86; +#endif + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal abstract class Avx2JpegColorConverter : VectorizedJpegColorConverter + { + protected Avx2JpegColorConverter(JpegColorSpace colorSpace, int precision) + : base(colorSpace, precision, 8) + { + } + + protected sealed override bool IsAvailable => SimdUtils.HasAvx2; + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.BasicJpegColorConverter.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.BasicJpegColorConverter.cs new file mode 100644 index 0000000000..ed2e2cd762 --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.BasicJpegColorConverter.cs @@ -0,0 +1,18 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal abstract class BasicJpegColorConverter : JpegColorConverter + { + protected BasicJpegColorConverter(JpegColorSpace colorSpace, int precision) + : base(colorSpace, precision) + { + } + + protected override bool IsAvailable => true; + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykAvx2.cs new file mode 100644 index 0000000000..a9e3e5b51d --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykAvx2.cs @@ -0,0 +1,81 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using static SixLabors.ImageSharp.SimdUtils; +#endif + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromCmykAvx2 : Avx2JpegColorConverter + { + public FromCmykAvx2(int precision) + : base(JpegColorSpace.Cmyk, precision) + { + } + + protected override void ConvertCoreVectorized(in ComponentValues values, Span result) + { +#if SUPPORTS_RUNTIME_INTRINSICS + ref Vector256 cBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector256 mBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector256 yBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + ref Vector256 kBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component3)); + + ref Vector256 resultBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); + + // Used for the color conversion + var scale = Vector256.Create(1 / this.MaximumValue); + var one = Vector256.Create(1F); + + // Used for packing + ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); + Vector256 vcontrol = Unsafe.As>(ref control); + + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + Vector256 k = Avx2.PermuteVar8x32(Unsafe.Add(ref kBase, i), vcontrol); + Vector256 c = Avx2.PermuteVar8x32(Unsafe.Add(ref cBase, i), vcontrol); + Vector256 m = Avx2.PermuteVar8x32(Unsafe.Add(ref mBase, i), vcontrol); + Vector256 y = Avx2.PermuteVar8x32(Unsafe.Add(ref yBase, i), vcontrol); + + k = Avx.Multiply(k, scale); + + c = Avx.Multiply(Avx.Multiply(c, k), scale); + m = Avx.Multiply(Avx.Multiply(m, k), scale); + y = Avx.Multiply(Avx.Multiply(y, k), scale); + + Vector256 cmLo = Avx.UnpackLow(c, m); + Vector256 yoLo = Avx.UnpackLow(y, one); + Vector256 cmHi = Avx.UnpackHigh(c, m); + Vector256 yoHi = Avx.UnpackHigh(y, one); + + ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); + + destination = Avx.Shuffle(cmLo, yoLo, 0b01_00_01_00); + Unsafe.Add(ref destination, 1) = Avx.Shuffle(cmLo, yoLo, 0b11_10_11_10); + Unsafe.Add(ref destination, 2) = Avx.Shuffle(cmHi, yoHi, 0b01_00_01_00); + Unsafe.Add(ref destination, 3) = Avx.Shuffle(cmHi, yoHi, 0b11_10_11_10); + } +#endif + } + + protected override void ConvertCore(in ComponentValues values, Span result) => + FromCmykBasic.ConvertCore(values, result, this.MaximumValue); + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykBasic.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykBasic.cs index 117bb2c3fd..6cbd52ec3d 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykBasic.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykBasic.cs @@ -8,7 +8,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters { internal abstract partial class JpegColorConverter { - internal sealed class FromCmykBasic : JpegColorConverter + internal sealed class FromCmykBasic : BasicJpegColorConverter { public FromCmykBasic(int precision) : base(JpegColorSpace.Cmyk, precision) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykSimdAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykSimdAvx2.cs deleted file mode 100644 index afd36073ab..0000000000 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykSimdAvx2.cs +++ /dev/null @@ -1,145 +0,0 @@ -// Copyright (c) Six Labors. -// Licensed under the Apache License, Version 2.0. - -using System; -using System.Numerics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -#if SUPPORTS_RUNTIME_INTRINSICS -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.X86; -using static SixLabors.ImageSharp.SimdUtils; -#else -using SixLabors.ImageSharp.Tuples; -#endif - -namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters -{ - internal abstract partial class JpegColorConverter - { - internal sealed class FromCmykVector8 : JpegColorConverter - { - public FromCmykVector8(int precision) - : base(JpegColorSpace.Cmyk, precision) - { - } - - public static bool IsAvailable => Vector.IsHardwareAccelerated && SimdUtils.HasVector8; - - public override void ConvertToRgba(in ComponentValues values, Span result) - { - int remainder = result.Length % 8; - int simdCount = result.Length - remainder; - if (simdCount > 0) - { - ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue); - } - - FromCmykBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue); - } - - internal static void ConvertCore(in ComponentValues values, Span result, float maxValue) - { - // This implementation is actually AVX specific. - // An AVX register is capable of storing 8 float-s. - if (!IsAvailable) - { - throw new InvalidOperationException( - "JpegColorConverter.FromGrayscaleVector8 can be used only on architecture having 256 byte floating point SIMD registers!"); - } - -#if SUPPORTS_RUNTIME_INTRINSICS - ref Vector256 cBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); - ref Vector256 mBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); - ref Vector256 yBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); - ref Vector256 kBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component3)); - - ref Vector256 resultBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); - - // Used for the color conversion - var scale = Vector256.Create(1 / maxValue); - var one = Vector256.Create(1F); - - // Used for packing - ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); - Vector256 vcontrol = Unsafe.As>(ref control); - - int n = result.Length / 8; - for (int i = 0; i < n; i++) - { - Vector256 k = Avx2.PermuteVar8x32(Unsafe.Add(ref kBase, i), vcontrol); - Vector256 c = Avx2.PermuteVar8x32(Unsafe.Add(ref cBase, i), vcontrol); - Vector256 m = Avx2.PermuteVar8x32(Unsafe.Add(ref mBase, i), vcontrol); - Vector256 y = Avx2.PermuteVar8x32(Unsafe.Add(ref yBase, i), vcontrol); - - k = Avx.Multiply(k, scale); - - c = Avx.Multiply(Avx.Multiply(c, k), scale); - m = Avx.Multiply(Avx.Multiply(m, k), scale); - y = Avx.Multiply(Avx.Multiply(y, k), scale); - - Vector256 cmLo = Avx.UnpackLow(c, m); - Vector256 yoLo = Avx.UnpackLow(y, one); - Vector256 cmHi = Avx.UnpackHigh(c, m); - Vector256 yoHi = Avx.UnpackHigh(y, one); - - ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); - - destination = Avx.Shuffle(cmLo, yoLo, 0b01_00_01_00); - Unsafe.Add(ref destination, 1) = Avx.Shuffle(cmLo, yoLo, 0b11_10_11_10); - Unsafe.Add(ref destination, 2) = Avx.Shuffle(cmHi, yoHi, 0b01_00_01_00); - Unsafe.Add(ref destination, 3) = Avx.Shuffle(cmHi, yoHi, 0b11_10_11_10); - } -#else - ref Vector cBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); - ref Vector mBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); - ref Vector yBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); - ref Vector kBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component3)); - - ref Vector4Octet resultBase = - ref Unsafe.As(ref MemoryMarshal.GetReference(result)); - - Vector4Pair cc = default; - Vector4Pair mm = default; - Vector4Pair yy = default; - ref Vector ccRefAsVector = ref Unsafe.As>(ref cc); - ref Vector mmRefAsVector = ref Unsafe.As>(ref mm); - ref Vector yyRefAsVector = ref Unsafe.As>(ref yy); - - var scale = new Vector(1 / maxValue); - - // Walking 8 elements at one step: - int n = result.Length / 8; - for (int i = 0; i < n; i++) - { - Vector c = Unsafe.Add(ref cBase, i); - Vector m = Unsafe.Add(ref mBase, i); - Vector y = Unsafe.Add(ref yBase, i); - Vector k = Unsafe.Add(ref kBase, i) * scale; - - c = (c * k) * scale; - m = (m * k) * scale; - y = (y * k) * scale; - - ccRefAsVector = c; - mmRefAsVector = m; - yyRefAsVector = y; - - // Collect (c0,c1...c8) (m0,m1...m8) (y0,y1...y8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: - ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); - destination.Pack(ref cc, ref mm, ref yy); - } -#endif - } - } - } -} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykVector8.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykVector8.cs new file mode 100644 index 0000000000..e75634b0fa --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykVector8.cs @@ -0,0 +1,71 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using SixLabors.ImageSharp.Tuples; + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromCmykVector8 : Vector8JpegColorConverter + { + public FromCmykVector8(int precision) + : base(JpegColorSpace.Cmyk, precision) + { + } + + protected override void ConvertCoreVectorized(in ComponentValues values, Span result) + { + ref Vector cBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector mBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector yBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + ref Vector kBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component3)); + + ref Vector4Octet resultBase = + ref Unsafe.As(ref MemoryMarshal.GetReference(result)); + + Vector4Pair cc = default; + Vector4Pair mm = default; + Vector4Pair yy = default; + ref Vector ccRefAsVector = ref Unsafe.As>(ref cc); + ref Vector mmRefAsVector = ref Unsafe.As>(ref mm); + ref Vector yyRefAsVector = ref Unsafe.As>(ref yy); + + var scale = new Vector(1 / this.MaximumValue); + + // Walking 8 elements at one step: + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + Vector c = Unsafe.Add(ref cBase, i); + Vector m = Unsafe.Add(ref mBase, i); + Vector y = Unsafe.Add(ref yBase, i); + Vector k = Unsafe.Add(ref kBase, i) * scale; + + c = (c * k) * scale; + m = (m * k) * scale; + y = (y * k) * scale; + + ccRefAsVector = c; + mmRefAsVector = m; + yyRefAsVector = y; + + // Collect (c0,c1...c8) (m0,m1...m8) (y0,y1...y8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: + ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); + destination.Pack(ref cc, ref mm, ref yy); + } + } + + protected override void ConvertCore(in ComponentValues values, Span result) => + FromCmykBasic.ConvertCore(values, result, this.MaximumValue); + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleAvx2.cs new file mode 100644 index 0000000000..aebffc3dfc --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleAvx2.cs @@ -0,0 +1,63 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using static SixLabors.ImageSharp.SimdUtils; +#endif + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromGrayscaleAvx2 : Avx2JpegColorConverter + { + public FromGrayscaleAvx2(int precision) + : base(JpegColorSpace.Grayscale, precision) + { + } + + protected override void ConvertCoreVectorized(in ComponentValues values, Span result) + { +#if SUPPORTS_RUNTIME_INTRINSICS + ref Vector256 gBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + + ref Vector256 resultBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); + + // Used for the color conversion + var scale = Vector256.Create(1 / this.MaximumValue); + var one = Vector256.Create(1F); + + // Used for packing + ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); + Vector256 vcontrol = Unsafe.As>(ref control); + + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + Vector256 g = Avx2.PermuteVar8x32(Unsafe.Add(ref gBase, i), vcontrol); + + g = Avx.Multiply(g, scale); + + ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); + + destination = Avx.Blend(Avx.Permute(g, 0b00_00_00_00), one, 0b1000_1000); + Unsafe.Add(ref destination, 1) = Avx.Blend(Avx.Permute(g, 0b01_01_01_01), one, 0b1000_1000); + Unsafe.Add(ref destination, 2) = Avx.Blend(Avx.Permute(g, 0b10_10_10_10), one, 0b1000_1000); + Unsafe.Add(ref destination, 3) = Avx.Blend(Avx.Permute(g, 0b11_11_11_11), one, 0b1000_1000); + } +#endif + } + + protected override void ConvertCore(in ComponentValues values, Span result) => + FromGrayscaleBasic.ConvertCore(values, result, this.MaximumValue); + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleBasic.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleBasic.cs index 459a33a966..0b7a220d94 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleBasic.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleBasic.cs @@ -10,7 +10,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters { internal abstract partial class JpegColorConverter { - internal sealed class FromGrayscaleBasic : JpegColorConverter + internal sealed class FromGrayscaleBasic : BasicJpegColorConverter { public FromGrayscaleBasic(int precision) : base(JpegColorSpace.Grayscale, precision) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleSimdAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleSimdAvx2.cs deleted file mode 100644 index c25057c6f5..0000000000 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleSimdAvx2.cs +++ /dev/null @@ -1,109 +0,0 @@ -// Copyright (c) Six Labors. -// Licensed under the Apache License, Version 2.0. - -using System; -using System.Numerics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -#if SUPPORTS_RUNTIME_INTRINSICS -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.X86; -using static SixLabors.ImageSharp.SimdUtils; -#else -using SixLabors.ImageSharp.Tuples; -#endif - -namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters -{ - internal abstract partial class JpegColorConverter - { - internal sealed class FromGrayscaleVector8 : JpegColorConverter - { - public FromGrayscaleVector8(int precision) - : base(JpegColorSpace.Grayscale, precision) - { - } - - public static bool IsAvailable => Vector.IsHardwareAccelerated && SimdUtils.HasVector8; - - public override void ConvertToRgba(in ComponentValues values, Span result) - { - int remainder = result.Length % 8; - int simdCount = result.Length - remainder; - if (simdCount > 0) - { - ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue); - } - - FromGrayscaleBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue); - } - - internal static void ConvertCore(in ComponentValues values, Span result, float maxValue) - { - // This implementation is actually AVX specific. - // An AVX register is capable of storing 8 float-s. - if (!IsAvailable) - { - throw new InvalidOperationException( - "JpegColorConverter.FromGrayscaleVector8 can be used only on architecture having 256 byte floating point SIMD registers!"); - } - -#if SUPPORTS_RUNTIME_INTRINSICS - ref Vector256 gBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); - - ref Vector256 resultBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); - - // Used for the color conversion - var scale = Vector256.Create(1 / maxValue); - var one = Vector256.Create(1F); - - // Used for packing - ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); - Vector256 vcontrol = Unsafe.As>(ref control); - - int n = result.Length / 8; - for (int i = 0; i < n; i++) - { - Vector256 g = Avx2.PermuteVar8x32(Unsafe.Add(ref gBase, i), vcontrol); - - g = Avx.Multiply(g, scale); - - ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); - - destination = Avx.Blend(Avx.Permute(g, 0b00_00_00_00), one, 0b1000_1000); - Unsafe.Add(ref destination, 1) = Avx.Blend(Avx.Permute(g, 0b01_01_01_01), one, 0b1000_1000); - Unsafe.Add(ref destination, 2) = Avx.Blend(Avx.Permute(g, 0b10_10_10_10), one, 0b1000_1000); - Unsafe.Add(ref destination, 3) = Avx.Blend(Avx.Permute(g, 0b11_11_11_11), one, 0b1000_1000); - } -#else - ref Vector gBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); - - ref Vector4Octet resultBase = - ref Unsafe.As(ref MemoryMarshal.GetReference(result)); - - Vector4Pair gg = default; - ref Vector ggRefAsVector = ref Unsafe.As>(ref gg); - - var scale = new Vector(1 / maxValue); - - // Walking 8 elements at one step: - int n = result.Length / 8; - for (int i = 0; i < n; i++) - { - Vector g = Unsafe.Add(ref gBase, i); - g *= scale; - - ggRefAsVector = g; - - // Collect (g0,g1...g7) vector values in the expected (g0,g0,g0,1), (g1,g1,g1,1) ... order: - ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); - destination.Pack(ref gg); - } -#endif - } - } - } -} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleVector8.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleVector8.cs new file mode 100644 index 0000000000..75ea601012 --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleVector8.cs @@ -0,0 +1,53 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using SixLabors.ImageSharp.Tuples; + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromGrayscaleVector8 : Vector8JpegColorConverter + { + public FromGrayscaleVector8(int precision) + : base(JpegColorSpace.Grayscale, precision) + { + } + + protected override void ConvertCoreVectorized(in ComponentValues values, Span result) + { + ref Vector gBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + + ref Vector4Octet resultBase = + ref Unsafe.As(ref MemoryMarshal.GetReference(result)); + + Vector4Pair gg = default; + ref Vector ggRefAsVector = ref Unsafe.As>(ref gg); + + var scale = new Vector(1 / this.MaximumValue); + + // Walking 8 elements at one step: + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + Vector g = Unsafe.Add(ref gBase, i); + g *= scale; + + ggRefAsVector = g; + + // Collect (g0,g1...g7) vector values in the expected (g0,g0,g0,1), (g1,g1,g1,1) ... order: + ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); + destination.Pack(ref gg); + } + } + + protected override void ConvertCore(in ComponentValues values, Span result) => + FromGrayscaleBasic.ConvertCore(values, result, this.MaximumValue); + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbAvx2.cs new file mode 100644 index 0000000000..8f04c91528 --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbAvx2.cs @@ -0,0 +1,72 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using static SixLabors.ImageSharp.SimdUtils; +#endif + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromRgbAvx2 : Avx2JpegColorConverter + { + public FromRgbAvx2(int precision) + : base(JpegColorSpace.RGB, precision) + { + } + + protected override void ConvertCoreVectorized(in ComponentValues values, Span result) + { +#if SUPPORTS_RUNTIME_INTRINSICS + ref Vector256 rBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector256 gBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector256 bBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + + ref Vector256 resultBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); + + // Used for the color conversion + var scale = Vector256.Create(1 / this.MaximumValue); + var one = Vector256.Create(1F); + + // Used for packing + ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); + Vector256 vcontrol = Unsafe.As>(ref control); + + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + Vector256 r = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref rBase, i), vcontrol), scale); + Vector256 g = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref gBase, i), vcontrol), scale); + Vector256 b = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref bBase, i), vcontrol), scale); + + Vector256 rgLo = Avx.UnpackLow(r, g); + Vector256 boLo = Avx.UnpackLow(b, one); + Vector256 rgHi = Avx.UnpackHigh(r, g); + Vector256 boHi = Avx.UnpackHigh(b, one); + + ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); + + destination = Avx.Shuffle(rgLo, boLo, 0b01_00_01_00); + Unsafe.Add(ref destination, 1) = Avx.Shuffle(rgLo, boLo, 0b11_10_11_10); + Unsafe.Add(ref destination, 2) = Avx.Shuffle(rgHi, boHi, 0b01_00_01_00); + Unsafe.Add(ref destination, 3) = Avx.Shuffle(rgHi, boHi, 0b11_10_11_10); + } +#endif + } + + protected override void ConvertCore(in ComponentValues values, Span result) => + FromRgbBasic.ConvertCore(values, result, this.MaximumValue); + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbBasic.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbBasic.cs index f9faf14353..ddca3fe2f6 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbBasic.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbBasic.cs @@ -8,7 +8,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters { internal abstract partial class JpegColorConverter { - internal sealed class FromRgbBasic : JpegColorConverter + internal sealed class FromRgbBasic : BasicJpegColorConverter { public FromRgbBasic(int precision) : base(JpegColorSpace.RGB, precision) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbSimdAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbSimdAvx2.cs deleted file mode 100644 index ebaa512119..0000000000 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbSimdAvx2.cs +++ /dev/null @@ -1,132 +0,0 @@ -// Copyright (c) Six Labors. -// Licensed under the Apache License, Version 2.0. - -using System; -using System.Numerics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -#if SUPPORTS_RUNTIME_INTRINSICS -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.X86; -using static SixLabors.ImageSharp.SimdUtils; -#else -using SixLabors.ImageSharp.Tuples; -#endif - -namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters -{ - internal abstract partial class JpegColorConverter - { - internal sealed class FromRgbVector8 : JpegColorConverter - { - public FromRgbVector8(int precision) - : base(JpegColorSpace.RGB, precision) - { - } - - public static bool IsAvailable => Vector.IsHardwareAccelerated && SimdUtils.HasVector8; - - public override void ConvertToRgba(in ComponentValues values, Span result) - { - int remainder = result.Length % 8; - int simdCount = result.Length - remainder; - if (simdCount > 0) - { - ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue); - } - - FromRgbBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue); - } - - internal static void ConvertCore(in ComponentValues values, Span result, float maxValue) - { - // This implementation is actually AVX specific. - // An AVX register is capable of storing 8 float-s. - if (!IsAvailable) - { - throw new InvalidOperationException( - "JpegColorConverter.FromGrayscaleVector8 can be used only on architecture having 256 byte floating point SIMD registers!"); - } - -#if SUPPORTS_RUNTIME_INTRINSICS - ref Vector256 rBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); - ref Vector256 gBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); - ref Vector256 bBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); - - ref Vector256 resultBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); - - // Used for the color conversion - var scale = Vector256.Create(1 / maxValue); - var one = Vector256.Create(1F); - - // Used for packing - ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); - Vector256 vcontrol = Unsafe.As>(ref control); - - int n = result.Length / 8; - for (int i = 0; i < n; i++) - { - Vector256 r = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref rBase, i), vcontrol), scale); - Vector256 g = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref gBase, i), vcontrol), scale); - Vector256 b = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref bBase, i), vcontrol), scale); - - Vector256 rgLo = Avx.UnpackLow(r, g); - Vector256 boLo = Avx.UnpackLow(b, one); - Vector256 rgHi = Avx.UnpackHigh(r, g); - Vector256 boHi = Avx.UnpackHigh(b, one); - - ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); - - destination = Avx.Shuffle(rgLo, boLo, 0b01_00_01_00); - Unsafe.Add(ref destination, 1) = Avx.Shuffle(rgLo, boLo, 0b11_10_11_10); - Unsafe.Add(ref destination, 2) = Avx.Shuffle(rgHi, boHi, 0b01_00_01_00); - Unsafe.Add(ref destination, 3) = Avx.Shuffle(rgHi, boHi, 0b11_10_11_10); - } -#else - ref Vector rBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); - ref Vector gBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); - ref Vector bBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); - - ref Vector4Octet resultBase = - ref Unsafe.As(ref MemoryMarshal.GetReference(result)); - - Vector4Pair rr = default; - Vector4Pair gg = default; - Vector4Pair bb = default; - ref Vector rrRefAsVector = ref Unsafe.As>(ref rr); - ref Vector ggRefAsVector = ref Unsafe.As>(ref gg); - ref Vector bbRefAsVector = ref Unsafe.As>(ref bb); - - var scale = new Vector(1 / maxValue); - - // Walking 8 elements at one step: - int n = result.Length / 8; - for (int i = 0; i < n; i++) - { - Vector r = Unsafe.Add(ref rBase, i); - Vector g = Unsafe.Add(ref gBase, i); - Vector b = Unsafe.Add(ref bBase, i); - r *= scale; - g *= scale; - b *= scale; - - rrRefAsVector = r; - ggRefAsVector = g; - bbRefAsVector = b; - - // Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: - ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); - destination.Pack(ref rr, ref gg, ref bb); - } -#endif - } - } - } -} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbVector8.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbVector8.cs new file mode 100644 index 0000000000..763064d1e0 --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbVector8.cs @@ -0,0 +1,67 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using SixLabors.ImageSharp.Tuples; + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromRgbVector8 : Vector8JpegColorConverter + { + public FromRgbVector8(int precision) + : base(JpegColorSpace.RGB, precision) + { + } + + protected override void ConvertCoreVectorized(in ComponentValues values, Span result) + { + ref Vector rBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector gBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector bBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + + ref Vector4Octet resultBase = + ref Unsafe.As(ref MemoryMarshal.GetReference(result)); + + Vector4Pair rr = default; + Vector4Pair gg = default; + Vector4Pair bb = default; + ref Vector rrRefAsVector = ref Unsafe.As>(ref rr); + ref Vector ggRefAsVector = ref Unsafe.As>(ref gg); + ref Vector bbRefAsVector = ref Unsafe.As>(ref bb); + + var scale = new Vector(1 / this.MaximumValue); + + // Walking 8 elements at one step: + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + Vector r = Unsafe.Add(ref rBase, i); + Vector g = Unsafe.Add(ref gBase, i); + Vector b = Unsafe.Add(ref bBase, i); + r *= scale; + g *= scale; + b *= scale; + + rrRefAsVector = r; + ggRefAsVector = g; + bbRefAsVector = b; + + // Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: + ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); + destination.Pack(ref rr, ref gg, ref bb); + } + } + + protected override void ConvertCore(in ComponentValues values, Span result) => + FromRgbBasic.ConvertCore(values, result, this.MaximumValue); + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrAvx2.cs new file mode 100644 index 0000000000..f3a0636200 --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrAvx2.cs @@ -0,0 +1,101 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using static SixLabors.ImageSharp.SimdUtils; +#endif + +// ReSharper disable ImpureMethodCallOnReadonlyValueField +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromYCbCrAvx2 : Avx2JpegColorConverter + { + public FromYCbCrAvx2(int precision) + : base(JpegColorSpace.YCbCr, precision) + { + } + + protected override void ConvertCoreVectorized(in ComponentValues values, Span result) + { + #if SUPPORTS_RUNTIME_INTRINSICS + ref Vector256 yBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector256 cbBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector256 crBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + + ref Vector256 resultBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); + + // Used for the color conversion + var chromaOffset = Vector256.Create(-this.HalfValue); + var scale = Vector256.Create(1 / this.MaximumValue); + var rCrMult = Vector256.Create(1.402F); + var gCbMult = Vector256.Create(-0.344136F); + var gCrMult = Vector256.Create(-0.714136F); + var bCbMult = Vector256.Create(1.772F); + + // Used for packing. + var va = Vector256.Create(1F); + ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); + Vector256 vcontrol = Unsafe.As>(ref control); + + // Walking 8 elements at one step: + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + // y = yVals[i]; + // cb = cbVals[i] - 128F; + // cr = crVals[i] - 128F; + Vector256 y = Unsafe.Add(ref yBase, i); + Vector256 cb = Avx.Add(Unsafe.Add(ref cbBase, i), chromaOffset); + Vector256 cr = Avx.Add(Unsafe.Add(ref crBase, i), chromaOffset); + + y = Avx2.PermuteVar8x32(y, vcontrol); + cb = Avx2.PermuteVar8x32(cb, vcontrol); + cr = Avx2.PermuteVar8x32(cr, vcontrol); + + // r = y + (1.402F * cr); + // g = y - (0.344136F * cb) - (0.714136F * cr); + // b = y + (1.772F * cb); + // Adding & multiplying 8 elements at one time: + Vector256 r = HwIntrinsics.MultiplyAdd(y, cr, rCrMult); + Vector256 g = HwIntrinsics.MultiplyAdd(HwIntrinsics.MultiplyAdd(y, cb, gCbMult), cr, gCrMult); + Vector256 b = HwIntrinsics.MultiplyAdd(y, cb, bCbMult); + + // TODO: We should be saving to RGBA not Vector4 + r = Avx.Multiply(Avx.RoundToNearestInteger(r), scale); + g = Avx.Multiply(Avx.RoundToNearestInteger(g), scale); + b = Avx.Multiply(Avx.RoundToNearestInteger(b), scale); + + Vector256 vte = Avx.UnpackLow(r, b); + Vector256 vto = Avx.UnpackLow(g, va); + + ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); + + destination = Avx.UnpackLow(vte, vto); + Unsafe.Add(ref destination, 1) = Avx.UnpackHigh(vte, vto); + + vte = Avx.UnpackHigh(r, b); + vto = Avx.UnpackHigh(g, va); + + Unsafe.Add(ref destination, 2) = Avx.UnpackLow(vte, vto); + Unsafe.Add(ref destination, 3) = Avx.UnpackHigh(vte, vto); + } +#endif + } + + protected override void ConvertCore(in ComponentValues values, Span result) => + FromYCbCrBasic.ConvertCore(values, result, this.MaximumValue, this.HalfValue); + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrBasic.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrBasic.cs index 31fc054619..352e4acb7e 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrBasic.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrBasic.cs @@ -8,7 +8,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters { internal abstract partial class JpegColorConverter { - internal sealed class FromYCbCrBasic : JpegColorConverter + internal sealed class FromYCbCrBasic : BasicJpegColorConverter { public FromYCbCrBasic(int precision) : base(JpegColorSpace.YCbCr, precision) @@ -48,4 +48,4 @@ internal static void ConvertCore(in ComponentValues values, Span result } } } -} \ No newline at end of file +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs deleted file mode 100644 index 05258fb9b9..0000000000 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs +++ /dev/null @@ -1,183 +0,0 @@ -// Copyright (c) Six Labors. -// Licensed under the Apache License, Version 2.0. - -using System; -using System.Numerics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -#if SUPPORTS_RUNTIME_INTRINSICS -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.X86; -using static SixLabors.ImageSharp.SimdUtils; -#else -using SixLabors.ImageSharp.Tuples; -#endif - -// ReSharper disable ImpureMethodCallOnReadonlyValueField -namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters -{ - internal abstract partial class JpegColorConverter - { - internal sealed class FromYCbCrSimdVector8 : JpegColorConverter - { - public FromYCbCrSimdVector8(int precision) - : base(JpegColorSpace.YCbCr, precision) - { - } - - public static bool IsAvailable => Vector.IsHardwareAccelerated && SimdUtils.HasVector8; - - public override void ConvertToRgba(in ComponentValues values, Span result) - { - int remainder = result.Length % 8; - int simdCount = result.Length - remainder; - if (simdCount > 0) - { - ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue, this.HalfValue); - } - - FromYCbCrBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue, this.HalfValue); - } - - /// - /// SIMD convert using buffers of sizes divisible by 8. - /// - internal static void ConvertCore(in ComponentValues values, Span result, float maxValue, float halfValue) - { - // This implementation is actually AVX specific. - // An AVX register is capable of storing 8 float-s. - if (!IsAvailable) - { - throw new InvalidOperationException( - "JpegColorConverter.FromYCbCrSimd256 can be used only on architecture having 256 byte floating point SIMD registers!"); - } - -#if SUPPORTS_RUNTIME_INTRINSICS - ref Vector256 yBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); - ref Vector256 cbBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); - ref Vector256 crBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); - - ref Vector256 resultBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); - - // Used for the color conversion - var chromaOffset = Vector256.Create(-halfValue); - var scale = Vector256.Create(1 / maxValue); - var rCrMult = Vector256.Create(1.402F); - var gCbMult = Vector256.Create(-0.344136F); - var gCrMult = Vector256.Create(-0.714136F); - var bCbMult = Vector256.Create(1.772F); - - // Used for packing. - var va = Vector256.Create(1F); - ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); - Vector256 vcontrol = Unsafe.As>(ref control); - - // Walking 8 elements at one step: - int n = result.Length / 8; - for (int i = 0; i < n; i++) - { - // y = yVals[i]; - // cb = cbVals[i] - 128F; - // cr = crVals[i] - 128F; - Vector256 y = Unsafe.Add(ref yBase, i); - Vector256 cb = Avx.Add(Unsafe.Add(ref cbBase, i), chromaOffset); - Vector256 cr = Avx.Add(Unsafe.Add(ref crBase, i), chromaOffset); - - y = Avx2.PermuteVar8x32(y, vcontrol); - cb = Avx2.PermuteVar8x32(cb, vcontrol); - cr = Avx2.PermuteVar8x32(cr, vcontrol); - - // r = y + (1.402F * cr); - // g = y - (0.344136F * cb) - (0.714136F * cr); - // b = y + (1.772F * cb); - // Adding & multiplying 8 elements at one time: - Vector256 r = HwIntrinsics.MultiplyAdd(y, cr, rCrMult); - Vector256 g = HwIntrinsics.MultiplyAdd(HwIntrinsics.MultiplyAdd(y, cb, gCbMult), cr, gCrMult); - Vector256 b = HwIntrinsics.MultiplyAdd(y, cb, bCbMult); - - // TODO: We should be saving to RGBA not Vector4 - r = Avx.Multiply(Avx.RoundToNearestInteger(r), scale); - g = Avx.Multiply(Avx.RoundToNearestInteger(g), scale); - b = Avx.Multiply(Avx.RoundToNearestInteger(b), scale); - - Vector256 vte = Avx.UnpackLow(r, b); - Vector256 vto = Avx.UnpackLow(g, va); - - ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); - - destination = Avx.UnpackLow(vte, vto); - Unsafe.Add(ref destination, 1) = Avx.UnpackHigh(vte, vto); - - vte = Avx.UnpackHigh(r, b); - vto = Avx.UnpackHigh(g, va); - - Unsafe.Add(ref destination, 2) = Avx.UnpackLow(vte, vto); - Unsafe.Add(ref destination, 3) = Avx.UnpackHigh(vte, vto); - } -#else - ref Vector yBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); - ref Vector cbBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); - ref Vector crBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); - - ref Vector4Octet resultBase = - ref Unsafe.As(ref MemoryMarshal.GetReference(result)); - - var chromaOffset = new Vector(-halfValue); - - // Walking 8 elements at one step: - int n = result.Length / 8; - - Vector4Pair rr = default; - Vector4Pair gg = default; - Vector4Pair bb = default; - - ref Vector rrRefAsVector = ref Unsafe.As>(ref rr); - ref Vector ggRefAsVector = ref Unsafe.As>(ref gg); - ref Vector bbRefAsVector = ref Unsafe.As>(ref bb); - - var scale = new Vector(1 / maxValue); - - for (int i = 0; i < n; i++) - { - // y = yVals[i]; - // cb = cbVals[i] - 128F; - // cr = crVals[i] - 128F; - Vector y = Unsafe.Add(ref yBase, i); - Vector cb = Unsafe.Add(ref cbBase, i) + chromaOffset; - Vector cr = Unsafe.Add(ref crBase, i) + chromaOffset; - - // r = y + (1.402F * cr); - // g = y - (0.344136F * cb) - (0.714136F * cr); - // b = y + (1.772F * cb); - // Adding & multiplying 8 elements at one time: - Vector r = y + (cr * new Vector(1.402F)); - Vector g = y - (cb * new Vector(0.344136F)) - (cr * new Vector(0.714136F)); - Vector b = y + (cb * new Vector(1.772F)); - - r = r.FastRound(); - g = g.FastRound(); - b = b.FastRound(); - r *= scale; - g *= scale; - b *= scale; - - rrRefAsVector = r; - ggRefAsVector = g; - bbRefAsVector = b; - - // Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: - ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); - destination.Pack(ref rr, ref gg, ref bb); - } -#endif - } - } - } -} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimd.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector.cs similarity index 78% rename from src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimd.cs rename to src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector.cs index 541a03615e..65a7c42d8d 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimd.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector.cs @@ -5,36 +5,22 @@ using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; - using SixLabors.ImageSharp.Tuples; namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters { internal abstract partial class JpegColorConverter { - internal sealed class FromYCbCrSimd : JpegColorConverter + internal sealed class FromYCbCrVector : VectorizedJpegColorConverter { - public FromYCbCrSimd(int precision) - : base(JpegColorSpace.YCbCr, precision) + public FromYCbCrVector(int precision) + : base(JpegColorSpace.YCbCr, precision, 8) { } - public override void ConvertToRgba(in ComponentValues values, Span result) - { - int remainder = result.Length % 8; - int simdCount = result.Length - remainder; - if (simdCount > 0) - { - ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue, this.HalfValue); - } + protected override bool IsAvailable => true; - FromYCbCrBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue, this.HalfValue); - } - - /// - /// SIMD convert using buffers of sizes divisible by 8. - /// - internal static void ConvertCore(in ComponentValues values, Span result, float maxValue, float halfValue) + protected override void ConvertCoreVectorized(in ComponentValues values, Span result) { DebugGuard.IsTrue(result.Length % 8 == 0, nameof(result), "result.Length should be divisible by 8!"); @@ -48,7 +34,8 @@ internal static void ConvertCore(in ComponentValues values, Span result ref Vector4Octet resultBase = ref Unsafe.As(ref MemoryMarshal.GetReference(result)); - var chromaOffset = new Vector4(-halfValue); + var chromaOffset = new Vector4(-this.HalfValue); + var maxValue = this.MaximumValue; // Walking 8 elements at one step: int n = result.Length / 8; @@ -112,6 +99,9 @@ internal static void ConvertCore(in ComponentValues values, Span result destination.Pack(ref r, ref g, ref b); } } + + protected override void ConvertCore(in ComponentValues values, Span result) => + FromYCbCrBasic.ConvertCore(values, result, this.MaximumValue, this.HalfValue); } } } diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector8.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector8.cs new file mode 100644 index 0000000000..abacf7161b --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector8.cs @@ -0,0 +1,87 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using SixLabors.ImageSharp.Tuples; + +// ReSharper disable ImpureMethodCallOnReadonlyValueField +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromYCbCrVector8 : Vector8JpegColorConverter + { + public FromYCbCrVector8(int precision) + : base(JpegColorSpace.YCbCr, precision) + { + } + + protected override void ConvertCoreVectorized(in ComponentValues values, Span result) + { + ref Vector yBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector cbBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector crBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + + ref Vector4Octet resultBase = + ref Unsafe.As(ref MemoryMarshal.GetReference(result)); + + var chromaOffset = new Vector(-this.HalfValue); + + // Walking 8 elements at one step: + int n = result.Length / 8; + + Vector4Pair rr = default; + Vector4Pair gg = default; + Vector4Pair bb = default; + + ref Vector rrRefAsVector = ref Unsafe.As>(ref rr); + ref Vector ggRefAsVector = ref Unsafe.As>(ref gg); + ref Vector bbRefAsVector = ref Unsafe.As>(ref bb); + + var scale = new Vector(1 / this.MaximumValue); + + for (int i = 0; i < n; i++) + { + // y = yVals[i]; + // cb = cbVals[i] - 128F; + // cr = crVals[i] - 128F; + Vector y = Unsafe.Add(ref yBase, i); + Vector cb = Unsafe.Add(ref cbBase, i) + chromaOffset; + Vector cr = Unsafe.Add(ref crBase, i) + chromaOffset; + + // r = y + (1.402F * cr); + // g = y - (0.344136F * cb) - (0.714136F * cr); + // b = y + (1.772F * cb); + // Adding & multiplying 8 elements at one time: + Vector r = y + (cr * new Vector(1.402F)); + Vector g = y - (cb * new Vector(0.344136F)) - (cr * new Vector(0.714136F)); + Vector b = y + (cb * new Vector(1.772F)); + + r = r.FastRound(); + g = g.FastRound(); + b = b.FastRound(); + r *= scale; + g *= scale; + b *= scale; + + rrRefAsVector = r; + ggRefAsVector = g; + bbRefAsVector = b; + + // Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: + ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); + destination.Pack(ref rr, ref gg, ref bb); + } + } + + protected override void ConvertCore(in ComponentValues values, Span result) => + FromYCbCrBasic.ConvertCore(values, result, this.MaximumValue, this.HalfValue); + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKAvx2.cs new file mode 100644 index 0000000000..ea0132e1e9 --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKAvx2.cs @@ -0,0 +1,110 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using static SixLabors.ImageSharp.SimdUtils; +#endif + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromYccKAvx2 : Avx2JpegColorConverter + { + public FromYccKAvx2(int precision) + : base(JpegColorSpace.Ycck, precision) + { + } + + protected override void ConvertCoreVectorized(in ComponentValues values, Span result) + { +#if SUPPORTS_RUNTIME_INTRINSICS + ref Vector256 yBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector256 cbBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector256 crBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + ref Vector256 kBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component3)); + + ref Vector256 resultBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); + + // Used for the color conversion + var chromaOffset = Vector256.Create(-this.HalfValue); + var scale = Vector256.Create(1 / this.MaximumValue); + var max = Vector256.Create(this.MaximumValue); + var rCrMult = Vector256.Create(1.402F); + var gCbMult = Vector256.Create(-0.344136F); + var gCrMult = Vector256.Create(-0.714136F); + var bCbMult = Vector256.Create(1.772F); + + // Used for packing. + var va = Vector256.Create(1F); + ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); + Vector256 vcontrol = Unsafe.As>(ref control); + + // Walking 8 elements at one step: + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + // y = yVals[i]; + // cb = cbVals[i] - 128F; + // cr = crVals[i] - 128F; + // k = kVals[i] / 256F; + Vector256 y = Unsafe.Add(ref yBase, i); + Vector256 cb = Avx.Add(Unsafe.Add(ref cbBase, i), chromaOffset); + Vector256 cr = Avx.Add(Unsafe.Add(ref crBase, i), chromaOffset); + Vector256 k = Avx.Divide(Unsafe.Add(ref kBase, i), max); + + y = Avx2.PermuteVar8x32(y, vcontrol); + cb = Avx2.PermuteVar8x32(cb, vcontrol); + cr = Avx2.PermuteVar8x32(cr, vcontrol); + k = Avx2.PermuteVar8x32(k, vcontrol); + + // r = y + (1.402F * cr); + // g = y - (0.344136F * cb) - (0.714136F * cr); + // b = y + (1.772F * cb); + // Adding & multiplying 8 elements at one time: + Vector256 r = HwIntrinsics.MultiplyAdd(y, cr, rCrMult); + Vector256 g = + HwIntrinsics.MultiplyAdd(HwIntrinsics.MultiplyAdd(y, cb, gCbMult), cr, gCrMult); + Vector256 b = HwIntrinsics.MultiplyAdd(y, cb, bCbMult); + + r = Avx.Subtract(max, Avx.RoundToNearestInteger(r)); + g = Avx.Subtract(max, Avx.RoundToNearestInteger(g)); + b = Avx.Subtract(max, Avx.RoundToNearestInteger(b)); + + r = Avx.Multiply(Avx.Multiply(r, k), scale); + g = Avx.Multiply(Avx.Multiply(g, k), scale); + b = Avx.Multiply(Avx.Multiply(b, k), scale); + + Vector256 vte = Avx.UnpackLow(r, b); + Vector256 vto = Avx.UnpackLow(g, va); + + ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); + + destination = Avx.UnpackLow(vte, vto); + Unsafe.Add(ref destination, 1) = Avx.UnpackHigh(vte, vto); + + vte = Avx.UnpackHigh(r, b); + vto = Avx.UnpackHigh(g, va); + + Unsafe.Add(ref destination, 2) = Avx.UnpackLow(vte, vto); + Unsafe.Add(ref destination, 3) = Avx.UnpackHigh(vte, vto); + } +#endif + } + + protected override void ConvertCore(in ComponentValues values, Span result) => + FromYccKBasic.ConvertCore(values, result, this.MaximumValue, this.HalfValue); + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKBasic.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKBasic.cs index 1008889566..778e5325ff 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKBasic.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKBasic.cs @@ -8,7 +8,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters { internal abstract partial class JpegColorConverter { - internal sealed class FromYccKBasic : JpegColorConverter + internal sealed class FromYccKBasic : BasicJpegColorConverter { public FromYccKBasic(int precision) : base(JpegColorSpace.Ycck, precision) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKSimdAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKSimdAvx2.cs deleted file mode 100644 index 8645fb8fa4..0000000000 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKSimdAvx2.cs +++ /dev/null @@ -1,193 +0,0 @@ -// Copyright (c) Six Labors. -// Licensed under the Apache License, Version 2.0. - -using System; -using System.Numerics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -#if SUPPORTS_RUNTIME_INTRINSICS -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.X86; -using static SixLabors.ImageSharp.SimdUtils; -#else -using SixLabors.ImageSharp.Tuples; -#endif - -namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters -{ - internal abstract partial class JpegColorConverter - { - internal sealed class FromYccKVector8 : JpegColorConverter - { - public FromYccKVector8(int precision) - : base(JpegColorSpace.Ycck, precision) - { - } - - public static bool IsAvailable => Vector.IsHardwareAccelerated && SimdUtils.HasVector8; - - public override void ConvertToRgba(in ComponentValues values, Span result) - { - int remainder = result.Length % 8; - int simdCount = result.Length - remainder; - if (simdCount > 0) - { - ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue, this.HalfValue); - } - - FromYccKBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue, this.HalfValue); - } - - internal static void ConvertCore(in ComponentValues values, Span result, float maxValue, float halfValue) - { - // This implementation is actually AVX specific. - // An AVX register is capable of storing 8 float-s. - if (!IsAvailable) - { - throw new InvalidOperationException( - "JpegColorConverter.FromYCbCrSimd256 can be used only on architecture having 256 byte floating point SIMD registers!"); - } - -#if SUPPORTS_RUNTIME_INTRINSICS - ref Vector256 yBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); - ref Vector256 cbBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); - ref Vector256 crBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); - ref Vector256 kBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component3)); - - ref Vector256 resultBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); - - // Used for the color conversion - var chromaOffset = Vector256.Create(-halfValue); - var scale = Vector256.Create(1 / maxValue); - var max = Vector256.Create(maxValue); - var rCrMult = Vector256.Create(1.402F); - var gCbMult = Vector256.Create(-0.344136F); - var gCrMult = Vector256.Create(-0.714136F); - var bCbMult = Vector256.Create(1.772F); - - // Used for packing. - var va = Vector256.Create(1F); - ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); - Vector256 vcontrol = Unsafe.As>(ref control); - - // Walking 8 elements at one step: - int n = result.Length / 8; - for (int i = 0; i < n; i++) - { - // y = yVals[i]; - // cb = cbVals[i] - 128F; - // cr = crVals[i] - 128F; - // k = kVals[i] / 256F; - Vector256 y = Unsafe.Add(ref yBase, i); - Vector256 cb = Avx.Add(Unsafe.Add(ref cbBase, i), chromaOffset); - Vector256 cr = Avx.Add(Unsafe.Add(ref crBase, i), chromaOffset); - Vector256 k = Avx.Divide(Unsafe.Add(ref kBase, i), max); - - y = Avx2.PermuteVar8x32(y, vcontrol); - cb = Avx2.PermuteVar8x32(cb, vcontrol); - cr = Avx2.PermuteVar8x32(cr, vcontrol); - k = Avx2.PermuteVar8x32(k, vcontrol); - - // r = y + (1.402F * cr); - // g = y - (0.344136F * cb) - (0.714136F * cr); - // b = y + (1.772F * cb); - // Adding & multiplying 8 elements at one time: - Vector256 r = HwIntrinsics.MultiplyAdd(y, cr, rCrMult); - Vector256 g = HwIntrinsics.MultiplyAdd(HwIntrinsics.MultiplyAdd(y, cb, gCbMult), cr, gCrMult); - Vector256 b = HwIntrinsics.MultiplyAdd(y, cb, bCbMult); - - r = Avx.Subtract(max, Avx.RoundToNearestInteger(r)); - g = Avx.Subtract(max, Avx.RoundToNearestInteger(g)); - b = Avx.Subtract(max, Avx.RoundToNearestInteger(b)); - - r = Avx.Multiply(Avx.Multiply(r, k), scale); - g = Avx.Multiply(Avx.Multiply(g, k), scale); - b = Avx.Multiply(Avx.Multiply(b, k), scale); - - Vector256 vte = Avx.UnpackLow(r, b); - Vector256 vto = Avx.UnpackLow(g, va); - - ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); - - destination = Avx.UnpackLow(vte, vto); - Unsafe.Add(ref destination, 1) = Avx.UnpackHigh(vte, vto); - - vte = Avx.UnpackHigh(r, b); - vto = Avx.UnpackHigh(g, va); - - Unsafe.Add(ref destination, 2) = Avx.UnpackLow(vte, vto); - Unsafe.Add(ref destination, 3) = Avx.UnpackHigh(vte, vto); - } -#else - ref Vector yBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); - ref Vector cbBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); - ref Vector crBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); - ref Vector kBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component3)); - - ref Vector4Octet resultBase = - ref Unsafe.As(ref MemoryMarshal.GetReference(result)); - - var chromaOffset = new Vector(-halfValue); - - // Walking 8 elements at one step: - int n = result.Length / 8; - - Vector4Pair rr = default; - Vector4Pair gg = default; - Vector4Pair bb = default; - - ref Vector rrRefAsVector = ref Unsafe.As>(ref rr); - ref Vector ggRefAsVector = ref Unsafe.As>(ref gg); - ref Vector bbRefAsVector = ref Unsafe.As>(ref bb); - - var scale = new Vector(1 / maxValue); - var max = new Vector(maxValue); - - for (int i = 0; i < n; i++) - { - // y = yVals[i]; - // cb = cbVals[i] - 128F; - // cr = crVals[i] - 128F; - // k = kVals[i] / 256F; - Vector y = Unsafe.Add(ref yBase, i); - Vector cb = Unsafe.Add(ref cbBase, i) + chromaOffset; - Vector cr = Unsafe.Add(ref crBase, i) + chromaOffset; - Vector k = Unsafe.Add(ref kBase, i) / max; - - // r = y + (1.402F * cr); - // g = y - (0.344136F * cb) - (0.714136F * cr); - // b = y + (1.772F * cb); - // Adding & multiplying 8 elements at one time: - Vector r = y + (cr * new Vector(1.402F)); - Vector g = y - (cb * new Vector(0.344136F)) - (cr * new Vector(0.714136F)); - Vector b = y + (cb * new Vector(1.772F)); - - r = (max - r.FastRound()) * k; - g = (max - g.FastRound()) * k; - b = (max - b.FastRound()) * k; - r *= scale; - g *= scale; - b *= scale; - - rrRefAsVector = r; - ggRefAsVector = g; - bbRefAsVector = b; - - // Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: - ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); - destination.Pack(ref rr, ref gg, ref bb); - } -#endif - } - } - } -} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKVector8.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKVector8.cs new file mode 100644 index 0000000000..c360392de8 --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKVector8.cs @@ -0,0 +1,91 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using SixLabors.ImageSharp.Tuples; + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromYccKVector8 : Vector8JpegColorConverter + { + public FromYccKVector8(int precision) + : base(JpegColorSpace.Ycck, precision) + { + } + + protected override void ConvertCoreVectorized(in ComponentValues values, Span result) + { + ref Vector yBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector cbBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector crBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + ref Vector kBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component3)); + + ref Vector4Octet resultBase = + ref Unsafe.As(ref MemoryMarshal.GetReference(result)); + + var chromaOffset = new Vector(-this.HalfValue); + + // Walking 8 elements at one step: + int n = result.Length / 8; + + Vector4Pair rr = default; + Vector4Pair gg = default; + Vector4Pair bb = default; + + ref Vector rrRefAsVector = ref Unsafe.As>(ref rr); + ref Vector ggRefAsVector = ref Unsafe.As>(ref gg); + ref Vector bbRefAsVector = ref Unsafe.As>(ref bb); + + var scale = new Vector(1 / this.MaximumValue); + var max = new Vector(this.MaximumValue); + + for (int i = 0; i < n; i++) + { + // y = yVals[i]; + // cb = cbVals[i] - 128F; + // cr = crVals[i] - 128F; + // k = kVals[i] / 256F; + Vector y = Unsafe.Add(ref yBase, i); + Vector cb = Unsafe.Add(ref cbBase, i) + chromaOffset; + Vector cr = Unsafe.Add(ref crBase, i) + chromaOffset; + Vector k = Unsafe.Add(ref kBase, i) / max; + + // r = y + (1.402F * cr); + // g = y - (0.344136F * cb) - (0.714136F * cr); + // b = y + (1.772F * cb); + // Adding & multiplying 8 elements at one time: + Vector r = y + (cr * new Vector(1.402F)); + Vector g = y - (cb * new Vector(0.344136F)) - (cr * new Vector(0.714136F)); + Vector b = y + (cb * new Vector(1.772F)); + + r = (max - r.FastRound()) * k; + g = (max - g.FastRound()) * k; + b = (max - b.FastRound()) * k; + r *= scale; + g *= scale; + b *= scale; + + rrRefAsVector = r; + ggRefAsVector = g; + bbRefAsVector = b; + + // Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: + ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); + destination.Pack(ref rr, ref gg, ref bb); + } + } + + protected override void ConvertCore(in ComponentValues values, Span result) => + FromYccKBasic.ConvertCore(values, result, this.MaximumValue, this.HalfValue); + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Vector8JpegColorConverter.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Vector8JpegColorConverter.cs new file mode 100644 index 0000000000..3e9b889db7 --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Vector8JpegColorConverter.cs @@ -0,0 +1,18 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal abstract class Vector8JpegColorConverter : VectorizedJpegColorConverter + { + protected Vector8JpegColorConverter(JpegColorSpace colorSpace, int precision) + : base(colorSpace, precision, 8) + { + } + + protected sealed override bool IsAvailable => SimdUtils.HasVector8; + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.VectorizedJpegColorConverter.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.VectorizedJpegColorConverter.cs new file mode 100644 index 0000000000..522be82c2d --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.VectorizedJpegColorConverter.cs @@ -0,0 +1,46 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal abstract class VectorizedJpegColorConverter : JpegColorConverter + { + private readonly int vectorSize; + + protected VectorizedJpegColorConverter(JpegColorSpace colorSpace, int precision, int vectorSize) + : base(colorSpace, precision) + { + this.vectorSize = vectorSize; + } + + public sealed override void ConvertToRgba(in ComponentValues values, Span result) + { + int remainder = result.Length % this.vectorSize; + int simdCount = result.Length - remainder; + if (simdCount > 0) + { + // This implementation is actually AVX specific. + // An AVX register is capable of storing 8 float-s. + if (!this.IsAvailable) + { + throw new InvalidOperationException( + "This converter can be used only on architecture having 256 byte floating point SIMD registers!"); + } + + this.ConvertCoreVectorized(values.Slice(0, simdCount), result.Slice(0, simdCount)); + } + + this.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder)); + } + + protected abstract void ConvertCoreVectorized(in ComponentValues values, Span result); + + protected abstract void ConvertCore(in ComponentValues values, Span result); + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs index c49ec7e387..7b3c11938c 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs @@ -3,6 +3,7 @@ using System; using System.Collections.Generic; +using System.Linq; using System.Numerics; using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.Tuples; @@ -17,22 +18,7 @@ internal abstract partial class JpegColorConverter /// /// The available converters /// - private static readonly JpegColorConverter[] Converters = - { - // 8-bit converters - GetYCbCrConverter(8), - GetYccKConverter(8), - GetCmykConverter(8), - GetGrayScaleConverter(8), - GetRgbConverter(8), - - // 12-bit converters - GetYCbCrConverter(12), - GetYccKConverter(12), - GetCmykConverter(12), - GetGrayScaleConverter(12), - GetRgbConverter(12), - }; + private static readonly JpegColorConverter[] Converters = CreateConverters(); /// /// Initializes a new instance of the class. @@ -45,6 +31,12 @@ protected JpegColorConverter(JpegColorSpace colorSpace, int precision) this.HalfValue = MathF.Ceiling(this.MaximumValue / 2); } + /// + /// Gets a value indicating whether this is available + /// on the current runtime and CPU architecture. + /// + protected abstract bool IsAvailable { get; } + /// /// Gets the of this converter. /// @@ -89,34 +81,79 @@ public static JpegColorConverter GetConverter(JpegColorSpace colorSpace, int pre public abstract void ConvertToRgba(in ComponentValues values, Span result); /// - /// Returns the for the YCbCr colorspace that matches the current CPU architecture. + /// Returns the s for all supported colorspaces and precisions. /// - private static JpegColorConverter GetYCbCrConverter(int precision) => - FromYCbCrSimdVector8.IsAvailable ? (JpegColorConverter)new FromYCbCrSimdVector8(precision) : new FromYCbCrSimd(precision); + private static JpegColorConverter[] CreateConverters() + { + var converters = new List(); + + // 8-bit converters + converters.AddRange(GetYCbCrConverters(8)); + converters.AddRange(GetYccKConverters(8)); + converters.AddRange(GetCmykConverters(8)); + converters.AddRange(GetGrayScaleConverters(8)); + converters.AddRange(GetRgbConverters(8)); + + // 8-bit converters + converters.AddRange(GetYCbCrConverters(12)); + converters.AddRange(GetYccKConverters(12)); + converters.AddRange(GetCmykConverters(12)); + converters.AddRange(GetGrayScaleConverters(12)); + converters.AddRange(GetRgbConverters(12)); + + return converters.Where(x => x.IsAvailable).ToArray(); + } /// - /// Returns the for the YccK colorspace that matches the current CPU architecture. + /// Returns the s for the YCbCr colorspace. /// - private static JpegColorConverter GetYccKConverter(int precision) => - FromYccKVector8.IsAvailable ? (JpegColorConverter)new FromYccKVector8(precision) : new FromYccKBasic(precision); + private static IEnumerable GetYCbCrConverters(int precision) + { + yield return new FromYCbCrAvx2(precision); + yield return new FromYCbCrVector8(precision); + yield return new FromYCbCrVector(precision); + yield return new FromYCbCrBasic(precision); + } /// - /// Returns the for the CMYK colorspace that matches the current CPU architecture. + /// Returns the s for the YccK colorspace. /// - private static JpegColorConverter GetCmykConverter(int precision) => - FromCmykVector8.IsAvailable ? (JpegColorConverter)new FromCmykVector8(precision) : new FromCmykBasic(precision); + private static IEnumerable GetYccKConverters(int precision) + { + yield return new FromYccKAvx2(precision); + yield return new FromYccKVector8(precision); + yield return new FromYccKBasic(precision); + } /// - /// Returns the for the gray scale colorspace that matches the current CPU architecture. + /// Returns the s for the CMYK colorspace. /// - private static JpegColorConverter GetGrayScaleConverter(int precision) => - FromGrayscaleVector8.IsAvailable ? (JpegColorConverter)new FromGrayscaleVector8(precision) : new FromGrayscaleBasic(precision); + private static IEnumerable GetCmykConverters(int precision) + { + yield return new FromCmykAvx2(precision); + yield return new FromCmykVector8(precision); + yield return new FromCmykBasic(precision); + } /// - /// Returns the for the RGB colorspace that matches the current CPU architecture. + /// Returns the s for the gray scale colorspace. /// - private static JpegColorConverter GetRgbConverter(int precision) => - FromRgbVector8.IsAvailable ? (JpegColorConverter)new FromRgbVector8(precision) : new FromRgbBasic(precision); + private static IEnumerable GetGrayScaleConverters(int precision) + { + yield return new FromGrayscaleAvx2(precision); + yield return new FromGrayscaleVector8(precision); + yield return new FromGrayscaleBasic(precision); + } + + /// + /// Returns the s for the RGB colorspace. + /// + private static IEnumerable GetRgbConverters(int precision) + { + yield return new FromRgbAvx2(precision); + yield return new FromRgbVector8(precision); + yield return new FromRgbBasic(precision); + } /// /// A stack-only struct to reference the input buffers using -s. diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YCbCrColorConversion.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YCbCrColorConversion.cs index 7b47cf94a9..9b4b4568f5 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YCbCrColorConversion.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YCbCrColorConversion.cs @@ -41,7 +41,7 @@ public void Scalar() { var values = new JpegColorConverter.ComponentValues(this.input, 0); - JpegColorConverter.FromYCbCrBasic.ConvertCore(values, this.output, 255F, 128F); + new JpegColorConverter.FromYCbCrBasic(8).ConvertToRgba(values, this.output); } [Benchmark(Baseline = true)] @@ -49,7 +49,7 @@ public void SimdVector4() { var values = new JpegColorConverter.ComponentValues(this.input, 0); - JpegColorConverter.FromYCbCrSimd.ConvertCore(values, this.output, 255F, 128F); + new JpegColorConverter.FromYCbCrVector(8).ConvertToRgba(values, this.output); } [Benchmark] @@ -57,7 +57,15 @@ public void SimdVector8() { var values = new JpegColorConverter.ComponentValues(this.input, 0); - JpegColorConverter.FromYCbCrSimdVector8.ConvertCore(values, this.output, 255F, 128F); + new JpegColorConverter.FromYCbCrVector8(8).ConvertToRgba(values, this.output); + } + + [Benchmark] + public void SimdVectorAvx2() + { + var values = new JpegColorConverter.ComponentValues(this.input, 0); + + new JpegColorConverter.FromYCbCrAvx2(8).ConvertToRgba(values, this.output); } private static Buffer2D[] CreateRandomValues( diff --git a/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs index 9abe318846..50513b289d 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs @@ -22,6 +22,7 @@ public class JpegColorConverterTests private static readonly ApproximateColorSpaceComparer ColorSpaceComparer = new ApproximateColorSpaceComparer(Precision); + // int inputBufferLength, int resultBufferLength, int seed public static readonly TheoryData CommonConversionData = new TheoryData { @@ -51,44 +52,30 @@ public void ConvertFromYCbCrBasic(int inputBufferLength, int resultBufferLength, seed); } - private static void ValidateYCbCr(in JpegColorConverter.ComponentValues values, Vector4[] result, int i) + [Theory] + [MemberData(nameof(CommonConversionData))] + public void FromYCbCrVector(int inputBufferLength, int resultBufferLength, int seed) { - float y = values.Component0[i]; - float cb = values.Component1[i]; - float cr = values.Component2[i]; - var ycbcr = new YCbCr(y, cb, cr); - - Vector4 rgba = result[i]; - var actual = new Rgb(rgba.X, rgba.Y, rgba.Z); - var expected = ColorSpaceConverter.ToRgb(ycbcr); - - Assert.Equal(expected, actual, ColorSpaceComparer); - Assert.Equal(1, rgba.W); + ValidateRgbToYCbCrConversion( + new JpegColorConverter.FromYCbCrVector(8), + 3, + inputBufferLength, + resultBufferLength, + seed); } [Theory] - [InlineData(64, 1)] - [InlineData(16, 2)] - [InlineData(8, 3)] - public void FromYCbCrSimd_ConvertCore(int size, int seed) + [MemberData(nameof(CommonConversionData))] + public void FromYCbCrVector8(int inputBufferLength, int resultBufferLength, int seed) { - JpegColorConverter.ComponentValues values = CreateRandomValues(3, size, seed); - var result = new Vector4[size]; - - JpegColorConverter.FromYCbCrSimd.ConvertCore(values, result, 255, 128); - - for (int i = 0; i < size; i++) + if (!SimdUtils.HasVector8) { - ValidateYCbCr(values, result, i); + this.Output.WriteLine("No AVX2 present, skipping test!"); + return; } - } - [Theory] - [MemberData(nameof(CommonConversionData))] - public void FromYCbCrSimd(int inputBufferLength, int resultBufferLength, int seed) - { ValidateRgbToYCbCrConversion( - new JpegColorConverter.FromYCbCrSimd(8), + new JpegColorConverter.FromYCbCrVector8(8), 3, inputBufferLength, resultBufferLength, @@ -97,9 +84,9 @@ public void FromYCbCrSimd(int inputBufferLength, int resultBufferLength, int see [Theory] [MemberData(nameof(CommonConversionData))] - public void FromYCbCrSimdAvx2(int inputBufferLength, int resultBufferLength, int seed) + public void FromYCbCrAvx2(int inputBufferLength, int resultBufferLength, int seed) { - if (!SimdUtils.HasVector8) + if (!SimdUtils.HasAvx2) { this.Output.WriteLine("No AVX2 present, skipping test!"); return; @@ -107,7 +94,7 @@ public void FromYCbCrSimdAvx2(int inputBufferLength, int resultBufferLength, int // JpegColorConverter.FromYCbCrSimdAvx2.LogPlz = s => this.Output.WriteLine(s); ValidateRgbToYCbCrConversion( - new JpegColorConverter.FromYCbCrSimdVector8(8), + new JpegColorConverter.FromYCbCrAvx2(8), 3, inputBufferLength, resultBufferLength, @@ -138,7 +125,7 @@ public void BenchmarkYCbCr(bool simd) JpegColorConverter.ComponentValues values = CreateRandomValues(3, count, 1); var result = new Vector4[count]; - JpegColorConverter converter = simd ? (JpegColorConverter)new JpegColorConverter.FromYCbCrSimd(8) : new JpegColorConverter.FromYCbCrBasic(8); + JpegColorConverter converter = simd ? (JpegColorConverter)new JpegColorConverter.FromYCbCrVector(8) : new JpegColorConverter.FromYCbCrBasic(8); // Warm up: converter.ConvertToRgba(values, result); @@ -331,5 +318,20 @@ private static void ValidateRgbToYCbCrConversion( ValidateYCbCr(values, result, i); } } + + private static void ValidateYCbCr(in JpegColorConverter.ComponentValues values, Vector4[] result, int i) + { + float y = values.Component0[i]; + float cb = values.Component1[i]; + float cr = values.Component2[i]; + var ycbcr = new YCbCr(y, cb, cr); + + Vector4 rgba = result[i]; + var actual = new Rgb(rgba.X, rgba.Y, rgba.Z); + var expected = ColorSpaceConverter.ToRgb(ycbcr); + + Assert.Equal(expected, actual, ColorSpaceComparer); + Assert.Equal(1, rgba.W); + } } } From 7cc953e6a764e8d997ef8fd628c3b4102a63bb95 Mon Sep 17 00:00:00 2001 From: Nicolas Portmann Date: Wed, 4 Nov 2020 18:57:41 +0100 Subject: [PATCH 03/10] Refactor and add tests --- .../Formats/Jpg/JpegColorConverterTests.cs | 438 +++++++++++++----- 1 file changed, 331 insertions(+), 107 deletions(-) diff --git a/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs index 50513b289d..8662a61921 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs @@ -42,9 +42,9 @@ public JpegColorConverterTests(ITestOutputHelper output) [Theory] [MemberData(nameof(CommonConversionData))] - public void ConvertFromYCbCrBasic(int inputBufferLength, int resultBufferLength, int seed) + public void FromYCbCrBasic(int inputBufferLength, int resultBufferLength, int seed) { - ValidateRgbToYCbCrConversion( + ValidateConversion( new JpegColorConverter.FromYCbCrBasic(8), 3, inputBufferLength, @@ -56,7 +56,7 @@ public void ConvertFromYCbCrBasic(int inputBufferLength, int resultBufferLength, [MemberData(nameof(CommonConversionData))] public void FromYCbCrVector(int inputBufferLength, int resultBufferLength, int seed) { - ValidateRgbToYCbCrConversion( + ValidateConversion( new JpegColorConverter.FromYCbCrVector(8), 3, inputBufferLength, @@ -74,7 +74,7 @@ public void FromYCbCrVector8(int inputBufferLength, int resultBufferLength, int return; } - ValidateRgbToYCbCrConversion( + ValidateConversion( new JpegColorConverter.FromYCbCrVector8(8), 3, inputBufferLength, @@ -92,8 +92,7 @@ public void FromYCbCrAvx2(int inputBufferLength, int resultBufferLength, int see return; } - // JpegColorConverter.FromYCbCrSimdAvx2.LogPlz = s => this.Output.WriteLine(s); - ValidateRgbToYCbCrConversion( + ValidateConversion( new JpegColorConverter.FromYCbCrAvx2(8), 3, inputBufferLength, @@ -103,7 +102,7 @@ public void FromYCbCrAvx2(int inputBufferLength, int resultBufferLength, int see [Theory] [MemberData(nameof(CommonConversionData))] - public void ConvertFromYCbCr_WithDefaultConverter(int inputBufferLength, int resultBufferLength, int seed) + public void FromYCbCr_WithDefaultConverter(int inputBufferLength, int resultBufferLength, int seed) { ValidateConversion( JpegColorSpace.YCbCr, @@ -113,149 +112,269 @@ public void ConvertFromYCbCr_WithDefaultConverter(int inputBufferLength, int res seed); } - // Benchmark, for local execution only - // [Theory] - // [InlineData(false)] - // [InlineData(true)] - public void BenchmarkYCbCr(bool simd) + [Theory] + [MemberData(nameof(CommonConversionData))] + public void FromCmykBasic(int inputBufferLength, int resultBufferLength, int seed) { - int count = 2053; - int times = 50000; - - JpegColorConverter.ComponentValues values = CreateRandomValues(3, count, 1); - var result = new Vector4[count]; + ValidateConversion( + new JpegColorConverter.FromCmykBasic(8), + 4, + inputBufferLength, + resultBufferLength, + seed); + } - JpegColorConverter converter = simd ? (JpegColorConverter)new JpegColorConverter.FromYCbCrVector(8) : new JpegColorConverter.FromYCbCrBasic(8); + [Theory] + [MemberData(nameof(CommonConversionData))] + public void FromCmykVector8(int inputBufferLength, int resultBufferLength, int seed) + { + if (!SimdUtils.HasVector8) + { + this.Output.WriteLine("No AVX2 present, skipping test!"); + return; + } - // Warm up: - converter.ConvertToRgba(values, result); + ValidateConversion( + new JpegColorConverter.FromCmykVector8(8), + 4, + inputBufferLength, + resultBufferLength, + seed); + } - using (new MeasureGuard(this.Output, $"{converter.GetType().Name} x {times}")) + [Theory] + [MemberData(nameof(CommonConversionData))] + public void FromCmykAvx2(int inputBufferLength, int resultBufferLength, int seed) + { + if (!SimdUtils.HasAvx2) { - for (int i = 0; i < times; i++) - { - converter.ConvertToRgba(values, result); - } + this.Output.WriteLine("No AVX2 present, skipping test!"); + return; } + + ValidateConversion( + new JpegColorConverter.FromCmykAvx2(8), + 4, + inputBufferLength, + resultBufferLength, + seed); } [Theory] [MemberData(nameof(CommonConversionData))] - public void ConvertFromCmyk(int inputBufferLength, int resultBufferLength, int seed) + public void FromCmyk_WithDefaultConverter(int inputBufferLength, int resultBufferLength, int seed) { - var v = new Vector4(0, 0, 0, 1F); - var scale = new Vector4(1 / 255F, 1 / 255F, 1 / 255F, 1F); + ValidateConversion( + JpegColorSpace.Cmyk, + 4, + inputBufferLength, + resultBufferLength, + seed); + } - var converter = JpegColorConverter.GetConverter(JpegColorSpace.Cmyk, 8); - JpegColorConverter.ComponentValues values = CreateRandomValues(4, inputBufferLength, seed); - var result = new Vector4[resultBufferLength]; + [Theory] + [MemberData(nameof(CommonConversionData))] + public void FromGrayscaleBasic(int inputBufferLength, int resultBufferLength, int seed) + { + ValidateConversion( + new JpegColorConverter.FromGrayscaleBasic(8), + 1, + inputBufferLength, + resultBufferLength, + seed); + } - converter.ConvertToRgba(values, result); + [Theory] + [MemberData(nameof(CommonConversionData))] + public void FromGrayscaleVector8(int inputBufferLength, int resultBufferLength, int seed) + { + if (!SimdUtils.HasVector8) + { + this.Output.WriteLine("No AVX2 present, skipping test!"); + return; + } - for (int i = 0; i < resultBufferLength; i++) + ValidateConversion( + new JpegColorConverter.FromGrayscaleVector8(8), + 1, + inputBufferLength, + resultBufferLength, + seed); + } + + [Theory] + [MemberData(nameof(CommonConversionData))] + public void FromGrayscaleAvx2(int inputBufferLength, int resultBufferLength, int seed) + { + if (!SimdUtils.HasAvx2) { - float c = values.Component0[i]; - float m = values.Component1[i]; - float y = values.Component2[i]; - float k = values.Component3[i] / 255F; + this.Output.WriteLine("No AVX2 present, skipping test!"); + return; + } - v.X = c * k; - v.Y = m * k; - v.Z = y * k; - v.W = 1F; + ValidateConversion( + new JpegColorConverter.FromGrayscaleAvx2(8), + 1, + inputBufferLength, + resultBufferLength, + seed); + } - v *= scale; + [Theory] + [MemberData(nameof(CommonConversionData))] + public void FromGraysacle_WithDefaultConverter(int inputBufferLength, int resultBufferLength, int seed) + { + ValidateConversion( + JpegColorSpace.Grayscale, + 1, + inputBufferLength, + resultBufferLength, + seed); + } - Vector4 rgba = result[i]; - var actual = new Rgb(rgba.X, rgba.Y, rgba.Z); - var expected = new Rgb(v.X, v.Y, v.Z); + [Theory] + [MemberData(nameof(CommonConversionData))] + public void FromRgbBasic(int inputBufferLength, int resultBufferLength, int seed) + { + ValidateConversion( + new JpegColorConverter.FromRgbBasic(8), + 3, + inputBufferLength, + resultBufferLength, + seed); + } - Assert.Equal(expected, actual, ColorSpaceComparer); - Assert.Equal(1, rgba.W); + [Theory] + [MemberData(nameof(CommonConversionData))] + public void FromRgbVector8(int inputBufferLength, int resultBufferLength, int seed) + { + if (!SimdUtils.HasVector8) + { + this.Output.WriteLine("No AVX2 present, skipping test!"); + return; } + + ValidateConversion( + new JpegColorConverter.FromRgbVector8(8), + 3, + inputBufferLength, + resultBufferLength, + seed); } [Theory] [MemberData(nameof(CommonConversionData))] - public void ConvertFromGrayScale(int inputBufferLength, int resultBufferLength, int seed) + public void FromRgbAvx2(int inputBufferLength, int resultBufferLength, int seed) { - var converter = JpegColorConverter.GetConverter(JpegColorSpace.Grayscale, 8); - JpegColorConverter.ComponentValues values = CreateRandomValues(1, inputBufferLength, seed); - var result = new Vector4[resultBufferLength]; + if (!SimdUtils.HasAvx2) + { + this.Output.WriteLine("No AVX2 present, skipping test!"); + return; + } - converter.ConvertToRgba(values, result); + ValidateConversion( + new JpegColorConverter.FromRgbAvx2(8), + 3, + inputBufferLength, + resultBufferLength, + seed); + } - for (int i = 0; i < resultBufferLength; i++) - { - float y = values.Component0[i]; - Vector4 rgba = result[i]; - var actual = new Rgb(rgba.X, rgba.Y, rgba.Z); - var expected = new Rgb(y / 255F, y / 255F, y / 255F); + [Theory] + [MemberData(nameof(CommonConversionData))] + public void FromRgb_WithDefaultConverter(int inputBufferLength, int resultBufferLength, int seed) + { + ValidateConversion( + JpegColorSpace.RGB, + 3, + inputBufferLength, + resultBufferLength, + seed); + } - Assert.Equal(expected, actual, ColorSpaceComparer); - Assert.Equal(1, rgba.W); - } + [Theory] + [MemberData(nameof(CommonConversionData))] + public void FromYccKBasic(int inputBufferLength, int resultBufferLength, int seed) + { + ValidateConversion( + new JpegColorConverter.FromYccKBasic(8), + 4, + inputBufferLength, + resultBufferLength, + seed); } [Theory] [MemberData(nameof(CommonConversionData))] - public void ConvertFromRgb(int inputBufferLength, int resultBufferLength, int seed) + public void FromYccKVector8(int inputBufferLength, int resultBufferLength, int seed) { - var converter = JpegColorConverter.GetConverter(JpegColorSpace.RGB, 8); - JpegColorConverter.ComponentValues values = CreateRandomValues(3, inputBufferLength, seed); - var result = new Vector4[resultBufferLength]; + if (!SimdUtils.HasVector8) + { + this.Output.WriteLine("No AVX2 present, skipping test!"); + return; + } - converter.ConvertToRgba(values, result); + ValidateConversion( + new JpegColorConverter.FromYccKVector8(8), + 4, + inputBufferLength, + resultBufferLength, + seed); + } - for (int i = 0; i < resultBufferLength; i++) + [Theory] + [MemberData(nameof(CommonConversionData))] + public void FromYccKAvx2(int inputBufferLength, int resultBufferLength, int seed) + { + if (!SimdUtils.HasAvx2) { - float r = values.Component0[i]; - float g = values.Component1[i]; - float b = values.Component2[i]; - Vector4 rgba = result[i]; - var actual = new Rgb(rgba.X, rgba.Y, rgba.Z); - var expected = new Rgb(r / 255F, g / 255F, b / 255F); - - Assert.Equal(expected, actual, ColorSpaceComparer); - Assert.Equal(1, rgba.W); + this.Output.WriteLine("No AVX2 present, skipping test!"); + return; } + + ValidateConversion( + new JpegColorConverter.FromYccKAvx2(8), + 4, + inputBufferLength, + resultBufferLength, + seed); } [Theory] [MemberData(nameof(CommonConversionData))] - public void ConvertFromYcck(int inputBufferLength, int resultBufferLength, int seed) + public void FromYcck_WithDefaultConverter(int inputBufferLength, int resultBufferLength, int seed) { - var v = new Vector4(0, 0, 0, 1F); - var scale = new Vector4(1 / 255F, 1 / 255F, 1 / 255F, 1F); + ValidateConversion( + JpegColorSpace.Ycck, + 4, + inputBufferLength, + resultBufferLength, + seed); + } - var converter = JpegColorConverter.GetConverter(JpegColorSpace.Ycck, 8); - JpegColorConverter.ComponentValues values = CreateRandomValues(4, inputBufferLength, seed); - var result = new Vector4[resultBufferLength]; + // Benchmark, for local execution only + // [Theory] + // [InlineData(false)] + // [InlineData(true)] + public void BenchmarkYCbCr(bool simd) + { + int count = 2053; + int times = 50000; + JpegColorConverter.ComponentValues values = CreateRandomValues(3, count, 1); + var result = new Vector4[count]; + + JpegColorConverter converter = simd ? (JpegColorConverter)new JpegColorConverter.FromYCbCrVector(8) : new JpegColorConverter.FromYCbCrBasic(8); + + // Warm up: converter.ConvertToRgba(values, result); - for (int i = 0; i < resultBufferLength; i++) + using (new MeasureGuard(this.Output, $"{converter.GetType().Name} x {times}")) { - float y = values.Component0[i]; - float cb = values.Component1[i] - 128F; - float cr = values.Component2[i] - 128F; - float k = values.Component3[i] / 255F; - - v.X = (255F - (float)Math.Round(y + (1.402F * cr), MidpointRounding.AwayFromZero)) * k; - v.Y = (255F - (float)Math.Round( - y - (0.344136F * cb) - (0.714136F * cr), - MidpointRounding.AwayFromZero)) * k; - v.Z = (255F - (float)Math.Round(y + (1.772F * cb), MidpointRounding.AwayFromZero)) * k; - v.W = 1F; - - v *= scale; - - Vector4 rgba = result[i]; - var actual = new Rgb(rgba.X, rgba.Y, rgba.Z); - var expected = new Rgb(v.X, v.Y, v.Z); - - Assert.Equal(expected, actual, ColorSpaceComparer); - Assert.Equal(1, rgba.W); + for (int i = 0; i < times; i++) + { + converter.ConvertToRgba(values, result); + } } } @@ -270,7 +389,7 @@ private static JpegColorConverter.ComponentValues CreateRandomValues( var buffers = new Buffer2D[componentCount]; for (int i = 0; i < componentCount; i++) { - float[] values = new float[inputBufferLength]; + var values = new float[inputBufferLength]; for (int j = 0; j < inputBufferLength; j++) { @@ -293,7 +412,7 @@ private static void ValidateConversion( int resultBufferLength, int seed) { - ValidateRgbToYCbCrConversion( + ValidateConversion( JpegColorConverter.GetConverter(colorSpace, 8), componentCount, inputBufferLength, @@ -301,7 +420,7 @@ private static void ValidateConversion( seed); } - private static void ValidateRgbToYCbCrConversion( + private static void ValidateConversion( JpegColorConverter converter, int componentCount, int inputBufferLength, @@ -315,7 +434,36 @@ private static void ValidateRgbToYCbCrConversion( for (int i = 0; i < resultBufferLength; i++) { - ValidateYCbCr(values, result, i); + Validate(converter.ColorSpace, values, result, i); + } + } + + private static void Validate( + JpegColorSpace colorSpace, + in JpegColorConverter.ComponentValues values, + Vector4[] result, + int i) + { + switch (colorSpace) + { + case JpegColorSpace.Grayscale: + ValidateGrayScale(values, result, i); + break; + case JpegColorSpace.Ycck: + ValidateCyyK(values, result, i); + break; + case JpegColorSpace.Cmyk: + ValidateCmyk(values, result, i); + break; + case JpegColorSpace.RGB: + ValidateRgb(values, result, i); + break; + case JpegColorSpace.YCbCr: + ValidateYCbCr(values, result, i); + break; + default: + Assert.True(false, $"Colorspace {colorSpace} not supported!"); + break; } } @@ -333,5 +481,81 @@ private static void ValidateYCbCr(in JpegColorConverter.ComponentValues values, Assert.Equal(expected, actual, ColorSpaceComparer); Assert.Equal(1, rgba.W); } + + private static void ValidateCyyK(in JpegColorConverter.ComponentValues values, Vector4[] result, int i) + { + var v = new Vector4(0, 0, 0, 1F); + var scale = new Vector4(1 / 255F, 1 / 255F, 1 / 255F, 1F); + + float y = values.Component0[i]; + float cb = values.Component1[i] - 128F; + float cr = values.Component2[i] - 128F; + float k = values.Component3[i] / 255F; + + v.X = (255F - (float)Math.Round(y + (1.402F * cr), MidpointRounding.AwayFromZero)) * k; + v.Y = (255F - (float)Math.Round( + y - (0.344136F * cb) - (0.714136F * cr), + MidpointRounding.AwayFromZero)) * k; + v.Z = (255F - (float)Math.Round(y + (1.772F * cb), MidpointRounding.AwayFromZero)) * k; + v.W = 1F; + + v *= scale; + + Vector4 rgba = result[i]; + var actual = new Rgb(rgba.X, rgba.Y, rgba.Z); + var expected = new Rgb(v.X, v.Y, v.Z); + + Assert.Equal(expected, actual, ColorSpaceComparer); + Assert.Equal(1, rgba.W); + } + + private static void ValidateRgb(in JpegColorConverter.ComponentValues values, Vector4[] result, int i) + { + float r = values.Component0[i]; + float g = values.Component1[i]; + float b = values.Component2[i]; + Vector4 rgba = result[i]; + var actual = new Rgb(rgba.X, rgba.Y, rgba.Z); + var expected = new Rgb(r / 255F, g / 255F, b / 255F); + + Assert.Equal(expected, actual, ColorSpaceComparer); + Assert.Equal(1, rgba.W); + } + + private static void ValidateGrayScale(in JpegColorConverter.ComponentValues values, Vector4[] result, int i) + { + float y = values.Component0[i]; + Vector4 rgba = result[i]; + var actual = new Rgb(rgba.X, rgba.Y, rgba.Z); + var expected = new Rgb(y / 255F, y / 255F, y / 255F); + + Assert.Equal(expected, actual, ColorSpaceComparer); + Assert.Equal(1, rgba.W); + } + + private static void ValidateCmyk(in JpegColorConverter.ComponentValues values, Vector4[] result, int i) + { + var v = new Vector4(0, 0, 0, 1F); + var scale = new Vector4(1 / 255F, 1 / 255F, 1 / 255F, 1F); + + float c = values.Component0[i]; + float m = values.Component1[i]; + float y = values.Component2[i]; + float k = values.Component3[i] / 255F; + + v.X = c * k; + v.Y = m * k; + v.Z = y * k; + v.W = 1F; + + v *= scale; + + Vector4 rgba = result[i]; + var actual = new Rgb(rgba.X, rgba.Y, rgba.Z); + var expected = new Rgb(v.X, v.Y, v.Z); + + Assert.Equal(expected, actual, ColorSpaceComparer); + Assert.Equal(1, rgba.W); + } } } From 89bb9fe4224085dbf9be49889ff13ca6c34da270 Mon Sep 17 00:00:00 2001 From: Nicolas Portmann Date: Wed, 4 Nov 2020 21:14:54 +0100 Subject: [PATCH 04/10] Add benchmarks --- .../Codecs/Jpeg/CmykColorConversion.cs | 38 ++++++++++++ .../Codecs/Jpeg/ColorConversionBenchmark.cs | 61 +++++++++++++++++++ .../Codecs/Jpeg/GrayscaleColorConversion.cs | 38 ++++++++++++ .../Codecs/Jpeg/RgbColorConversion.cs | 38 ++++++++++++ .../Codecs/Jpeg/YCbCrColorConversion.cs | 53 ++-------------- .../Codecs/Jpeg/YccKColorConverter.cs | 38 ++++++++++++ 6 files changed, 217 insertions(+), 49 deletions(-) create mode 100644 tests/ImageSharp.Benchmarks/Codecs/Jpeg/CmykColorConversion.cs create mode 100644 tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversionBenchmark.cs create mode 100644 tests/ImageSharp.Benchmarks/Codecs/Jpeg/GrayscaleColorConversion.cs create mode 100644 tests/ImageSharp.Benchmarks/Codecs/Jpeg/RgbColorConversion.cs create mode 100644 tests/ImageSharp.Benchmarks/Codecs/Jpeg/YccKColorConverter.cs diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/CmykColorConversion.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/CmykColorConversion.cs new file mode 100644 index 0000000000..64947408b0 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/CmykColorConversion.cs @@ -0,0 +1,38 @@ +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters; + +namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg +{ + [Config(typeof(Config.ShortClr))] + public class CmykColorConversion : ColorConversionBenchmark + { + public CmykColorConversion() + : base(4) + { + } + + [Benchmark(Baseline = true)] + public void Scalar() + { + var values = new JpegColorConverter.ComponentValues(this.input, 0); + + new JpegColorConverter.FromCmykBasic(8).ConvertToRgba(values, this.output); + } + + [Benchmark] + public void SimdVector8() + { + var values = new JpegColorConverter.ComponentValues(this.input, 0); + + new JpegColorConverter.FromCmykVector8(8).ConvertToRgba(values, this.output); + } + + [Benchmark] + public void SimdVectorAvx2() + { + var values = new JpegColorConverter.ComponentValues(this.input, 0); + + new JpegColorConverter.FromCmykAvx2(8).ConvertToRgba(values, this.output); + } + } +} diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversionBenchmark.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversionBenchmark.cs new file mode 100644 index 0000000000..0461591286 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversionBenchmark.cs @@ -0,0 +1,61 @@ +using System; +using System.Numerics; +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.Memory; + +namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg +{ + public abstract class ColorConversionBenchmark + { + private readonly int componentCount; + protected Buffer2D[] input; + protected Vector4[] output; + + protected ColorConversionBenchmark(int componentCount) + { + this.componentCount = componentCount; + } + + public const int Count = 128; + + [GlobalSetup] + public void Setup() + { + this.input = CreateRandomValues(componentCount, Count); + this.output = new Vector4[Count]; + } + + [GlobalCleanup] + public void Cleanup() + { + foreach (Buffer2D buffer in this.input) + { + buffer.Dispose(); + } + } + + private static Buffer2D[] CreateRandomValues( + int componentCount, + int inputBufferLength, + float minVal = 0f, + float maxVal = 255f) + { + var rnd = new Random(42); + var buffers = new Buffer2D[componentCount]; + for (int i = 0; i < componentCount; i++) + { + var values = new float[inputBufferLength]; + + for (int j = 0; j < inputBufferLength; j++) + { + values[j] = ((float)rnd.NextDouble() * (maxVal - minVal)) + minVal; + } + + // no need to dispose when buffer is not array owner + buffers[i] = Configuration.Default.MemoryAllocator.Allocate2D(values.Length, 1); + } + + return buffers; + } + } +} diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/GrayscaleColorConversion.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/GrayscaleColorConversion.cs new file mode 100644 index 0000000000..ee4d7b1bd4 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/GrayscaleColorConversion.cs @@ -0,0 +1,38 @@ +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters; + +namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg +{ + [Config(typeof(Config.ShortClr))] + public class GrayscaleColorConversion : ColorConversionBenchmark + { + public GrayscaleColorConversion() + : base(1) + { + } + + [Benchmark(Baseline = true)] + public void Scalar() + { + var values = new JpegColorConverter.ComponentValues(this.input, 0); + + new JpegColorConverter.FromGrayscaleBasic(8).ConvertToRgba(values, this.output); + } + + [Benchmark] + public void SimdVector8() + { + var values = new JpegColorConverter.ComponentValues(this.input, 0); + + new JpegColorConverter.FromGrayscaleVector8(8).ConvertToRgba(values, this.output); + } + + [Benchmark] + public void SimdVectorAvx2() + { + var values = new JpegColorConverter.ComponentValues(this.input, 0); + + new JpegColorConverter.FromGrayscaleAvx2(8).ConvertToRgba(values, this.output); + } + } +} diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/RgbColorConversion.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/RgbColorConversion.cs new file mode 100644 index 0000000000..6f20ffda29 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/RgbColorConversion.cs @@ -0,0 +1,38 @@ +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters; + +namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg +{ + [Config(typeof(Config.ShortClr))] + public class RgbColorConversion : ColorConversionBenchmark + { + public RgbColorConversion() + : base(3) + { + } + + [Benchmark(Baseline = true)] + public void Scalar() + { + var values = new JpegColorConverter.ComponentValues(this.input, 0); + + new JpegColorConverter.FromRgbBasic(8).ConvertToRgba(values, this.output); + } + + [Benchmark] + public void SimdVector8() + { + var values = new JpegColorConverter.ComponentValues(this.input, 0); + + new JpegColorConverter.FromRgbVector8(8).ConvertToRgba(values, this.output); + } + + [Benchmark] + public void SimdVectorAvx2() + { + var values = new JpegColorConverter.ComponentValues(this.input, 0); + + new JpegColorConverter.FromRgbAvx2(8).ConvertToRgba(values, this.output); + } + } +} diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YCbCrColorConversion.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YCbCrColorConversion.cs index 9b4b4568f5..c17f14fe6c 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YCbCrColorConversion.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YCbCrColorConversion.cs @@ -1,39 +1,18 @@ // Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. -using System; -using System.Numerics; - using BenchmarkDotNet.Attributes; using SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters; -using SixLabors.ImageSharp.Memory; namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg { [Config(typeof(Config.ShortClr))] - public class YCbCrColorConversion + public class YCbCrColorConversion : ColorConversionBenchmark { - private Buffer2D[] input; - - private Vector4[] output; - - public const int Count = 128; - - [GlobalSetup] - public void Setup() - { - this.input = CreateRandomValues(3, Count); - this.output = new Vector4[Count]; - } - - [GlobalCleanup] - public void Cleanup() + public YCbCrColorConversion() + : base(3) { - foreach (Buffer2D buffer in this.input) - { - buffer.Dispose(); - } } [Benchmark] @@ -45,7 +24,7 @@ public void Scalar() } [Benchmark(Baseline = true)] - public void SimdVector4() + public void SimdVector() { var values = new JpegColorConverter.ComponentValues(this.input, 0); @@ -67,29 +46,5 @@ public void SimdVectorAvx2() new JpegColorConverter.FromYCbCrAvx2(8).ConvertToRgba(values, this.output); } - - private static Buffer2D[] CreateRandomValues( - int componentCount, - int inputBufferLength, - float minVal = 0f, - float maxVal = 255f) - { - var rnd = new Random(42); - var buffers = new Buffer2D[componentCount]; - for (int i = 0; i < componentCount; i++) - { - var values = new float[inputBufferLength]; - - for (int j = 0; j < inputBufferLength; j++) - { - values[j] = ((float)rnd.NextDouble() * (maxVal - minVal)) + minVal; - } - - // no need to dispose when buffer is not array owner - buffers[i] = Configuration.Default.MemoryAllocator.Allocate2D(values.Length, 1); - } - - return buffers; - } } } diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YccKColorConverter.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YccKColorConverter.cs new file mode 100644 index 0000000000..5357cecd2b --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YccKColorConverter.cs @@ -0,0 +1,38 @@ +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters; + +namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg +{ + [Config(typeof(Config.ShortClr))] + public class YccKColorConverter : ColorConversionBenchmark + { + public YccKColorConverter() + : base(4) + { + } + + [Benchmark(Baseline = true)] + public void Scalar() + { + var values = new JpegColorConverter.ComponentValues(this.input, 0); + + new JpegColorConverter.FromYccKBasic(8).ConvertToRgba(values, this.output); + } + + [Benchmark] + public void SimdVector8() + { + var values = new JpegColorConverter.ComponentValues(this.input, 0); + + new JpegColorConverter.FromYccKVector8(8).ConvertToRgba(values, this.output); + } + + [Benchmark] + public void SimdVectorAvx2() + { + var values = new JpegColorConverter.ComponentValues(this.input, 0); + + new JpegColorConverter.FromYccKAvx2(8).ConvertToRgba(values, this.output); + } + } +} From d76dbaa6939cc266c4b478476866a2c8e5afadce Mon Sep 17 00:00:00 2001 From: Nicolas Portmann Date: Fri, 6 Nov 2020 18:05:39 +0100 Subject: [PATCH 05/10] Drop FromGrayscaleVector8 --- ...JpegColorConverter.FromGrayScaleVector8.cs | 53 ------------------- .../ColorConverters/JpegColorConverter.cs | 1 - .../Codecs/Jpeg/GrayscaleColorConversion.cs | 8 --- .../Formats/Jpg/JpegColorConverterTests.cs | 18 ------- 4 files changed, 80 deletions(-) delete mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleVector8.cs diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleVector8.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleVector8.cs deleted file mode 100644 index 75ea601012..0000000000 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleVector8.cs +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright (c) Six Labors. -// Licensed under the Apache License, Version 2.0. - -using System; -using System.Numerics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -using SixLabors.ImageSharp.Tuples; - -namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters -{ - internal abstract partial class JpegColorConverter - { - internal sealed class FromGrayscaleVector8 : Vector8JpegColorConverter - { - public FromGrayscaleVector8(int precision) - : base(JpegColorSpace.Grayscale, precision) - { - } - - protected override void ConvertCoreVectorized(in ComponentValues values, Span result) - { - ref Vector gBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); - - ref Vector4Octet resultBase = - ref Unsafe.As(ref MemoryMarshal.GetReference(result)); - - Vector4Pair gg = default; - ref Vector ggRefAsVector = ref Unsafe.As>(ref gg); - - var scale = new Vector(1 / this.MaximumValue); - - // Walking 8 elements at one step: - int n = result.Length / 8; - for (int i = 0; i < n; i++) - { - Vector g = Unsafe.Add(ref gBase, i); - g *= scale; - - ggRefAsVector = g; - - // Collect (g0,g1...g7) vector values in the expected (g0,g0,g0,1), (g1,g1,g1,1) ... order: - ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); - destination.Pack(ref gg); - } - } - - protected override void ConvertCore(in ComponentValues values, Span result) => - FromGrayscaleBasic.ConvertCore(values, result, this.MaximumValue); - } - } -} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs index 7b3c11938c..7afc09ff56 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs @@ -141,7 +141,6 @@ private static IEnumerable GetCmykConverters(int precision) private static IEnumerable GetGrayScaleConverters(int precision) { yield return new FromGrayscaleAvx2(precision); - yield return new FromGrayscaleVector8(precision); yield return new FromGrayscaleBasic(precision); } diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/GrayscaleColorConversion.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/GrayscaleColorConversion.cs index ee4d7b1bd4..3e8da6fb87 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/GrayscaleColorConversion.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/GrayscaleColorConversion.cs @@ -19,14 +19,6 @@ public void Scalar() new JpegColorConverter.FromGrayscaleBasic(8).ConvertToRgba(values, this.output); } - [Benchmark] - public void SimdVector8() - { - var values = new JpegColorConverter.ComponentValues(this.input, 0); - - new JpegColorConverter.FromGrayscaleVector8(8).ConvertToRgba(values, this.output); - } - [Benchmark] public void SimdVectorAvx2() { diff --git a/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs index 8662a61921..bcc52de14a 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs @@ -184,24 +184,6 @@ public void FromGrayscaleBasic(int inputBufferLength, int resultBufferLength, in seed); } - [Theory] - [MemberData(nameof(CommonConversionData))] - public void FromGrayscaleVector8(int inputBufferLength, int resultBufferLength, int seed) - { - if (!SimdUtils.HasVector8) - { - this.Output.WriteLine("No AVX2 present, skipping test!"); - return; - } - - ValidateConversion( - new JpegColorConverter.FromGrayscaleVector8(8), - 1, - inputBufferLength, - resultBufferLength, - seed); - } - [Theory] [MemberData(nameof(CommonConversionData))] public void FromGrayscaleAvx2(int inputBufferLength, int resultBufferLength, int seed) From 82a2359ee52ed561d73004f2ab81735b2df3d277 Mon Sep 17 00:00:00 2001 From: Nicolas Portmann Date: Fri, 6 Nov 2020 18:08:31 +0100 Subject: [PATCH 06/10] Polish benchmarks (fix new warnings) --- .../Codecs/Jpeg/CmykColorConversion.cs | 5 ++++- .../Codecs/Jpeg/ColorConversionBenchmark.cs | 7 +++++-- .../Codecs/Jpeg/GrayscaleColorConversion.cs | 5 ++++- .../Codecs/Jpeg/RgbColorConversion.cs | 5 ++++- .../Codecs/Jpeg/YccKColorConverter.cs | 5 ++++- 5 files changed, 21 insertions(+), 6 deletions(-) diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/CmykColorConversion.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/CmykColorConversion.cs index 64947408b0..1e6b9fe92f 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/CmykColorConversion.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/CmykColorConversion.cs @@ -1,4 +1,7 @@ -using BenchmarkDotNet.Attributes; +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using BenchmarkDotNet.Attributes; using SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters; namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversionBenchmark.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversionBenchmark.cs index 0461591286..da8c517353 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversionBenchmark.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversionBenchmark.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; using System.Numerics; using BenchmarkDotNet.Attributes; using SixLabors.ImageSharp.Memory; @@ -21,7 +24,7 @@ protected ColorConversionBenchmark(int componentCount) [GlobalSetup] public void Setup() { - this.input = CreateRandomValues(componentCount, Count); + this.input = CreateRandomValues(this.componentCount, Count); this.output = new Vector4[Count]; } diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/GrayscaleColorConversion.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/GrayscaleColorConversion.cs index 3e8da6fb87..74b3e6db6f 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/GrayscaleColorConversion.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/GrayscaleColorConversion.cs @@ -1,4 +1,7 @@ -using BenchmarkDotNet.Attributes; +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using BenchmarkDotNet.Attributes; using SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters; namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/RgbColorConversion.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/RgbColorConversion.cs index 6f20ffda29..68dc0f04e9 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/RgbColorConversion.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/RgbColorConversion.cs @@ -1,4 +1,7 @@ -using BenchmarkDotNet.Attributes; +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using BenchmarkDotNet.Attributes; using SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters; namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YccKColorConverter.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YccKColorConverter.cs index 5357cecd2b..ed8758131b 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YccKColorConverter.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YccKColorConverter.cs @@ -1,4 +1,7 @@ -using BenchmarkDotNet.Attributes; +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using BenchmarkDotNet.Attributes; using SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters; namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg From 8212e60d535b627cc033a09c83a650a9da280eed Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 6 Nov 2020 21:23:51 +0000 Subject: [PATCH 07/10] Respond to Anton fedback --- src/ImageSharp/Common/Helpers/SimdUtils.cs | 7 +++++++ ...pegColorConverter.Avx2JpegColorConverter.cs | 6 +----- .../JpegColorConverter.FromCmykAvx2.cs | 2 +- ... => JpegColorConverter.FromYCbCrVector4.cs} | 8 ++++---- .../ColorConverters/JpegColorConverter.cs | 18 ++++++++++++++---- .../Codecs/Jpeg/YCbCrColorConversion.cs | 2 +- .../Formats/Jpg/JpegColorConverterTests.cs | 12 +++++++++--- 7 files changed, 37 insertions(+), 18 deletions(-) rename src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/{JpegColorConverter.FromYCbCrVector.cs => JpegColorConverter.FromYCbCrVector4.cs} (94%) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.cs b/src/ImageSharp/Common/Helpers/SimdUtils.cs index 073a5a9f67..7cbb5bfe35 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.cs @@ -25,6 +25,13 @@ internal static partial class SimdUtils public static bool HasVector8 { get; } = Vector.IsHardwareAccelerated && Vector.Count == 8 && Vector.Count == 8; + /// + /// Gets a value indicating whether code is being JIT-ed to SSE instructions + /// where float and integer registers are of size 128 byte. + /// + public static bool HasVector4 { get; } = + Vector.IsHardwareAccelerated && Vector.Count == 4; + public static bool HasAvx2 { get diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Avx2JpegColorConverter.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Avx2JpegColorConverter.cs index ef144fc1c7..90ebce3b87 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Avx2JpegColorConverter.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Avx2JpegColorConverter.cs @@ -1,10 +1,6 @@ -// Copyright (c) Six Labors. +// Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. -#if SUPPORTS_RUNTIME_INTRINSICS -using System.Runtime.Intrinsics.X86; -#endif - namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters { internal abstract partial class JpegColorConverter diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykAvx2.cs index a9e3e5b51d..f9334de73b 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykAvx2.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykAvx2.cs @@ -1,4 +1,4 @@ -// Copyright (c) Six Labors. +// Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. using System; diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector4.cs similarity index 94% rename from src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector.cs rename to src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector4.cs index 65a7c42d8d..193d7e71ee 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector4.cs @@ -11,14 +11,14 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters { internal abstract partial class JpegColorConverter { - internal sealed class FromYCbCrVector : VectorizedJpegColorConverter + internal sealed class FromYCbCrVector4 : VectorizedJpegColorConverter { - public FromYCbCrVector(int precision) + public FromYCbCrVector4(int precision) : base(JpegColorSpace.YCbCr, precision, 8) { } - protected override bool IsAvailable => true; + protected override bool IsAvailable => SimdUtils.HasVector4; protected override void ConvertCoreVectorized(in ComponentValues values, Span result) { @@ -74,7 +74,7 @@ protected override void ConvertCoreVectorized(in ComponentValues values, Span.Count == 4) + if (SimdUtils.HasVector4) { // TODO: Find a way to properly run & test this path on AVX2 PC-s! (Have I already mentioned that Vector is terrible?) r.RoundAndDownscalePreVector8(maxValue); diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs index 7afc09ff56..b794e3a26f 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs @@ -62,8 +62,10 @@ protected JpegColorConverter(JpegColorSpace colorSpace, int precision) /// public static JpegColorConverter GetConverter(JpegColorSpace colorSpace, int precision) { - JpegColorConverter converter = Array.Find(Converters, c => c.ColorSpace == colorSpace - && c.Precision == precision); + JpegColorConverter converter = Array.Find( + Converters, + c => c.ColorSpace == colorSpace + && c.Precision == precision); if (converter is null) { @@ -94,7 +96,7 @@ private static JpegColorConverter[] CreateConverters() converters.AddRange(GetGrayScaleConverters(8)); converters.AddRange(GetRgbConverters(8)); - // 8-bit converters + // 12-bit converters converters.AddRange(GetYCbCrConverters(12)); converters.AddRange(GetYccKConverters(12)); converters.AddRange(GetCmykConverters(12)); @@ -109,9 +111,11 @@ private static JpegColorConverter[] CreateConverters() /// private static IEnumerable GetYCbCrConverters(int precision) { +#if SUPPORTS_RUNTIME_INTRINSICS yield return new FromYCbCrAvx2(precision); +#endif yield return new FromYCbCrVector8(precision); - yield return new FromYCbCrVector(precision); + yield return new FromYCbCrVector4(precision); yield return new FromYCbCrBasic(precision); } @@ -120,7 +124,9 @@ private static IEnumerable GetYCbCrConverters(int precision) /// private static IEnumerable GetYccKConverters(int precision) { +#if SUPPORTS_RUNTIME_INTRINSICS yield return new FromYccKAvx2(precision); +#endif yield return new FromYccKVector8(precision); yield return new FromYccKBasic(precision); } @@ -130,7 +136,9 @@ private static IEnumerable GetYccKConverters(int precision) /// private static IEnumerable GetCmykConverters(int precision) { +#if SUPPORTS_RUNTIME_INTRINSICS yield return new FromCmykAvx2(precision); +#endif yield return new FromCmykVector8(precision); yield return new FromCmykBasic(precision); } @@ -140,7 +148,9 @@ private static IEnumerable GetCmykConverters(int precision) /// private static IEnumerable GetGrayScaleConverters(int precision) { +#if SUPPORTS_RUNTIME_INTRINSICS yield return new FromGrayscaleAvx2(precision); +#endif yield return new FromGrayscaleBasic(precision); } diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YCbCrColorConversion.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YCbCrColorConversion.cs index c17f14fe6c..94b28e4d9e 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YCbCrColorConversion.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YCbCrColorConversion.cs @@ -28,7 +28,7 @@ public void SimdVector() { var values = new JpegColorConverter.ComponentValues(this.input, 0); - new JpegColorConverter.FromYCbCrVector(8).ConvertToRgba(values, this.output); + new JpegColorConverter.FromYCbCrVector4(8).ConvertToRgba(values, this.output); } [Benchmark] diff --git a/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs index bcc52de14a..68210caeaf 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs @@ -54,10 +54,16 @@ public void FromYCbCrBasic(int inputBufferLength, int resultBufferLength, int se [Theory] [MemberData(nameof(CommonConversionData))] - public void FromYCbCrVector(int inputBufferLength, int resultBufferLength, int seed) + public void FromYCbCrVector4(int inputBufferLength, int resultBufferLength, int seed) { + if (!SimdUtils.HasVector4) + { + this.Output.WriteLine("No SSE present, skipping test!"); + return; + } + ValidateConversion( - new JpegColorConverter.FromYCbCrVector(8), + new JpegColorConverter.FromYCbCrVector4(8), 3, inputBufferLength, resultBufferLength, @@ -346,7 +352,7 @@ public void BenchmarkYCbCr(bool simd) JpegColorConverter.ComponentValues values = CreateRandomValues(3, count, 1); var result = new Vector4[count]; - JpegColorConverter converter = simd ? (JpegColorConverter)new JpegColorConverter.FromYCbCrVector(8) : new JpegColorConverter.FromYCbCrBasic(8); + JpegColorConverter converter = simd ? (JpegColorConverter)new JpegColorConverter.FromYCbCrVector4(8) : new JpegColorConverter.FromYCbCrBasic(8); // Warm up: converter.ConvertToRgba(values, result); From 7a3bdf340bd49cab3956b3604f7fdfd059946749 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 6 Nov 2020 21:48:26 +0000 Subject: [PATCH 08/10] Add missing conditional --- .../Components/Decoder/ColorConverters/JpegColorConverter.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs index b794e3a26f..2d24f01dd8 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs @@ -159,7 +159,9 @@ private static IEnumerable GetGrayScaleConverters(int precis /// private static IEnumerable GetRgbConverters(int precision) { +#if SUPPORTS_RUNTIME_INTRINSICS yield return new FromRgbAvx2(precision); +#endif yield return new FromRgbVector8(precision); yield return new FromRgbBasic(precision); } From 8f5ae2dd60619bff3c7341db1f1fc744ac5bc222 Mon Sep 17 00:00:00 2001 From: Nicolas Portmann Date: Sat, 7 Nov 2020 10:44:36 +0100 Subject: [PATCH 09/10] Micro-optimize grayscale converter --- .../JpegColorConverter.FromGrayScaleAvx2.cs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleAvx2.cs index aebffc3dfc..45846a6b56 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleAvx2.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleAvx2.cs @@ -42,16 +42,16 @@ protected override void ConvertCoreVectorized(in ComponentValues values, Span g = Avx2.PermuteVar8x32(Unsafe.Add(ref gBase, i), vcontrol); + Vector256 g = Avx.Multiply(Unsafe.Add(ref gBase, i), scale); - g = Avx.Multiply(g, scale); + g = Avx2.PermuteVar8x32(g, vcontrol); ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); destination = Avx.Blend(Avx.Permute(g, 0b00_00_00_00), one, 0b1000_1000); - Unsafe.Add(ref destination, 1) = Avx.Blend(Avx.Permute(g, 0b01_01_01_01), one, 0b1000_1000); - Unsafe.Add(ref destination, 2) = Avx.Blend(Avx.Permute(g, 0b10_10_10_10), one, 0b1000_1000); - Unsafe.Add(ref destination, 3) = Avx.Blend(Avx.Permute(g, 0b11_11_11_11), one, 0b1000_1000); + Unsafe.Add(ref destination, 1) = Avx.Blend(Avx.Shuffle(g, g, 0b01_01_01_01), one, 0b1000_1000); + Unsafe.Add(ref destination, 2) = Avx.Blend(Avx.Shuffle(g, g, 0b10_10_10_10), one, 0b1000_1000); + Unsafe.Add(ref destination, 3) = Avx.Blend(Avx.Shuffle(g, g, 0b11_11_11_11), one, 0b1000_1000); } #endif } From 0aa3ba50712983f89bde5aa67c6d4748a27fe2b6 Mon Sep 17 00:00:00 2001 From: Nicolas Portmann Date: Sat, 7 Nov 2020 11:35:21 +0100 Subject: [PATCH 10/10] Remove useless branch --- .../JpegColorConverter.FromYCbCrVector4.cs | 23 ++++--------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector4.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector4.cs index 193d7e71ee..42f8eef5a1 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector4.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector4.cs @@ -22,6 +22,7 @@ public FromYCbCrVector4(int precision) protected override void ConvertCoreVectorized(in ComponentValues values, Span result) { + // TODO: Find a way to properly run & test this path on AVX2 PC-s! (Have I already mentioned that Vector is terrible?) DebugGuard.IsTrue(result.Length % 8 == 0, nameof(result), "result.Length should be divisible by 8!"); ref Vector4Pair yBase = @@ -74,25 +75,9 @@ protected override void ConvertCoreVectorized(in ComponentValues values, Span is terrible?) - r.RoundAndDownscalePreVector8(maxValue); - g.RoundAndDownscalePreVector8(maxValue); - b.RoundAndDownscalePreVector8(maxValue); - } - else if (SimdUtils.HasVector8) - { - r.RoundAndDownscaleVector8(maxValue); - g.RoundAndDownscaleVector8(maxValue); - b.RoundAndDownscaleVector8(maxValue); - } - else - { - // TODO: Run fallback scalar code here - // However, no issues expected before someone implements this: https://github.com/dotnet/coreclr/issues/12007 - JpegThrowHelper.ThrowNotImplementedException("Your CPU architecture is too modern!"); - } + r.RoundAndDownscalePreVector8(maxValue); + g.RoundAndDownscalePreVector8(maxValue); + b.RoundAndDownscalePreVector8(maxValue); // Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i);