From 151bacc0203f097f7aa09d6f3adb52cd97171a78 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Sat, 23 Oct 2021 19:22:57 +0200 Subject: [PATCH 01/85] Use Convert.To after rounding to avoid different behavior on ARM vs x86/x64 --- .../PixelImplementations/NormalizedByte2.cs | 12 ++++++---- .../PixelImplementations/NormalizedByte4.cs | 20 ++++++++-------- .../PixelImplementations/NormalizedShort2.cs | 11 +++++---- .../PixelImplementations/NormalizedShort4.cs | 23 ++++++++++--------- .../PixelImplementations/Short2.cs | 4 ++-- .../PixelImplementations/Short4.cs | 8 +++---- 6 files changed, 42 insertions(+), 36 deletions(-) diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/NormalizedByte2.cs b/src/ImageSharp/PixelFormats/PixelImplementations/NormalizedByte2.cs index 8b244d391c..720a1eef65 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/NormalizedByte2.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/NormalizedByte2.cs @@ -15,7 +15,9 @@ namespace SixLabors.ImageSharp.PixelFormats /// public partial struct NormalizedByte2 : IPixel, IPackedVector { - private static readonly Vector2 Half = new Vector2(127); + private const float MaxPos = 127F; + + private static readonly Vector2 Half = new Vector2(MaxPos); private static readonly Vector2 MinusOne = new Vector2(-1F); /// @@ -154,8 +156,8 @@ public void FromVector4(Vector4 vector) public readonly Vector2 ToVector2() { return new Vector2( - (sbyte)((this.PackedValue >> 0) & 0xFF) / 127F, - (sbyte)((this.PackedValue >> 8) & 0xFF) / 127F); + (sbyte)((this.PackedValue >> 0) & 0xFF) / MaxPos, + (sbyte)((this.PackedValue >> 8) & 0xFF) / MaxPos); } /// @@ -181,8 +183,8 @@ private static ushort Pack(Vector2 vector) { vector = Vector2.Clamp(vector, MinusOne, Vector2.One) * Half; - int byte2 = ((ushort)Math.Round(vector.X) & 0xFF) << 0; - int byte1 = ((ushort)Math.Round(vector.Y) & 0xFF) << 8; + int byte2 = ((ushort)Convert.ToInt16(Math.Round(vector.X)) & 0xFF) << 0; + int byte1 = ((ushort)Convert.ToInt16(Math.Round(vector.Y)) & 0xFF) << 8; return (ushort)(byte2 | byte1); } diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/NormalizedByte4.cs b/src/ImageSharp/PixelFormats/PixelImplementations/NormalizedByte4.cs index 84f0bb0224..d1b4b73f2b 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/NormalizedByte4.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/NormalizedByte4.cs @@ -15,7 +15,9 @@ namespace SixLabors.ImageSharp.PixelFormats /// public partial struct NormalizedByte4 : IPixel, IPackedVector { - private static readonly Vector4 Half = new Vector4(127); + private const float MaxPos = 127F; + + private static readonly Vector4 Half = new Vector4(MaxPos); private static readonly Vector4 MinusOne = new Vector4(-1F); /// @@ -92,10 +94,10 @@ public readonly Vector4 ToScaledVector4() public readonly Vector4 ToVector4() { return new Vector4( - (sbyte)((this.PackedValue >> 0) & 0xFF) / 127F, - (sbyte)((this.PackedValue >> 8) & 0xFF) / 127F, - (sbyte)((this.PackedValue >> 16) & 0xFF) / 127F, - (sbyte)((this.PackedValue >> 24) & 0xFF) / 127F); + (sbyte)((this.PackedValue >> 0) & 0xFF) / MaxPos, + (sbyte)((this.PackedValue >> 8) & 0xFF) / MaxPos, + (sbyte)((this.PackedValue >> 16) & 0xFF) / MaxPos, + (sbyte)((this.PackedValue >> 24) & 0xFF) / MaxPos); } /// @@ -176,10 +178,10 @@ private static uint Pack(ref Vector4 vector) { vector = Numerics.Clamp(vector, MinusOne, Vector4.One) * Half; - uint byte4 = ((uint)MathF.Round(vector.X) & 0xFF) << 0; - uint byte3 = ((uint)MathF.Round(vector.Y) & 0xFF) << 8; - uint byte2 = ((uint)MathF.Round(vector.Z) & 0xFF) << 16; - uint byte1 = ((uint)MathF.Round(vector.W) & 0xFF) << 24; + uint byte4 = ((uint)Convert.ToInt16(MathF.Round(vector.X)) & 0xFF) << 0; + uint byte3 = ((uint)Convert.ToInt16(MathF.Round(vector.Y)) & 0xFF) << 8; + uint byte2 = ((uint)Convert.ToInt16(MathF.Round(vector.Z)) & 0xFF) << 16; + uint byte1 = ((uint)Convert.ToInt16(MathF.Round(vector.W)) & 0xFF) << 24; return byte4 | byte3 | byte2 | byte1; } diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/NormalizedShort2.cs b/src/ImageSharp/PixelFormats/PixelImplementations/NormalizedShort2.cs index 97bbc1206f..d08a546031 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/NormalizedShort2.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/NormalizedShort2.cs @@ -15,7 +15,10 @@ namespace SixLabors.ImageSharp.PixelFormats /// public partial struct NormalizedShort2 : IPixel, IPackedVector { - private static readonly Vector2 Max = new Vector2(0x7FFF); + // Largest two byte positive number 0xFFFF >> 1; + private const float MaxPos = 0x7FFF; + + private static readonly Vector2 Max = new Vector2(MaxPos); private static readonly Vector2 Min = Vector2.Negate(Max); /// @@ -156,11 +159,9 @@ public void ToRgba32(ref Rgba32 dest) [MethodImpl(InliningOptions.ShortMethod)] public readonly Vector2 ToVector2() { - const float MaxVal = 0x7FFF; - return new Vector2( - (short)(this.PackedValue & 0xFFFF) / MaxVal, - (short)(this.PackedValue >> 0x10) / MaxVal); + (short)(this.PackedValue & 0xFFFF) / MaxPos, + (short)(this.PackedValue >> 0x10) / MaxPos); } /// diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/NormalizedShort4.cs b/src/ImageSharp/PixelFormats/PixelImplementations/NormalizedShort4.cs index a3fd8989ce..158b6eb4b0 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/NormalizedShort4.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/NormalizedShort4.cs @@ -15,7 +15,10 @@ namespace SixLabors.ImageSharp.PixelFormats /// public partial struct NormalizedShort4 : IPixel, IPackedVector { - private static readonly Vector4 Max = new Vector4(0x7FFF); + // Largest two byte positive number 0xFFFF >> 1; + private const float MaxPos = 0x7FFF; + + private static readonly Vector4 Max = new Vector4(MaxPos); private static readonly Vector4 Min = Vector4.Negate(Max); /// @@ -91,13 +94,11 @@ public readonly Vector4 ToScaledVector4() [MethodImpl(InliningOptions.ShortMethod)] public readonly Vector4 ToVector4() { - const float MaxVal = 0x7FFF; - return new Vector4( - (short)((this.PackedValue >> 0x00) & 0xFFFF) / MaxVal, - (short)((this.PackedValue >> 0x10) & 0xFFFF) / MaxVal, - (short)((this.PackedValue >> 0x20) & 0xFFFF) / MaxVal, - (short)((this.PackedValue >> 0x30) & 0xFFFF) / MaxVal); + (short)((this.PackedValue >> 0x00) & 0xFFFF) / MaxPos, + (short)((this.PackedValue >> 0x10) & 0xFFFF) / MaxPos, + (short)((this.PackedValue >> 0x20) & 0xFFFF) / MaxPos, + (short)((this.PackedValue >> 0x30) & 0xFFFF) / MaxPos); } /// @@ -180,10 +181,10 @@ private static ulong Pack(ref Vector4 vector) vector = Numerics.Clamp(vector, Min, Max); // Round rather than truncate. - ulong word4 = ((ulong)MathF.Round(vector.X) & 0xFFFF) << 0x00; - ulong word3 = ((ulong)MathF.Round(vector.Y) & 0xFFFF) << 0x10; - ulong word2 = ((ulong)MathF.Round(vector.Z) & 0xFFFF) << 0x20; - ulong word1 = ((ulong)MathF.Round(vector.W) & 0xFFFF) << 0x30; + ulong word4 = ((ulong)Convert.ToInt32(MathF.Round(vector.X)) & 0xFFFF) << 0x00; + ulong word3 = ((ulong)Convert.ToInt32(MathF.Round(vector.Y)) & 0xFFFF) << 0x10; + ulong word2 = ((ulong)Convert.ToInt32(MathF.Round(vector.Z)) & 0xFFFF) << 0x20; + ulong word1 = ((ulong)Convert.ToInt32(MathF.Round(vector.W)) & 0xFFFF) << 0x30; return word4 | word3 | word2 | word1; } diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Short2.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Short2.cs index f7a4f99945..101027a78e 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Short2.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Short2.cs @@ -181,8 +181,8 @@ public override readonly string ToString() private static uint Pack(Vector2 vector) { vector = Vector2.Clamp(vector, Min, Max); - uint word2 = (uint)Math.Round(vector.X) & 0xFFFF; - uint word1 = ((uint)Math.Round(vector.Y) & 0xFFFF) << 0x10; + uint word2 = (uint)Convert.ToInt32(Math.Round(vector.X)) & 0xFFFF; + uint word1 = ((uint)Convert.ToInt32(Math.Round(vector.Y)) & 0xFFFF) << 0x10; return word2 | word1; } diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Short4.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Short4.cs index 409f46c721..86a519297b 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Short4.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Short4.cs @@ -186,10 +186,10 @@ private static ulong Pack(ref Vector4 vector) vector = Numerics.Clamp(vector, Min, Max); // Clamp the value between min and max values - ulong word4 = ((ulong)Math.Round(vector.X) & 0xFFFF) << 0x00; - ulong word3 = ((ulong)Math.Round(vector.Y) & 0xFFFF) << 0x10; - ulong word2 = ((ulong)Math.Round(vector.Z) & 0xFFFF) << 0x20; - ulong word1 = ((ulong)Math.Round(vector.W) & 0xFFFF) << 0x30; + ulong word4 = ((ulong)Convert.ToInt32(Math.Round(vector.X)) & 0xFFFF) << 0x00; + ulong word3 = ((ulong)Convert.ToInt32(Math.Round(vector.Y)) & 0xFFFF) << 0x10; + ulong word2 = ((ulong)Convert.ToInt32(Math.Round(vector.Z)) & 0xFFFF) << 0x20; + ulong word1 = ((ulong)Convert.ToInt32(Math.Round(vector.W)) & 0xFFFF) << 0x30; return word4 | word3 | word2 | word1; } From 49e57722b815ec550e15cd41fe4e3202abe5287c Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Sat, 23 Oct 2021 20:05:25 +0200 Subject: [PATCH 02/85] Cleanup --- .../PixelFormats/PixelImplementations/A8.cs | 2 +- .../PixelImplementations/Argb32.cs | 6 +-- .../PixelImplementations/Bgr24.cs | 4 +- .../PixelImplementations/Bgr565.cs | 12 ++---- .../PixelImplementations/Bgra32.cs | 6 +-- .../PixelImplementations/Bgra4444.cs | 5 +-- .../PixelImplementations/Bgra5551.cs | 10 +---- .../PixelImplementations/Byte4.cs | 10 +---- .../PixelImplementations/HalfSingle.cs | 7 +--- .../PixelImplementations/HalfVector2.cs | 5 +-- .../PixelImplementations/HalfVector4.cs | 10 +---- .../PixelFormats/PixelImplementations/L16.cs | 25 +++-------- .../PixelFormats/PixelImplementations/L8.cs | 4 +- .../PixelFormats/PixelImplementations/La16.cs | 6 +-- .../PixelFormats/PixelImplementations/La32.cs | 2 +- .../PixelImplementations/NormalizedByte2.cs | 11 ++--- .../PixelImplementations/NormalizedByte4.cs | 14 ++----- .../PixelImplementations/NormalizedShort2.cs | 12 ++---- .../PixelImplementations/NormalizedShort4.cs | 12 ++---- .../PixelFormats/PixelImplementations/Rg32.cs | 9 ++-- .../PixelImplementations/Rgb24.cs | 4 +- .../PixelImplementations/Rgb48.cs | 2 +- .../PixelImplementations/Rgba1010102.cs | 12 ++---- .../PixelImplementations/Rgba32.cs | 19 ++++----- .../PixelImplementations/Rgba64.cs | 2 +- .../PixelImplementations/RgbaVector.cs | 13 +++--- .../PixelImplementations/Short2.cs | 8 ++-- .../PixelFormats/Bgr24Tests.cs | 3 +- .../PixelFormats/Bgra32Tests.cs | 11 +++-- .../ImageSharp.Tests/PixelFormats/L8Tests.cs | 24 +---------- .../PixelFormats/La16Tests.cs | 24 +---------- .../PixelFormats/PixelBlenderTests.cs | 6 +-- .../PixelFormats/PixelConverterTests.cs | 42 +++++++++---------- .../PixelFormats/Rgb24Tests.cs | 4 +- .../PixelFormats/UnPackedPixelTests.cs | 2 +- 35 files changed, 111 insertions(+), 237 deletions(-) diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/A8.cs b/src/ImageSharp/PixelFormats/PixelImplementations/A8.cs index 77df2bc800..cca7ff7db9 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/A8.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/A8.cs @@ -73,7 +73,7 @@ public partial struct A8 : IPixel, IPackedVector /// [MethodImpl(InliningOptions.ShortMethod)] - public readonly Vector4 ToVector4() => new Vector4(0, 0, 0, this.PackedValue / 255F); + public readonly Vector4 ToVector4() => new(0, 0, 0, this.PackedValue / 255F); /// [MethodImpl(InliningOptions.ShortMethod)] diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Argb32.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Argb32.cs index 3ac9b523f3..8c1b04ff1f 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Argb32.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Argb32.cs @@ -44,12 +44,12 @@ public partial struct Argb32 : IPixel, IPackedVector /// /// The maximum byte value. /// - private static readonly Vector4 MaxBytes = new Vector4(255); + private static readonly Vector4 MaxBytes = new(255); /// /// The half vector value. /// - private static readonly Vector4 Half = new Vector4(0.5F); + private static readonly Vector4 Half = new(0.5F); /// /// Initializes a new instance of the struct. @@ -151,7 +151,7 @@ public uint PackedValue /// The . /// The . [MethodImpl(InliningOptions.ShortMethod)] - public static implicit operator Color(Argb32 source) => new Color(source); + public static implicit operator Color(Argb32 source) => new(source); /// /// Converts a to . diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Bgr24.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Bgr24.cs index 6cff5fd772..22e983a654 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Bgr24.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Bgr24.cs @@ -56,7 +56,7 @@ public Bgr24(byte r, byte g, byte b) /// The . /// The . [MethodImpl(InliningOptions.ShortMethod)] - public static implicit operator Color(Bgr24 source) => new Color(source); + public static implicit operator Color(Bgr24 source) => new(source); /// /// Converts a to . @@ -225,7 +225,7 @@ public void FromRgba64(Rgba64 source) public override readonly bool Equals(object obj) => obj is Bgr24 other && this.Equals(other); /// - public override readonly string ToString() => $"Bgra({this.B}, {this.G}, {this.R})"; + public override readonly string ToString() => $"Bgr24({this.B}, {this.G}, {this.R})"; /// [MethodImpl(InliningOptions.ShortMethod)] diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Bgr565.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Bgr565.cs index fd12b68376..5585310b91 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Bgr565.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Bgr565.cs @@ -81,7 +81,7 @@ public void FromVector4(Vector4 vector) /// [MethodImpl(InliningOptions.ShortMethod)] - public readonly Vector4 ToVector4() => new Vector4(this.ToVector3(), 1F); + public readonly Vector4 ToVector4() => new(this.ToVector3(), 1F); /// [MethodImpl(InliningOptions.ShortMethod)] @@ -125,10 +125,7 @@ public void FromVector4(Vector4 vector) /// [MethodImpl(InliningOptions.ShortMethod)] - public void ToRgba32(ref Rgba32 dest) - { - dest.FromScaledVector4(this.ToScaledVector4()); - } + public void ToRgba32(ref Rgba32 dest) => dest.FromScaledVector4(this.ToScaledVector4()); /// [MethodImpl(InliningOptions.ShortMethod)] @@ -144,13 +141,10 @@ public void ToRgba32(ref Rgba32 dest) /// /// The . [MethodImpl(InliningOptions.ShortMethod)] - public readonly Vector3 ToVector3() - { - return new Vector3( + public readonly Vector3 ToVector3() => new( ((this.PackedValue >> 11) & 0x1F) * (1F / 31F), ((this.PackedValue >> 5) & 0x3F) * (1F / 63F), (this.PackedValue & 0x1F) * (1F / 31F)); - } /// public override readonly bool Equals(object obj) => obj is Bgr565 other && this.Equals(other); diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Bgra32.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Bgra32.cs index 190345ddaf..be4e178c24 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Bgra32.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Bgra32.cs @@ -41,12 +41,12 @@ public partial struct Bgra32 : IPixel, IPackedVector /// /// The maximum byte value. /// - private static readonly Vector4 MaxBytes = new Vector4(255); + private static readonly Vector4 MaxBytes = new(255); /// /// The half vector value. /// - private static readonly Vector4 Half = new Vector4(0.5F); + private static readonly Vector4 Half = new(0.5F); /// /// Initializes a new instance of the struct. @@ -104,7 +104,7 @@ public uint PackedValue /// The . /// The . [MethodImpl(InliningOptions.ShortMethod)] - public static implicit operator Color(Bgra32 source) => new Color(source); + public static implicit operator Color(Bgra32 source) => new(source); /// /// Converts a to . diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Bgra4444.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Bgra4444.cs index 8fa5219d53..3578f1dd38 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Bgra4444.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Bgra4444.cs @@ -128,10 +128,7 @@ public readonly Vector4 ToVector4() /// [MethodImpl(InliningOptions.ShortMethod)] - public void ToRgba32(ref Rgba32 dest) - { - dest.FromScaledVector4(this.ToScaledVector4()); - } + public void ToRgba32(ref Rgba32 dest) => dest.FromScaledVector4(this.ToScaledVector4()); /// [MethodImpl(InliningOptions.ShortMethod)] diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Bgra5551.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Bgra5551.cs index b3a0d08960..0254397c3f 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Bgra5551.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Bgra5551.cs @@ -78,14 +78,11 @@ public Bgra5551(float x, float y, float z, float w) /// [MethodImpl(InliningOptions.ShortMethod)] - public readonly Vector4 ToVector4() - { - return new Vector4( + public readonly Vector4 ToVector4() => new( ((this.PackedValue >> 10) & 0x1F) / 31F, ((this.PackedValue >> 5) & 0x1F) / 31F, ((this.PackedValue >> 0) & 0x1F) / 31F, (this.PackedValue >> 15) & 0x01); - } /// [MethodImpl(InliningOptions.ShortMethod)] @@ -129,10 +126,7 @@ public readonly Vector4 ToVector4() /// [MethodImpl(InliningOptions.ShortMethod)] - public void ToRgba32(ref Rgba32 dest) - { - dest.FromScaledVector4(this.ToScaledVector4()); - } + public void ToRgba32(ref Rgba32 dest) => dest.FromScaledVector4(this.ToScaledVector4()); /// [MethodImpl(InliningOptions.ShortMethod)] diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Byte4.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Byte4.cs index e261212918..0995f8417f 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Byte4.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Byte4.cs @@ -78,14 +78,11 @@ public Byte4(float x, float y, float z, float w) /// [MethodImpl(InliningOptions.ShortMethod)] - public readonly Vector4 ToVector4() - { - return new Vector4( + public readonly Vector4 ToVector4() => new( this.PackedValue & 0xFF, (this.PackedValue >> 0x8) & 0xFF, (this.PackedValue >> 0x10) & 0xFF, (this.PackedValue >> 0x18) & 0xFF); - } /// [MethodImpl(InliningOptions.ShortMethod)] @@ -129,10 +126,7 @@ public readonly Vector4 ToVector4() /// [MethodImpl(InliningOptions.ShortMethod)] - public void ToRgba32(ref Rgba32 dest) - { - dest.FromScaledVector4(this.ToScaledVector4()); - } + public void ToRgba32(ref Rgba32 dest) => dest.FromScaledVector4(this.ToScaledVector4()); /// [MethodImpl(InliningOptions.ShortMethod)] diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/HalfSingle.cs b/src/ImageSharp/PixelFormats/PixelImplementations/HalfSingle.cs index 5c4aa1cfb6..b0ef0f6a9b 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/HalfSingle.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/HalfSingle.cs @@ -74,7 +74,7 @@ public readonly Vector4 ToScaledVector4() /// [MethodImpl(InliningOptions.ShortMethod)] - public readonly Vector4 ToVector4() => new Vector4(this.ToSingle(), 0, 0, 1F); + public readonly Vector4 ToVector4() => new(this.ToSingle(), 0, 0, 1F); /// [MethodImpl(InliningOptions.ShortMethod)] @@ -118,10 +118,7 @@ public readonly Vector4 ToScaledVector4() /// [MethodImpl(InliningOptions.ShortMethod)] - public void ToRgba32(ref Rgba32 dest) - { - dest.FromScaledVector4(this.ToScaledVector4()); - } + public void ToRgba32(ref Rgba32 dest) => dest.FromScaledVector4(this.ToScaledVector4()); /// [MethodImpl(InliningOptions.ShortMethod)] diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/HalfVector2.cs b/src/ImageSharp/PixelFormats/PixelImplementations/HalfVector2.cs index 39cb6f7993..8be8261302 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/HalfVector2.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/HalfVector2.cs @@ -129,10 +129,7 @@ public readonly Vector4 ToVector4() /// [MethodImpl(InliningOptions.ShortMethod)] - public void ToRgba32(ref Rgba32 dest) - { - dest.FromScaledVector4(this.ToScaledVector4()); - } + public void ToRgba32(ref Rgba32 dest) => dest.FromScaledVector4(this.ToScaledVector4()); /// [MethodImpl(InliningOptions.ShortMethod)] diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/HalfVector4.cs b/src/ImageSharp/PixelFormats/PixelImplementations/HalfVector4.cs index 9826d61a2b..955b274acb 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/HalfVector4.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/HalfVector4.cs @@ -86,14 +86,11 @@ public readonly Vector4 ToScaledVector4() /// [MethodImpl(InliningOptions.ShortMethod)] - public readonly Vector4 ToVector4() - { - return new Vector4( + public readonly Vector4 ToVector4() => new( HalfTypeHelper.Unpack((ushort)this.PackedValue), HalfTypeHelper.Unpack((ushort)(this.PackedValue >> 0x10)), HalfTypeHelper.Unpack((ushort)(this.PackedValue >> 0x20)), HalfTypeHelper.Unpack((ushort)(this.PackedValue >> 0x30))); - } /// [MethodImpl(InliningOptions.ShortMethod)] @@ -137,10 +134,7 @@ public readonly Vector4 ToVector4() /// [MethodImpl(InliningOptions.ShortMethod)] - public void ToRgba32(ref Rgba32 dest) - { - dest.FromScaledVector4(this.ToScaledVector4()); - } + public void ToRgba32(ref Rgba32 dest) => dest.FromScaledVector4(this.ToScaledVector4()); /// [MethodImpl(InliningOptions.ShortMethod)] diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/L16.cs b/src/ImageSharp/PixelFormats/PixelImplementations/L16.cs index dd31aae2fc..6d1128dd2c 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/L16.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/L16.cs @@ -72,33 +72,24 @@ public readonly Vector4 ToVector4() /// [MethodImpl(InliningOptions.ShortMethod)] - public void FromArgb32(Argb32 source) - { - this.PackedValue = ColorNumerics.Get16BitBT709Luminance( + public void FromArgb32(Argb32 source) => this.PackedValue = ColorNumerics.Get16BitBT709Luminance( ColorNumerics.UpscaleFrom8BitTo16Bit(source.R), ColorNumerics.UpscaleFrom8BitTo16Bit(source.G), ColorNumerics.UpscaleFrom8BitTo16Bit(source.B)); - } /// [MethodImpl(InliningOptions.ShortMethod)] - public void FromBgr24(Bgr24 source) - { - this.PackedValue = ColorNumerics.Get16BitBT709Luminance( + public void FromBgr24(Bgr24 source) => this.PackedValue = ColorNumerics.Get16BitBT709Luminance( ColorNumerics.UpscaleFrom8BitTo16Bit(source.R), ColorNumerics.UpscaleFrom8BitTo16Bit(source.G), ColorNumerics.UpscaleFrom8BitTo16Bit(source.B)); - } /// [MethodImpl(InliningOptions.ShortMethod)] - public void FromBgra32(Bgra32 source) - { - this.PackedValue = ColorNumerics.Get16BitBT709Luminance( + public void FromBgra32(Bgra32 source) => this.PackedValue = ColorNumerics.Get16BitBT709Luminance( ColorNumerics.UpscaleFrom8BitTo16Bit(source.R), ColorNumerics.UpscaleFrom8BitTo16Bit(source.G), ColorNumerics.UpscaleFrom8BitTo16Bit(source.B)); - } /// [MethodImpl(InliningOptions.ShortMethod)] @@ -122,23 +113,17 @@ public void FromBgra32(Bgra32 source) /// [MethodImpl(InliningOptions.ShortMethod)] - public void FromRgb24(Rgb24 source) - { - this.PackedValue = ColorNumerics.Get16BitBT709Luminance( + public void FromRgb24(Rgb24 source) => this.PackedValue = ColorNumerics.Get16BitBT709Luminance( ColorNumerics.UpscaleFrom8BitTo16Bit(source.R), ColorNumerics.UpscaleFrom8BitTo16Bit(source.G), ColorNumerics.UpscaleFrom8BitTo16Bit(source.B)); - } /// [MethodImpl(InliningOptions.ShortMethod)] - public void FromRgba32(Rgba32 source) - { - this.PackedValue = ColorNumerics.Get16BitBT709Luminance( + public void FromRgba32(Rgba32 source) => this.PackedValue = ColorNumerics.Get16BitBT709Luminance( ColorNumerics.UpscaleFrom8BitTo16Bit(source.R), ColorNumerics.UpscaleFrom8BitTo16Bit(source.G), ColorNumerics.UpscaleFrom8BitTo16Bit(source.B)); - } /// [MethodImpl(InliningOptions.ShortMethod)] diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/L8.cs b/src/ImageSharp/PixelFormats/PixelImplementations/L8.cs index c570c33a19..ffff60be52 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/L8.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/L8.cs @@ -14,8 +14,8 @@ namespace SixLabors.ImageSharp.PixelFormats /// public partial struct L8 : IPixel, IPackedVector { - private static readonly Vector4 MaxBytes = new Vector4(255F); - private static readonly Vector4 Half = new Vector4(0.5F); + private static readonly Vector4 MaxBytes = new(255F); + private static readonly Vector4 Half = new(0.5F); /// /// Initializes a new instance of the struct. diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/La16.cs b/src/ImageSharp/PixelFormats/PixelImplementations/La16.cs index 5a69431a1d..877aaed81c 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/La16.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/La16.cs @@ -16,8 +16,8 @@ namespace SixLabors.ImageSharp.PixelFormats [StructLayout(LayoutKind.Explicit)] public partial struct La16 : IPixel, IPackedVector { - private static readonly Vector4 MaxBytes = new Vector4(255F); - private static readonly Vector4 Half = new Vector4(0.5F); + private static readonly Vector4 MaxBytes = new(255F); + private static readonly Vector4 Half = new(0.5F); /// /// Gets or sets the luminance component. @@ -35,7 +35,7 @@ public partial struct La16 : IPixel, IPackedVector /// Initializes a new instance of the struct. /// /// The luminance component. - /// The alpha componant. + /// The alpha component. public La16(byte l, byte a) { this.L = l; diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/La32.cs b/src/ImageSharp/PixelFormats/PixelImplementations/La32.cs index 66d0e38c79..f19f228136 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/La32.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/La32.cs @@ -35,7 +35,7 @@ public partial struct La32 : IPixel, IPackedVector /// Initializes a new instance of the struct. /// /// The luminance component. - /// The alpha componant. + /// The alpha component. public La32(ushort l, ushort a) { this.L = l; diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/NormalizedByte2.cs b/src/ImageSharp/PixelFormats/PixelImplementations/NormalizedByte2.cs index 720a1eef65..62eaf949d1 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/NormalizedByte2.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/NormalizedByte2.cs @@ -17,8 +17,8 @@ public partial struct NormalizedByte2 : IPixel, IPackedVector /// Initializes a new instance of the struct. @@ -93,7 +93,7 @@ public void FromVector4(Vector4 vector) /// [MethodImpl(InliningOptions.ShortMethod)] - public readonly Vector4 ToVector4() => new Vector4(this.ToVector2(), 0F, 1F); + public readonly Vector4 ToVector4() => new(this.ToVector2(), 0F, 1F); /// [MethodImpl(InliningOptions.ShortMethod)] @@ -153,12 +153,9 @@ public void FromVector4(Vector4 vector) /// /// The . [MethodImpl(InliningOptions.ShortMethod)] - public readonly Vector2 ToVector2() - { - return new Vector2( + public readonly Vector2 ToVector2() => new( (sbyte)((this.PackedValue >> 0) & 0xFF) / MaxPos, (sbyte)((this.PackedValue >> 8) & 0xFF) / MaxPos); - } /// public override readonly bool Equals(object obj) => obj is NormalizedByte2 other && this.Equals(other); diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/NormalizedByte4.cs b/src/ImageSharp/PixelFormats/PixelImplementations/NormalizedByte4.cs index d1b4b73f2b..2e81b3e2dc 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/NormalizedByte4.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/NormalizedByte4.cs @@ -17,8 +17,8 @@ public partial struct NormalizedByte4 : IPixel, IPackedVector /// Initializes a new instance of the struct. @@ -91,14 +91,11 @@ public readonly Vector4 ToScaledVector4() /// [MethodImpl(InliningOptions.ShortMethod)] - public readonly Vector4 ToVector4() - { - return new Vector4( + public readonly Vector4 ToVector4() => new( (sbyte)((this.PackedValue >> 0) & 0xFF) / MaxPos, (sbyte)((this.PackedValue >> 8) & 0xFF) / MaxPos, (sbyte)((this.PackedValue >> 16) & 0xFF) / MaxPos, (sbyte)((this.PackedValue >> 24) & 0xFF) / MaxPos); - } /// [MethodImpl(InliningOptions.ShortMethod)] @@ -142,10 +139,7 @@ public readonly Vector4 ToVector4() /// [MethodImpl(InliningOptions.ShortMethod)] - public void ToRgba32(ref Rgba32 dest) - { - dest.FromScaledVector4(this.ToScaledVector4()); - } + public void ToRgba32(ref Rgba32 dest) => dest.FromScaledVector4(this.ToScaledVector4()); /// [MethodImpl(InliningOptions.ShortMethod)] diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/NormalizedShort2.cs b/src/ImageSharp/PixelFormats/PixelImplementations/NormalizedShort2.cs index d08a546031..b97aaacec8 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/NormalizedShort2.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/NormalizedShort2.cs @@ -18,7 +18,7 @@ public partial struct NormalizedShort2 : IPixel, IPackedVector // Largest two byte positive number 0xFFFF >> 1; private const float MaxPos = 0x7FFF; - private static readonly Vector2 Max = new Vector2(MaxPos); + private static readonly Vector2 Max = new(MaxPos); private static readonly Vector2 Min = Vector2.Negate(Max); /// @@ -138,10 +138,7 @@ public void FromVector4(Vector4 vector) /// [MethodImpl(InliningOptions.ShortMethod)] - public void ToRgba32(ref Rgba32 dest) - { - dest.FromScaledVector4(this.ToScaledVector4()); - } + public void ToRgba32(ref Rgba32 dest) => dest.FromScaledVector4(this.ToScaledVector4()); /// [MethodImpl(InliningOptions.ShortMethod)] @@ -157,12 +154,9 @@ public void ToRgba32(ref Rgba32 dest) /// /// The . [MethodImpl(InliningOptions.ShortMethod)] - public readonly Vector2 ToVector2() - { - return new Vector2( + public readonly Vector2 ToVector2() => new( (short)(this.PackedValue & 0xFFFF) / MaxPos, (short)(this.PackedValue >> 0x10) / MaxPos); - } /// public override readonly bool Equals(object obj) => obj is NormalizedShort2 other && this.Equals(other); diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/NormalizedShort4.cs b/src/ImageSharp/PixelFormats/PixelImplementations/NormalizedShort4.cs index 158b6eb4b0..f2e8aedd8f 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/NormalizedShort4.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/NormalizedShort4.cs @@ -18,7 +18,7 @@ public partial struct NormalizedShort4 : IPixel, IPackedVector // Largest two byte positive number 0xFFFF >> 1; private const float MaxPos = 0x7FFF; - private static readonly Vector4 Max = new Vector4(MaxPos); + private static readonly Vector4 Max = new(MaxPos); private static readonly Vector4 Min = Vector4.Negate(Max); /// @@ -92,14 +92,11 @@ public readonly Vector4 ToScaledVector4() /// [MethodImpl(InliningOptions.ShortMethod)] - public readonly Vector4 ToVector4() - { - return new Vector4( + public readonly Vector4 ToVector4() => new( (short)((this.PackedValue >> 0x00) & 0xFFFF) / MaxPos, (short)((this.PackedValue >> 0x10) & 0xFFFF) / MaxPos, (short)((this.PackedValue >> 0x20) & 0xFFFF) / MaxPos, (short)((this.PackedValue >> 0x30) & 0xFFFF) / MaxPos); - } /// [MethodImpl(InliningOptions.ShortMethod)] @@ -143,10 +140,7 @@ public readonly Vector4 ToVector4() /// [MethodImpl(InliningOptions.ShortMethod)] - public void ToRgba32(ref Rgba32 dest) - { - dest.FromScaledVector4(this.ToScaledVector4()); - } + public void ToRgba32(ref Rgba32 dest) => dest.FromScaledVector4(this.ToScaledVector4()); /// [MethodImpl(InliningOptions.ShortMethod)] diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Rg32.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Rg32.cs index d7e6f53cf2..12b6e153f9 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Rg32.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Rg32.cs @@ -15,7 +15,7 @@ namespace SixLabors.ImageSharp.PixelFormats /// public partial struct Rg32 : IPixel, IPackedVector { - private static readonly Vector2 Max = new Vector2(ushort.MaxValue); + private static readonly Vector2 Max = new(ushort.MaxValue); /// /// Initializes a new instance of the struct. @@ -79,7 +79,7 @@ public void FromVector4(Vector4 vector) /// [MethodImpl(InliningOptions.ShortMethod)] - public readonly Vector4 ToVector4() => new Vector4(this.ToVector2(), 0F, 1F); + public readonly Vector4 ToVector4() => new(this.ToVector2(), 0F, 1F); /// [MethodImpl(InliningOptions.ShortMethod)] @@ -123,10 +123,7 @@ public void FromVector4(Vector4 vector) /// [MethodImpl(InliningOptions.ShortMethod)] - public void ToRgba32(ref Rgba32 dest) - { - dest.FromScaledVector4(this.ToScaledVector4()); - } + public void ToRgba32(ref Rgba32 dest) => dest.FromScaledVector4(this.ToScaledVector4()); /// [MethodImpl(InliningOptions.ShortMethod)] diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Rgb24.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Rgb24.cs index 7fd63c6766..3b5bdb3d5a 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Rgb24.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Rgb24.cs @@ -36,8 +36,8 @@ public partial struct Rgb24 : IPixel [FieldOffset(2)] public byte B; - private static readonly Vector4 MaxBytes = new Vector4(byte.MaxValue); - private static readonly Vector4 Half = new Vector4(0.5F); + private static readonly Vector4 MaxBytes = new(byte.MaxValue); + private static readonly Vector4 Half = new(0.5F); /// /// Initializes a new instance of the struct. diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Rgb48.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Rgb48.cs index e3738b70c1..d16b7db7ac 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Rgb48.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Rgb48.cs @@ -93,7 +93,7 @@ public void FromVector4(Vector4 vector) /// [MethodImpl(InliningOptions.ShortMethod)] - public readonly Vector4 ToVector4() => new Vector4(this.R / Max, this.G / Max, this.B / Max, 1F); + public readonly Vector4 ToVector4() => new(this.R / Max, this.G / Max, this.B / Max, 1F); /// [MethodImpl(InliningOptions.ShortMethod)] diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Rgba1010102.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Rgba1010102.cs index dee2f9fcb6..e687260187 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Rgba1010102.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Rgba1010102.cs @@ -16,7 +16,7 @@ namespace SixLabors.ImageSharp.PixelFormats /// public partial struct Rgba1010102 : IPixel, IPackedVector { - private static readonly Vector4 Multiplier = new Vector4(1023F, 1023F, 1023F, 3F); + private static readonly Vector4 Multiplier = new(1023F, 1023F, 1023F, 3F); /// /// Initializes a new instance of the struct. @@ -78,14 +78,11 @@ public Rgba1010102(float x, float y, float z, float w) /// [MethodImpl(InliningOptions.ShortMethod)] - public readonly Vector4 ToVector4() - { - return new Vector4( + public readonly Vector4 ToVector4() => new Vector4( (this.PackedValue >> 0) & 0x03FF, (this.PackedValue >> 10) & 0x03FF, (this.PackedValue >> 20) & 0x03FF, (this.PackedValue >> 30) & 0x03) / Multiplier; - } /// [MethodImpl(InliningOptions.ShortMethod)] @@ -129,10 +126,7 @@ public readonly Vector4 ToVector4() /// [MethodImpl(InliningOptions.ShortMethod)] - public void ToRgba32(ref Rgba32 dest) - { - dest.FromScaledVector4(this.ToScaledVector4()); - } + public void ToRgba32(ref Rgba32 dest) => dest.FromScaledVector4(this.ToScaledVector4()); /// [MethodImpl(InliningOptions.ShortMethod)] diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Rgba32.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Rgba32.cs index 868165e9c4..3dc6490f1b 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Rgba32.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Rgba32.cs @@ -44,8 +44,8 @@ public partial struct Rgba32 : IPixel, IPackedVector /// public byte A; - private static readonly Vector4 MaxBytes = new Vector4(byte.MaxValue); - private static readonly Vector4 Half = new Vector4(0.5F); + private static readonly Vector4 MaxBytes = new(byte.MaxValue); + private static readonly Vector4 Half = new(0.5F); /// /// Initializes a new instance of the struct. @@ -137,7 +137,7 @@ public uint Rgba public Rgb24 Rgb { [MethodImpl(InliningOptions.ShortMethod)] - readonly get => new Rgb24(this.R, this.G, this.B); + readonly get => new(this.R, this.G, this.B); [MethodImpl(InliningOptions.ShortMethod)] set @@ -154,7 +154,7 @@ public Rgb24 Rgb public Bgr24 Bgr { [MethodImpl(InliningOptions.ShortMethod)] - readonly get => new Bgr24(this.R, this.G, this.B); + readonly get => new(this.R, this.G, this.B); [MethodImpl(InliningOptions.ShortMethod)] set @@ -181,7 +181,7 @@ public uint PackedValue /// The . /// The . [MethodImpl(InliningOptions.ShortMethod)] - public static implicit operator Color(Rgba32 source) => new Color(source); + public static implicit operator Color(Rgba32 source) => new(source); /// /// Converts a to . @@ -393,10 +393,7 @@ public void FromRgb24(Rgb24 source) /// [MethodImpl(InliningOptions.ShortMethod)] - public void ToRgba32(ref Rgba32 dest) - { - dest = this; - } + public void ToRgba32(ref Rgba32 dest) => dest = this; /// [MethodImpl(InliningOptions.ShortMethod)] @@ -424,7 +421,7 @@ public void FromRgba64(Rgba64 source) /// A hexadecimal string representation of the value. public readonly string ToHex() { - uint hexOrder = (uint)(this.A << 0 | this.B << 8 | this.G << 16 | this.R << 24); + uint hexOrder = (uint)((this.A << 0) | (this.B << 8) | (this.G << 16) | (this.R << 24)); return hexOrder.ToString("X8"); } @@ -523,7 +520,7 @@ private static string ToRgbaHex(string hex) return hex + "FF"; } - if (hex.Length < 3 || hex.Length > 4) + if (hex.Length is < 3 or > 4) { return null; } diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Rgba64.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Rgba64.cs index 9add3d7180..4cfa0bf974 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Rgba64.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Rgba64.cs @@ -162,7 +162,7 @@ public ulong PackedValue /// The . /// The . [MethodImpl(InliningOptions.ShortMethod)] - public static implicit operator Color(Rgba64 source) => new Color(source); + public static implicit operator Color(Rgba64 source) => new(source); /// /// Converts a to . diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/RgbaVector.cs b/src/ImageSharp/PixelFormats/PixelImplementations/RgbaVector.cs index 97e103d0f2..cd6f53c4ed 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/RgbaVector.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/RgbaVector.cs @@ -43,8 +43,8 @@ public partial struct RgbaVector : IPixel public float A; private const float MaxBytes = byte.MaxValue; - private static readonly Vector4 Max = new Vector4(MaxBytes); - private static readonly Vector4 Half = new Vector4(0.5F); + private static readonly Vector4 Max = new(MaxBytes); + private static readonly Vector4 Half = new(0.5F); /// /// Initializes a new instance of the struct. @@ -120,7 +120,7 @@ public void FromVector4(Vector4 vector) /// [MethodImpl(InliningOptions.ShortMethod)] - public readonly Vector4 ToVector4() => new Vector4(this.R, this.G, this.B, this.A); + public readonly Vector4 ToVector4() => new(this.R, this.G, this.B, this.A); /// [MethodImpl(InliningOptions.ShortMethod)] @@ -183,7 +183,7 @@ public readonly string ToHex() // Hex is RRGGBBAA Vector4 vector = this.ToVector4() * Max; vector += Half; - uint hexOrder = (uint)((byte)vector.W | (byte)vector.Z << 8 | (byte)vector.Y << 16 | (byte)vector.X << 24); + uint hexOrder = (uint)((byte)vector.W | ((byte)vector.Z << 8) | ((byte)vector.Y << 16) | ((byte)vector.X << 24)); return hexOrder.ToString("X8"); } @@ -199,10 +199,7 @@ public readonly bool Equals(RgbaVector other) => && this.A.Equals(other.A); /// - public override readonly string ToString() - { - return FormattableString.Invariant($"RgbaVector({this.R:#0.##}, {this.G:#0.##}, {this.B:#0.##}, {this.A:#0.##})"); - } + public override readonly string ToString() => FormattableString.Invariant($"RgbaVector({this.R:#0.##}, {this.G:#0.##}, {this.B:#0.##}, {this.A:#0.##})"); /// public override readonly int GetHashCode() => HashCode.Combine(this.R, this.G, this.B, this.A); diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Short2.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Short2.cs index 101027a78e..24f6b4d1d4 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Short2.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Short2.cs @@ -21,8 +21,8 @@ public partial struct Short2 : IPixel, IPackedVector // Two's complement private const float MinNeg = ~(int)MaxPos; - private static readonly Vector2 Max = new Vector2(MaxPos); - private static readonly Vector2 Min = new Vector2(MinNeg); + private static readonly Vector2 Max = new(MaxPos); + private static readonly Vector2 Min = new(MinNeg); /// /// Initializes a new instance of the struct. @@ -97,7 +97,7 @@ public void FromVector4(Vector4 vector) /// [MethodImpl(InliningOptions.ShortMethod)] - public readonly Vector4 ToVector4() => new Vector4((short)(this.PackedValue & 0xFFFF), (short)(this.PackedValue >> 0x10), 0, 1); + public readonly Vector4 ToVector4() => new((short)(this.PackedValue & 0xFFFF), (short)(this.PackedValue >> 0x10), 0, 1); /// [MethodImpl(InliningOptions.ShortMethod)] @@ -157,7 +157,7 @@ public void FromVector4(Vector4 vector) /// /// The . [MethodImpl(InliningOptions.ShortMethod)] - public readonly Vector2 ToVector2() => new Vector2((short)(this.PackedValue & 0xFFFF), (short)(this.PackedValue >> 0x10)); + public readonly Vector2 ToVector2() => new((short)(this.PackedValue & 0xFFFF), (short)(this.PackedValue >> 0x10)); /// public override readonly bool Equals(object obj) => obj is Short2 other && this.Equals(other); diff --git a/tests/ImageSharp.Tests/PixelFormats/Bgr24Tests.cs b/tests/ImageSharp.Tests/PixelFormats/Bgr24Tests.cs index f6a6d44bb4..36cdd157d9 100644 --- a/tests/ImageSharp.Tests/PixelFormats/Bgr24Tests.cs +++ b/tests/ImageSharp.Tests/PixelFormats/Bgr24Tests.cs @@ -28,8 +28,7 @@ public void AreNotEqual() Assert.NotEqual(color1, color2); } - public static readonly TheoryData ColorData = - new TheoryData { { 1, 2, 3 }, { 4, 5, 6 }, { 0, 255, 42 } }; + public static readonly TheoryData ColorData = new() { { 1, 2, 3 }, { 4, 5, 6 }, { 0, 255, 42 } }; [Theory] [MemberData(nameof(ColorData))] diff --git a/tests/ImageSharp.Tests/PixelFormats/Bgra32Tests.cs b/tests/ImageSharp.Tests/PixelFormats/Bgra32Tests.cs index b7fbdde714..4b8f4c2eaf 100644 --- a/tests/ImageSharp.Tests/PixelFormats/Bgra32Tests.cs +++ b/tests/ImageSharp.Tests/PixelFormats/Bgra32Tests.cs @@ -35,10 +35,13 @@ public void AreNotEqual() } public static readonly TheoryData ColorData = - new TheoryData - { - { 1, 2, 3, 4 }, { 4, 5, 6, 7 }, { 0, 255, 42, 0 }, { 1, 2, 3, 255 } - }; + new() + { + { 1, 2, 3, 4 }, + { 4, 5, 6, 7 }, + { 0, 255, 42, 0 }, + { 1, 2, 3, 255 } + }; [Theory] [MemberData(nameof(ColorData))] diff --git a/tests/ImageSharp.Tests/PixelFormats/L8Tests.cs b/tests/ImageSharp.Tests/PixelFormats/L8Tests.cs index d877283c1d..fc91590d22 100644 --- a/tests/ImageSharp.Tests/PixelFormats/L8Tests.cs +++ b/tests/ImageSharp.Tests/PixelFormats/L8Tests.cs @@ -12,29 +12,7 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats public class L8Tests { public static readonly TheoryData LuminanceData - = new TheoryData - { - 0, - 1, - 2, - 3, - 5, - 13, - 31, - 71, - 73, - 79, - 83, - 109, - 127, - 128, - 131, - 199, - 250, - 251, - 254, - 255 - }; + = new() { 0, 1, 2, 3, 5, 13, 31, 71, 73, 79, 83, 109, 127, 128, 131, 199, 250, 251, 254, 255 }; [Theory] [InlineData(0)] diff --git a/tests/ImageSharp.Tests/PixelFormats/La16Tests.cs b/tests/ImageSharp.Tests/PixelFormats/La16Tests.cs index 2c9a27028d..7e082147eb 100644 --- a/tests/ImageSharp.Tests/PixelFormats/La16Tests.cs +++ b/tests/ImageSharp.Tests/PixelFormats/La16Tests.cs @@ -12,29 +12,7 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats public class La16Tests { public static readonly TheoryData LuminanceData - = new TheoryData - { - 0, - 1, - 2, - 3, - 5, - 13, - 31, - 71, - 73, - 79, - 83, - 109, - 127, - 128, - 131, - 199, - 250, - 251, - 254, - 255 - }; + = new() { 0, 1, 2, 3, 5, 13, 31, 71, 73, 79, 83, 109, 127, 128, 131, 199, 250, 251, 254, 255 }; [Theory] [InlineData(0, 0)] diff --git a/tests/ImageSharp.Tests/PixelFormats/PixelBlenderTests.cs b/tests/ImageSharp.Tests/PixelFormats/PixelBlenderTests.cs index 7954f1aff1..5988cc851a 100644 --- a/tests/ImageSharp.Tests/PixelFormats/PixelBlenderTests.cs +++ b/tests/ImageSharp.Tests/PixelFormats/PixelBlenderTests.cs @@ -12,7 +12,7 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats [Trait("Category", "PixelFormats")] public class PixelBlenderTests { - public static TheoryData BlenderMappings = new TheoryData + public static TheoryData BlenderMappings = new() { { new TestPixel(), typeof(DefaultPixelBlenders.NormalSrcOver), PixelColorBlendingMode.Normal }, { new TestPixel(), typeof(DefaultPixelBlenders.ScreenSrcOver), PixelColorBlendingMode.Screen }, @@ -43,7 +43,7 @@ public void ReturnsCorrectBlender(TestPixel pixel, Type type, Pi Assert.IsType(type, blender); } - public static TheoryData ColorBlendingExpectedResults = new TheoryData + public static TheoryData ColorBlendingExpectedResults = new() { { Color.MistyRose, Color.MidnightBlue, 1, PixelColorBlendingMode.Normal, Color.MidnightBlue }, { Color.MistyRose, Color.MidnightBlue, 1, PixelColorBlendingMode.Screen, new Rgba32(0xFFEEE7FF) }, @@ -67,7 +67,7 @@ public void TestColorBlendingModes(Rgba32 backdrop, Rgba32 source, float opacity Assert.Equal(actualResult.ToVector4(), expectedResult.ToVector4()); } - public static TheoryData AlphaCompositionExpectedResults = new TheoryData + public static TheoryData AlphaCompositionExpectedResults = new() { { Color.MistyRose, Color.MidnightBlue, 1, PixelAlphaCompositionMode.Clear, new Rgba32(0) }, { Color.MistyRose, Color.MidnightBlue, 1, PixelAlphaCompositionMode.Xor, new Rgba32(0) }, diff --git a/tests/ImageSharp.Tests/PixelFormats/PixelConverterTests.cs b/tests/ImageSharp.Tests/PixelFormats/PixelConverterTests.cs index ec53629a80..315f9f7761 100644 --- a/tests/ImageSharp.Tests/PixelFormats/PixelConverterTests.cs +++ b/tests/ImageSharp.Tests/PixelFormats/PixelConverterTests.cs @@ -11,21 +11,21 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats public abstract partial class PixelConverterTests { public static readonly TheoryData RgbaData = - new TheoryData - { - { 0, 0, 0, 0 }, - { 0, 0, 0, 255 }, - { 0, 0, 255, 0 }, - { 0, 255, 0, 0 }, - { 255, 0, 0, 0 }, - { 255, 255, 255, 255 }, - { 0, 0, 0, 1 }, - { 0, 0, 1, 0 }, - { 0, 1, 0, 0 }, - { 1, 0, 0, 0 }, - { 3, 5, 7, 11 }, - { 67, 71, 101, 109 } - }; + new() + { + { 0, 0, 0, 0 }, + { 0, 0, 0, 255 }, + { 0, 0, 255, 0 }, + { 0, 255, 0, 0 }, + { 255, 0, 0, 0 }, + { 255, 255, 255, 255 }, + { 0, 0, 0, 1 }, + { 0, 0, 1, 0 }, + { 0, 1, 0, 0 }, + { 1, 0, 0, 0 }, + { 3, 5, 7, 11 }, + { 67, 71, 101, 109 } + }; public class FromRgba32 : PixelConverterTests { @@ -34,7 +34,7 @@ public class FromRgba32 : PixelConverterTests public void ToArgb32(byte r, byte g, byte b, byte a) { byte[] source = ReferenceImplementations.MakeRgba32ByteArray(r, g, b, a); - var actual = new byte[source.Length]; + byte[] actual = new byte[source.Length]; PixelConverter.FromRgba32.ToArgb32(source, actual); @@ -48,7 +48,7 @@ public void ToArgb32(byte r, byte g, byte b, byte a) public void ToBgra32(byte r, byte g, byte b, byte a) { byte[] source = ReferenceImplementations.MakeRgba32ByteArray(r, g, b, a); - var actual = new byte[source.Length]; + byte[] actual = new byte[source.Length]; PixelConverter.FromRgba32.ToBgra32(source, actual); @@ -65,7 +65,7 @@ public class FromArgb32 : PixelConverterTests public void ToRgba32(byte r, byte g, byte b, byte a) { byte[] source = ReferenceImplementations.MakeArgb32ByteArray(r, g, b, a); - var actual = new byte[source.Length]; + byte[] actual = new byte[source.Length]; PixelConverter.FromArgb32.ToRgba32(source, actual); @@ -79,7 +79,7 @@ public void ToRgba32(byte r, byte g, byte b, byte a) public void ToBgra32(byte r, byte g, byte b, byte a) { byte[] source = ReferenceImplementations.MakeArgb32ByteArray(r, g, b, a); - var actual = new byte[source.Length]; + byte[] actual = new byte[source.Length]; PixelConverter.FromArgb32.ToBgra32(source, actual); @@ -96,7 +96,7 @@ public class FromBgra32 : PixelConverterTests public void ToArgb32(byte r, byte g, byte b, byte a) { byte[] source = ReferenceImplementations.MakeBgra32ByteArray(r, g, b, a); - var actual = new byte[source.Length]; + byte[] actual = new byte[source.Length]; PixelConverter.FromBgra32.ToArgb32(source, actual); @@ -110,7 +110,7 @@ public void ToArgb32(byte r, byte g, byte b, byte a) public void ToRgba32(byte r, byte g, byte b, byte a) { byte[] source = ReferenceImplementations.MakeBgra32ByteArray(r, g, b, a); - var actual = new byte[source.Length]; + byte[] actual = new byte[source.Length]; PixelConverter.FromBgra32.ToRgba32(source, actual); diff --git a/tests/ImageSharp.Tests/PixelFormats/Rgb24Tests.cs b/tests/ImageSharp.Tests/PixelFormats/Rgb24Tests.cs index 4d4f8c9fb9..6c98e623fd 100644 --- a/tests/ImageSharp.Tests/PixelFormats/Rgb24Tests.cs +++ b/tests/ImageSharp.Tests/PixelFormats/Rgb24Tests.cs @@ -11,7 +11,7 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats public class Rgb24Tests { public static readonly TheoryData ColorData = - new TheoryData + new() { { 1, 2, 3 }, { 4, 5, 6 }, @@ -76,7 +76,7 @@ public void FromRgba32() Assert.Equal(3, rgb.B); } - private static Vector4 Vec(byte r, byte g, byte b, byte a = 255) => new Vector4( + private static Vector4 Vec(byte r, byte g, byte b, byte a = 255) => new( r / 255f, g / 255f, b / 255f, diff --git a/tests/ImageSharp.Tests/PixelFormats/UnPackedPixelTests.cs b/tests/ImageSharp.Tests/PixelFormats/UnPackedPixelTests.cs index 9492fef90b..20484b073c 100644 --- a/tests/ImageSharp.Tests/PixelFormats/UnPackedPixelTests.cs +++ b/tests/ImageSharp.Tests/PixelFormats/UnPackedPixelTests.cs @@ -5,7 +5,7 @@ using SixLabors.ImageSharp.PixelFormats; using Xunit; -namespace SixLabors.ImageSharp.Tests.Colors +namespace SixLabors.ImageSharp.Tests.PixelFormats { [Trait("Category", "PixelFormats")] public class UnPackedPixelTests From e168ae6a2c8bb4774c871f1372bab4d7f8051b3d Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 26 Oct 2021 16:42:23 +0200 Subject: [PATCH 03/85] Use Span in GetHTreeGroupForPos to avoid allocations --- .../Formats/Webp/Lossless/WebpLosslessDecoder.cs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossless/WebpLosslessDecoder.cs b/src/ImageSharp/Formats/Webp/Lossless/WebpLosslessDecoder.cs index 9604160091..768365e44e 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/WebpLosslessDecoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/WebpLosslessDecoder.cs @@ -218,7 +218,7 @@ public void DecodeImageData(Vp8LDecoder decoder, Span pixelData) ColorCache colorCache = decoder.Metadata.ColorCache; int colorCacheLimit = lenCodeLimit + colorCacheSize; int mask = decoder.Metadata.HuffmanMask; - HTreeGroup[] hTreeGroup = GetHTreeGroupForPos(decoder.Metadata, col, row); + Span hTreeGroup = GetHTreeGroupForPos(decoder.Metadata, col, row); int totalPixels = width * height; int decodedPixels = 0; @@ -731,7 +731,7 @@ public void DecodeAlphaData(AlphaDecoder dec) int lastRow = height; const int lenCodeLimit = WebpConstants.NumLiteralCodes + WebpConstants.NumLengthCodes; int mask = hdr.HuffmanMask; - HTreeGroup[] htreeGroup = pos < last ? GetHTreeGroupForPos(hdr, col, row) : null; + Span htreeGroup = pos < last ? GetHTreeGroupForPos(hdr, col, row) : null; while (!this.bitReader.Eos && pos < last) { // Only update when changing tile. @@ -815,7 +815,7 @@ private void UpdateDecoder(Vp8LDecoder decoder, int width, int height) decoder.Metadata.HuffmanMask = numBits == 0 ? ~0 : (1 << numBits) - 1; } - private uint ReadPackedSymbols(HTreeGroup[] group, Span pixelData, int decodedPixels) + private uint ReadPackedSymbols(Span group, Span pixelData, int decodedPixels) { uint val = (uint)(this.bitReader.PrefetchBits() & (HuffmanUtils.HuffmanPackedTableSize - 1)); HuffmanCode code = group[0].PackedTable[val]; @@ -895,10 +895,10 @@ private int GetCopyDistance(int distanceSymbol) } [MethodImpl(InliningOptions.ShortMethod)] - private static HTreeGroup[] GetHTreeGroupForPos(Vp8LMetadata metadata, int x, int y) + private static Span GetHTreeGroupForPos(Vp8LMetadata metadata, int x, int y) { uint metaIndex = GetMetaIndex(metadata.HuffmanImage, metadata.HuffmanXSize, metadata.HuffmanSubSampleBits, x, y); - return metadata.HTreeGroups.AsSpan((int)metaIndex).ToArray(); + return metadata.HTreeGroups.AsSpan((int)metaIndex); } [MethodImpl(InliningOptions.ShortMethod)] From b50f146fe2a163b4f4818745a55eec08992a8cd8 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 27 Oct 2021 13:17:45 -0700 Subject: [PATCH 04/85] Support running on arm4 --- tests/ImageSharp.Benchmarks/Config.cs | 4 ++-- tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/ImageSharp.Benchmarks/Config.cs b/tests/ImageSharp.Benchmarks/Config.cs index 9221bb7fda..2997848211 100644 --- a/tests/ImageSharp.Benchmarks/Config.cs +++ b/tests/ImageSharp.Benchmarks/Config.cs @@ -34,7 +34,7 @@ public class MultiFramework : Config public MultiFramework() => this.AddJob( Job.Default.WithRuntime(ClrRuntime.Net472), Job.Default.WithRuntime(CoreRuntime.Core31), - Job.Default.WithRuntime(CoreRuntime.Core50)); + Job.Default.WithRuntime(CoreRuntime.Core50).With(new Argument[] { new MsBuildArgument("/p:DebugType=portable") })); } public class ShortMultiFramework : Config @@ -42,7 +42,7 @@ public class ShortMultiFramework : Config public ShortMultiFramework() => this.AddJob( Job.Default.WithRuntime(ClrRuntime.Net472).WithLaunchCount(1).WithWarmupCount(3).WithIterationCount(3), Job.Default.WithRuntime(CoreRuntime.Core31).WithLaunchCount(1).WithWarmupCount(3).WithIterationCount(3), - Job.Default.WithRuntime(CoreRuntime.Core50).WithLaunchCount(1).WithWarmupCount(3).WithIterationCount(3)); + Job.Default.WithRuntime(CoreRuntime.Core50).WithLaunchCount(1).WithWarmupCount(3).WithIterationCount(3).With(new Argument[] { new MsBuildArgument("/p:DebugType=portable") })); } public class ShortCore31 : Config diff --git a/tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj b/tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj index b9ab31972f..8f0b4a86f2 100644 --- a/tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj +++ b/tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj @@ -6,6 +6,7 @@ Exe SixLabors.ImageSharp.Benchmarks false + portable false Debug;Release;Debug-InnerLoop;Release-InnerLoop From 257ff1929e341e5b1af94d9adf557e5296ece957 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 29 Oct 2021 23:32:13 +1100 Subject: [PATCH 05/85] Use RgbaVector for color backing --- src/ImageSharp/Color/Color.Conversions.cs | 87 ++++++++++++++++--- src/ImageSharp/Color/Color.cs | 74 ++++++++-------- .../Color/ColorTests.CastFrom.cs | 17 +++- .../Color/ColorTests.ConstructFrom.cs | 4 +- 4 files changed, 125 insertions(+), 57 deletions(-) diff --git a/src/ImageSharp/Color/Color.Conversions.cs b/src/ImageSharp/Color/Color.Conversions.cs index 0455fd26a4..abcb54b807 100644 --- a/src/ImageSharp/Color/Color.Conversions.cs +++ b/src/ImageSharp/Color/Color.Conversions.cs @@ -17,56 +17,90 @@ public readonly partial struct Color /// /// The containing the color information. [MethodImpl(InliningOptions.ShortMethod)] - public Color(Rgba64 pixel) => this.data = pixel; + public Color(Rgba64 pixel) + { + RgbaVector vector = default; + vector.FromRgba64(pixel); + this.data = vector; + } /// /// Initializes a new instance of the struct. /// /// The containing the color information. [MethodImpl(InliningOptions.ShortMethod)] - public Color(Rgba32 pixel) => this.data = new Rgba64(pixel); + public Color(Rgba32 pixel) + { + RgbaVector vector = default; + vector.FromRgba32(pixel); + this.data = vector; + } /// /// Initializes a new instance of the struct. /// /// The containing the color information. [MethodImpl(InliningOptions.ShortMethod)] - public Color(Argb32 pixel) => this.data = new Rgba64(pixel); + public Color(Argb32 pixel) + { + RgbaVector vector = default; + vector.FromArgb32(pixel); + this.data = vector; + } /// /// Initializes a new instance of the struct. /// /// The containing the color information. [MethodImpl(InliningOptions.ShortMethod)] - public Color(Bgra32 pixel) => this.data = new Rgba64(pixel); + public Color(Bgra32 pixel) + { + RgbaVector vector = default; + vector.FromBgra32(pixel); + this.data = vector; + } /// /// Initializes a new instance of the struct. /// /// The containing the color information. [MethodImpl(InliningOptions.ShortMethod)] - public Color(Rgb24 pixel) => this.data = new Rgba64(pixel); + public Color(Rgb24 pixel) + { + RgbaVector vector = default; + vector.FromRgb24(pixel); + this.data = vector; + } /// /// Initializes a new instance of the struct. /// /// The containing the color information. [MethodImpl(InliningOptions.ShortMethod)] - public Color(Bgr24 pixel) => this.data = new Rgba64(pixel); + public Color(Bgr24 pixel) + { + RgbaVector vector = default; + vector.FromBgr24(pixel); + this.data = vector; + } /// /// Initializes a new instance of the struct. /// /// The containing the color information. [MethodImpl(InliningOptions.ShortMethod)] - public Color(Vector4 vector) => this.data = new Rgba64(vector); + public Color(Vector4 vector) + { + vector = Numerics.Clamp(vector, Vector4.Zero, Vector4.One); + this.data = new RgbaVector(vector.X, vector.Y, vector.Z, vector.W); + } /// /// Converts a to . /// /// The . /// The . - public static explicit operator Vector4(Color color) => color.data.ToVector4(); + public static explicit operator Vector4(Color color) => color.data.ToScaledVector4(); /// /// Converts an to . @@ -74,22 +108,47 @@ public readonly partial struct Color /// The . /// The . [MethodImpl(InliningOptions.ShortMethod)] - public static explicit operator Color(Vector4 source) => new Color(source); + public static explicit operator Color(Vector4 source) => new(source); [MethodImpl(InliningOptions.ShortMethod)] - internal Rgba32 ToRgba32() => this.data.ToRgba32(); + internal Rgba32 ToRgba32() + { + Rgba32 result = default; + result.FromScaledVector4(this.data.ToScaledVector4()); + return result; + } [MethodImpl(InliningOptions.ShortMethod)] - internal Bgra32 ToBgra32() => this.data.ToBgra32(); + internal Bgra32 ToBgra32() + { + Bgra32 result = default; + result.FromScaledVector4(this.data.ToScaledVector4()); + return result; + } [MethodImpl(InliningOptions.ShortMethod)] - internal Argb32 ToArgb32() => this.data.ToArgb32(); + internal Argb32 ToArgb32() + { + Argb32 result = default; + result.FromScaledVector4(this.data.ToScaledVector4()); + return result; + } [MethodImpl(InliningOptions.ShortMethod)] - internal Rgb24 ToRgb24() => this.data.ToRgb24(); + internal Rgb24 ToRgb24() + { + Rgb24 result = default; + result.FromScaledVector4(this.data.ToScaledVector4()); + return result; + } [MethodImpl(InliningOptions.ShortMethod)] - internal Bgr24 ToBgr24() => this.data.ToBgr24(); + internal Bgr24 ToBgr24() + { + Bgr24 result = default; + result.FromScaledVector4(this.data.ToScaledVector4()); + return result; + } [MethodImpl(InliningOptions.ShortMethod)] internal Vector4 ToVector4() => this.data.ToVector4(); diff --git a/src/ImageSharp/Color/Color.cs b/src/ImageSharp/Color/Color.cs index d5eedc160b..9a4df4e629 100644 --- a/src/ImageSharp/Color/Color.cs +++ b/src/ImageSharp/Color/Color.cs @@ -20,26 +20,22 @@ namespace SixLabors.ImageSharp /// public readonly partial struct Color : IEquatable { - private readonly Rgba64 data; + private readonly RgbaVector data; [MethodImpl(InliningOptions.ShortMethod)] private Color(byte r, byte g, byte b, byte a) { - this.data = new Rgba64( - ColorNumerics.UpscaleFrom8BitTo16Bit(r), - ColorNumerics.UpscaleFrom8BitTo16Bit(g), - ColorNumerics.UpscaleFrom8BitTo16Bit(b), - ColorNumerics.UpscaleFrom8BitTo16Bit(a)); + RgbaVector vector = default; + vector.FromRgba32(new(r, g, b, a)); + this.data = vector; } [MethodImpl(InliningOptions.ShortMethod)] private Color(byte r, byte g, byte b) { - this.data = new Rgba64( - ColorNumerics.UpscaleFrom8BitTo16Bit(r), - ColorNumerics.UpscaleFrom8BitTo16Bit(g), - ColorNumerics.UpscaleFrom8BitTo16Bit(b), - ushort.MaxValue); + RgbaVector vector = default; + vector.FromRgba32(new(r, g, b)); + this.data = vector; } /// @@ -52,10 +48,7 @@ private Color(byte r, byte g, byte b) /// otherwise, false. /// [MethodImpl(InliningOptions.ShortMethod)] - public static bool operator ==(Color left, Color right) - { - return left.Equals(right); - } + public static bool operator ==(Color left, Color right) => left.Equals(right); /// /// Checks whether two structures are equal. @@ -67,10 +60,7 @@ private Color(byte r, byte g, byte b) /// otherwise, false. /// [MethodImpl(InliningOptions.ShortMethod)] - public static bool operator !=(Color left, Color right) - { - return !left.Equals(right); - } + public static bool operator !=(Color left, Color right) => !left.Equals(right); /// /// Creates a from RGBA bytes. @@ -81,7 +71,7 @@ private Color(byte r, byte g, byte b) /// The alpha component (0-255). /// The . [MethodImpl(InliningOptions.ShortMethod)] - public static Color FromRgba(byte r, byte g, byte b, byte a) => new Color(r, g, b, a); + public static Color FromRgba(byte r, byte g, byte b, byte a) => new(r, g, b, a); /// /// Creates a from RGB bytes. @@ -91,7 +81,17 @@ private Color(byte r, byte g, byte b) /// The blue component (0-255). /// The . [MethodImpl(InliningOptions.ShortMethod)] - public static Color FromRgb(byte r, byte g, byte b) => new Color(r, g, b); + public static Color FromRgb(byte r, byte g, byte b) => new(r, g, b); + + /// + /// Creates a from the given . + /// + /// The pixel to convert from. + /// The pixel format. + /// The . + [MethodImpl(InliningOptions.ShortMethod)] + public static Color FromPixel(TPixel pixel) + where TPixel : unmanaged, IPixel => new(pixel.ToScaledVector4()); /// /// Creates a new instance of the struct @@ -207,13 +207,18 @@ public Color WithAlpha(float alpha) /// /// A hexadecimal string representation of the value. [MethodImpl(InliningOptions.ShortMethod)] - public string ToHex() => this.data.ToRgba32().ToHex(); + public string ToHex() + { + Rgba32 rgba = default; + this.data.ToRgba32(ref rgba); + return rgba.ToHex(); + } /// public override string ToString() => this.ToHex(); /// - /// Converts the color instance to a specified type. + /// Converts the color instance to a specified type. /// /// The pixel type to convert to. /// The pixel value. @@ -222,12 +227,12 @@ public TPixel ToPixel() where TPixel : unmanaged, IPixel { TPixel pixel = default; - pixel.FromRgba64(this.data); + pixel.FromScaledVector4(this.data.ToScaledVector4()); return pixel; } /// - /// Bulk converts a span of to a span of a specified type. + /// Bulk converts a span of to a span of a specified type. /// /// The pixel type to convert to. /// The configuration. @@ -240,28 +245,19 @@ public static void ToPixel( Span destination) where TPixel : unmanaged, IPixel { - ReadOnlySpan rgba64Span = MemoryMarshal.Cast(source); - PixelOperations.Instance.FromRgba64(configuration, rgba64Span, destination); + ReadOnlySpan rgbaSpan = MemoryMarshal.Cast(source); + PixelOperations.Instance.From(configuration, rgbaSpan, destination); } /// [MethodImpl(InliningOptions.ShortMethod)] - public bool Equals(Color other) - { - return this.data.PackedValue == other.data.PackedValue; - } + public bool Equals(Color other) => this.data.Equals(other.data); /// - public override bool Equals(object obj) - { - return obj is Color other && this.Equals(other); - } + public override bool Equals(object obj) => obj is Color other && this.Equals(other); /// [MethodImpl(InliningOptions.ShortMethod)] - public override int GetHashCode() - { - return this.data.PackedValue.GetHashCode(); - } + public override int GetHashCode() => this.data.GetHashCode(); } } diff --git a/tests/ImageSharp.Tests/Color/ColorTests.CastFrom.cs b/tests/ImageSharp.Tests/Color/ColorTests.CastFrom.cs index 38b94f486c..356ef7351e 100644 --- a/tests/ImageSharp.Tests/Color/ColorTests.CastFrom.cs +++ b/tests/ImageSharp.Tests/Color/ColorTests.CastFrom.cs @@ -66,7 +66,7 @@ public void Bgra32() [Fact] public void Rgb24() { - var source = new Rgb24(1, 22, 231); + var source = new Rgb24(1, 22, 231); // Act: Color color = source; @@ -79,7 +79,7 @@ public void Rgb24() [Fact] public void Bgr24() { - var source = new Bgr24(1, 22, 231); + var source = new Bgr24(1, 22, 231); // Act: Color color = source; @@ -88,6 +88,19 @@ public void Bgr24() Bgr24 data = color.ToPixel(); Assert.Equal(source, data); } + + [Fact] + public void TPixel() + { + var source = new RgbaVector(1, .1F, .133F, .864F); + + // Act: + var color = Color.FromPixel(source); + + // Assert: + RgbaVector data = color.ToPixel(); + Assert.Equal(source, data); + } } } } diff --git a/tests/ImageSharp.Tests/Color/ColorTests.ConstructFrom.cs b/tests/ImageSharp.Tests/Color/ColorTests.ConstructFrom.cs index 89276014b0..dd51f3a6c2 100644 --- a/tests/ImageSharp.Tests/Color/ColorTests.ConstructFrom.cs +++ b/tests/ImageSharp.Tests/Color/ColorTests.ConstructFrom.cs @@ -66,7 +66,7 @@ public void Bgra32() [Fact] public void Rgb24() { - var source = new Rgb24(1, 22, 231); + var source = new Rgb24(1, 22, 231); // Act: var color = new Color(source); @@ -79,7 +79,7 @@ public void Rgb24() [Fact] public void Bgr24() { - var source = new Bgr24(1, 22, 231); + var source = new Bgr24(1, 22, 231); // Act: var color = new Color(source); From c68ef21613e237dc4220ecfe80347693527b192b Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Fri, 29 Oct 2021 17:29:56 +0200 Subject: [PATCH 06/85] Write exif profile with padding if needed --- .../Formats/Webp/BitWriter/BitWriterBase.cs | 49 +++++++++++++++---- .../Formats/Webp/BitWriter/Vp8BitWriter.cs | 4 +- .../Formats/Webp/BitWriter/Vp8LBitWriter.cs | 9 ++-- .../Formats/Webp/WebpEncoderCore.cs | 2 - .../Formats/WebP/WebpMetaDataTests.cs | 25 ++++++++++ 5 files changed, 70 insertions(+), 19 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/BitWriter/BitWriterBase.cs b/src/ImageSharp/Formats/Webp/BitWriter/BitWriterBase.cs index 41623f2878..31e636b6bc 100644 --- a/src/ImageSharp/Formats/Webp/BitWriter/BitWriterBase.cs +++ b/src/ImageSharp/Formats/Webp/BitWriter/BitWriterBase.cs @@ -10,11 +10,22 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter { internal abstract class BitWriterBase { + private const uint MaxDimension = 16777215; + + private const ulong MaxCanvasPixels = 4294967295ul; + + protected const uint ExtendedFileChunkSize = WebpConstants.ChunkHeaderSize + WebpConstants.Vp8XChunkSize; + /// /// Buffer to write to. /// private byte[] buffer; + /// + /// A scratch buffer to reduce allocations. + /// + private readonly byte[] scratchBuffer = new byte[4]; + /// /// Initializes a new instance of the class. /// @@ -81,13 +92,25 @@ protected void ResizeBuffer(int maxBytes, int sizeRequired) /// The block length. protected void WriteRiffHeader(Stream stream, uint riffSize) { - Span buf = stackalloc byte[4]; stream.Write(WebpConstants.RiffFourCc); - BinaryPrimitives.WriteUInt32LittleEndian(buf, riffSize); - stream.Write(buf); + BinaryPrimitives.WriteUInt32LittleEndian(this.scratchBuffer, riffSize); + stream.Write(this.scratchBuffer.AsSpan(0, 4)); stream.Write(WebpConstants.WebpHeader); } + /// + /// Calculates the exif chunk size. + /// + /// The exif profile bytes. + /// The exif chunk size in bytes. + protected uint ExifChunkSize(byte[] exifBytes) + { + uint exifSize = (uint)exifBytes.Length; + uint exifChunkSize = WebpConstants.ChunkHeaderSize + exifSize + (exifSize & 1); + + return exifChunkSize; + } + /// /// Writes the Exif profile to the stream. /// @@ -97,12 +120,19 @@ protected void WriteExifProfile(Stream stream, byte[] exifBytes) { DebugGuard.NotNull(exifBytes, nameof(exifBytes)); - Span buf = stackalloc byte[4]; + uint size = (uint)exifBytes.Length; + Span buf = this.scratchBuffer.AsSpan(0, 4); BinaryPrimitives.WriteUInt32BigEndian(buf, (uint)WebpChunkType.Exif); stream.Write(buf); - BinaryPrimitives.WriteUInt32LittleEndian(buf, (uint)exifBytes.Length); + BinaryPrimitives.WriteUInt32LittleEndian(buf, size); stream.Write(buf); stream.Write(exifBytes); + + // Add padding byte if needed. + if ((size & 1) == 1) + { + stream.WriteByte(0); + } } /// @@ -114,14 +144,13 @@ protected void WriteExifProfile(Stream stream, byte[] exifBytes) /// The height of the image. protected void WriteVp8XHeader(Stream stream, ExifProfile exifProfile, uint width, uint height) { - int maxDimension = 16777215; - if (width > maxDimension || height > maxDimension) + if (width > MaxDimension || height > MaxDimension) { - WebpThrowHelper.ThrowInvalidImageDimensions($"Image width or height exceeds maximum allowed dimension of {maxDimension}"); + WebpThrowHelper.ThrowInvalidImageDimensions($"Image width or height exceeds maximum allowed dimension of {MaxDimension}"); } // The spec states that the product of Canvas Width and Canvas Height MUST be at most 2^32 - 1. - if (width * height > 4294967295ul) + if (width * height > MaxCanvasPixels) { WebpThrowHelper.ThrowInvalidImageDimensions("The product of image width and height MUST be at most 2^32 - 1"); } @@ -133,7 +162,7 @@ protected void WriteVp8XHeader(Stream stream, ExifProfile exifProfile, uint widt flags |= 8; } - Span buf = stackalloc byte[4]; + Span buf = this.scratchBuffer.AsSpan(0, 4); stream.Write(WebpConstants.Vp8XMagicBytes); BinaryPrimitives.WriteUInt32LittleEndian(buf, WebpConstants.Vp8XChunkSize); stream.Write(buf); diff --git a/src/ImageSharp/Formats/Webp/BitWriter/Vp8BitWriter.cs b/src/ImageSharp/Formats/Webp/BitWriter/Vp8BitWriter.cs index 7628247fd6..2c943f64f0 100644 --- a/src/ImageSharp/Formats/Webp/BitWriter/Vp8BitWriter.cs +++ b/src/ImageSharp/Formats/Webp/BitWriter/Vp8BitWriter.cs @@ -408,9 +408,9 @@ public override void WriteEncodedImageToStream(Stream stream, ExifProfile exifPr if (exifProfile != null) { isVp8X = true; - riffSize += WebpConstants.ChunkHeaderSize + WebpConstants.Vp8XChunkSize; + riffSize += ExtendedFileChunkSize; exifBytes = exifProfile.ToByteArray(); - riffSize += WebpConstants.ChunkHeaderSize + (uint)exifBytes.Length; + riffSize += this.ExifChunkSize(exifBytes); } this.Finish(); diff --git a/src/ImageSharp/Formats/Webp/BitWriter/Vp8LBitWriter.cs b/src/ImageSharp/Formats/Webp/BitWriter/Vp8LBitWriter.cs index 2f942231fb..2ce2f5550c 100644 --- a/src/ImageSharp/Formats/Webp/BitWriter/Vp8LBitWriter.cs +++ b/src/ImageSharp/Formats/Webp/BitWriter/Vp8LBitWriter.cs @@ -130,16 +130,15 @@ public override void Finish() /// public override void WriteEncodedImageToStream(Stream stream, ExifProfile exifProfile, uint width, uint height) { - Span buffer = stackalloc byte[4]; bool isVp8X = false; byte[] exifBytes = null; uint riffSize = 0; if (exifProfile != null) { isVp8X = true; - riffSize += WebpConstants.ChunkHeaderSize + WebpConstants.Vp8XChunkSize; + riffSize += ExtendedFileChunkSize; exifBytes = exifProfile.ToByteArray(); - riffSize += WebpConstants.ChunkHeaderSize + (uint)exifBytes.Length; + riffSize += this.ExifChunkSize(exifBytes); } this.Finish(); @@ -161,8 +160,8 @@ public override void WriteEncodedImageToStream(Stream stream, ExifProfile exifPr stream.Write(WebpConstants.Vp8LMagicBytes); // Write Vp8 Header. - BinaryPrimitives.WriteUInt32LittleEndian(buffer, size); - stream.Write(buffer); + BinaryPrimitives.WriteUInt32LittleEndian(this.scratchBuffer, size); + stream.Write(this.scratchBuffer.AsSpan(0, 4)); stream.WriteByte(WebpConstants.Vp8LHeaderMagicByte); // Write the encoded bytes of the image to the stream. diff --git a/src/ImageSharp/Formats/Webp/WebpEncoderCore.cs b/src/ImageSharp/Formats/Webp/WebpEncoderCore.cs index a61fc72530..8640261b17 100644 --- a/src/ImageSharp/Formats/Webp/WebpEncoderCore.cs +++ b/src/ImageSharp/Formats/Webp/WebpEncoderCore.cs @@ -4,11 +4,9 @@ using System.IO; using System.Threading; using SixLabors.ImageSharp.Advanced; -using SixLabors.ImageSharp.Formats.Bmp; using SixLabors.ImageSharp.Formats.Webp.Lossless; using SixLabors.ImageSharp.Formats.Webp.Lossy; using SixLabors.ImageSharp.Memory; -using SixLabors.ImageSharp.Metadata; using SixLabors.ImageSharp.PixelFormats; namespace SixLabors.ImageSharp.Formats.Webp diff --git a/tests/ImageSharp.Tests/Formats/WebP/WebpMetaDataTests.cs b/tests/ImageSharp.Tests/Formats/WebP/WebpMetaDataTests.cs index 81067a41f5..a051de1c01 100644 --- a/tests/ImageSharp.Tests/Formats/WebP/WebpMetaDataTests.cs +++ b/tests/ImageSharp.Tests/Formats/WebP/WebpMetaDataTests.cs @@ -63,6 +63,31 @@ public void IgnoreMetadata_ControlsWhetherIccpIsParsed(TestImageProvider } } + [Theory] + [InlineData(WebpFileFormatType.Lossy)] + [InlineData(WebpFileFormatType.Lossless)] + public void Encode_WritesExifWithPadding(WebpFileFormatType fileFormatType) + { + // arrange + using var input = new Image(25, 25); + using var memoryStream = new MemoryStream(); + var expectedExif = new ExifProfile(); + string expectedSoftware = "ImageSharp"; + expectedExif.SetValue(ExifTag.Software, expectedSoftware); + input.Metadata.ExifProfile = expectedExif; + + // act + input.Save(memoryStream, new WebpEncoder() { FileFormat = fileFormatType }); + memoryStream.Position = 0; + + // assert + using var image = Image.Load(memoryStream); + ExifProfile actualExif = image.Metadata.ExifProfile; + Assert.NotNull(actualExif); + Assert.Equal(expectedExif.Values.Count, actualExif.Values.Count); + Assert.Equal(expectedSoftware, actualExif.GetValue(ExifTag.Software).Value); + } + [Theory] [WithFile(TestImages.Webp.Lossy.WithExif, PixelTypes.Rgba32)] public void EncodeLossyWebp_PreservesExif(TestImageProvider provider) From 7f3c8ffbd0ed8c41e801a361113ee05c40d3c38c Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Fri, 29 Oct 2021 19:45:46 +0200 Subject: [PATCH 07/85] Make sure the alpha flag in VP8X and VP8L are the same --- .../Formats/Webp/BitWriter/BitWriterBase.cs | 18 ++++++++---------- .../Formats/Webp/BitWriter/Vp8BitWriter.cs | 17 ++++++++++++----- .../Formats/Webp/BitWriter/Vp8LBitWriter.cs | 13 ++++++++++--- .../Formats/Webp/Lossless/Vp8LEncoder.cs | 2 +- .../Formats/Webp/Lossy/Vp8Encoder.cs | 4 +++- 5 files changed, 34 insertions(+), 20 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/BitWriter/BitWriterBase.cs b/src/ImageSharp/Formats/Webp/BitWriter/BitWriterBase.cs index 31e636b6bc..9208881360 100644 --- a/src/ImageSharp/Formats/Webp/BitWriter/BitWriterBase.cs +++ b/src/ImageSharp/Formats/Webp/BitWriter/BitWriterBase.cs @@ -63,15 +63,6 @@ internal abstract class BitWriterBase /// public abstract void Finish(); - /// - /// Writes the encoded image to the stream. - /// - /// The stream to write to. - /// The exif profile. - /// The width of the image. - /// The height of the image. - public abstract void WriteEncodedImageToStream(Stream stream, ExifProfile exifProfile, uint width, uint height); - protected void ResizeBuffer(int maxBytes, int sizeRequired) { int newSize = (3 * maxBytes) >> 1; @@ -142,7 +133,8 @@ protected void WriteExifProfile(Stream stream, byte[] exifBytes) /// A exif profile or null, if it does not exist. /// The width of the image. /// The height of the image. - protected void WriteVp8XHeader(Stream stream, ExifProfile exifProfile, uint width, uint height) + /// Flag indicating, if a alpha channel is present. + protected void WriteVp8XHeader(Stream stream, ExifProfile exifProfile, uint width, uint height, bool hasAlpha) { if (width > MaxDimension || height > MaxDimension) { @@ -162,6 +154,12 @@ protected void WriteVp8XHeader(Stream stream, ExifProfile exifProfile, uint widt flags |= 8; } + if (hasAlpha) + { + // Set alpha bit. + flags |= 16; + } + Span buf = this.scratchBuffer.AsSpan(0, 4); stream.Write(WebpConstants.Vp8XMagicBytes); BinaryPrimitives.WriteUInt32LittleEndian(buf, WebpConstants.Vp8XChunkSize); diff --git a/src/ImageSharp/Formats/Webp/BitWriter/Vp8BitWriter.cs b/src/ImageSharp/Formats/Webp/BitWriter/Vp8BitWriter.cs index 2c943f64f0..3b2f943db5 100644 --- a/src/ImageSharp/Formats/Webp/BitWriter/Vp8BitWriter.cs +++ b/src/ImageSharp/Formats/Webp/BitWriter/Vp8BitWriter.cs @@ -399,8 +399,15 @@ private void Flush() } } - /// - public override void WriteEncodedImageToStream(Stream stream, ExifProfile exifProfile, uint width, uint height) + /// + /// Writes the encoded image to the stream. + /// + /// The stream to write to. + /// The exif profile. + /// The width of the image. + /// The height of the image. + /// Flag indicating, if a alpha channel is present. + public void WriteEncodedImageToStream(Stream stream, ExifProfile exifProfile, uint width, uint height, bool hasAlpha) { bool isVp8X = false; byte[] exifBytes = null; @@ -433,7 +440,7 @@ public override void WriteEncodedImageToStream(Stream stream, ExifProfile exifPr riffSize += WebpConstants.TagSize + WebpConstants.ChunkHeaderSize + vp8Size; // Emit headers and partition #0 - this.WriteWebpHeaders(stream, size0, vp8Size, riffSize, isVp8X, width, height, exifProfile); + this.WriteWebpHeaders(stream, size0, vp8Size, riffSize, isVp8X, width, height, exifProfile, hasAlpha); bitWriterPartZero.WriteToStream(stream); // Write the encoded image to the stream. @@ -616,14 +623,14 @@ private void CodeIntraModes(Vp8BitWriter bitWriter) while (it.Next()); } - private void WriteWebpHeaders(Stream stream, uint size0, uint vp8Size, uint riffSize, bool isVp8X, uint width, uint height, ExifProfile exifProfile) + private void WriteWebpHeaders(Stream stream, uint size0, uint vp8Size, uint riffSize, bool isVp8X, uint width, uint height, ExifProfile exifProfile, bool hasAlpha) { this.WriteRiffHeader(stream, riffSize); // Write VP8X, header if necessary. if (isVp8X) { - this.WriteVp8XHeader(stream, exifProfile, width, height); + this.WriteVp8XHeader(stream, exifProfile, width, height, hasAlpha); } this.WriteVp8Header(stream, vp8Size); diff --git a/src/ImageSharp/Formats/Webp/BitWriter/Vp8LBitWriter.cs b/src/ImageSharp/Formats/Webp/BitWriter/Vp8LBitWriter.cs index 2ce2f5550c..b83865aa36 100644 --- a/src/ImageSharp/Formats/Webp/BitWriter/Vp8LBitWriter.cs +++ b/src/ImageSharp/Formats/Webp/BitWriter/Vp8LBitWriter.cs @@ -127,8 +127,15 @@ public override void Finish() this.used = 0; } - /// - public override void WriteEncodedImageToStream(Stream stream, ExifProfile exifProfile, uint width, uint height) + /// + /// Writes the encoded image to the stream. + /// + /// The stream to write to. + /// The exif profile. + /// The width of the image. + /// The height of the image. + /// Flag indicating, if a alpha channel is present. + public void WriteEncodedImageToStream(Stream stream, ExifProfile exifProfile, uint width, uint height, bool hasAlpha) { bool isVp8X = false; byte[] exifBytes = null; @@ -153,7 +160,7 @@ public override void WriteEncodedImageToStream(Stream stream, ExifProfile exifPr // Write VP8X, header if necessary. if (isVp8X) { - this.WriteVp8XHeader(stream, exifProfile, width, height); + this.WriteVp8XHeader(stream, exifProfile, width, height, hasAlpha); } // Write magic bytes indicating its a lossless webp. diff --git a/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs index 693585637c..2fb3fbc6aa 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs @@ -234,7 +234,7 @@ public void Encode(Image image, Stream stream) this.EncodeStream(image); // Write bytes from the bitwriter buffer to the stream. - this.bitWriter.WriteEncodedImageToStream(stream, image.Metadata.ExifProfile, (uint)width, (uint)height); + this.bitWriter.WriteEncodedImageToStream(stream, image.Metadata.ExifProfile, (uint)width, (uint)height, hasAlpha); } /// diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs index 37808d56c2..d41da790b3 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs @@ -317,6 +317,8 @@ public void Encode(Image image, Stream stream) this.bitWriter = new Vp8BitWriter(expectedSize, this); // TODO: EncodeAlpha(); + bool hasAlpha = false; + // Stats-collection loop. this.StatLoop(width, height, yStride, uvStride); it.Init(); @@ -348,7 +350,7 @@ public void Encode(Image image, Stream stream) // Write bytes from the bitwriter buffer to the stream. image.Metadata.SyncProfiles(); - this.bitWriter.WriteEncodedImageToStream(stream, image.Metadata.ExifProfile, (uint)width, (uint)height); + this.bitWriter.WriteEncodedImageToStream(stream, image.Metadata.ExifProfile, (uint)width, (uint)height, hasAlpha); } /// From 70c99d3d02369d4584d18e64393e239a5f86e30b Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Sun, 31 Oct 2021 13:17:32 +0100 Subject: [PATCH 08/85] Reduce allocations --- .../Webp/Lossless/BackwardReferenceEncoder.cs | 10 +- .../Formats/Webp/Lossless/HistogramEncoder.cs | 41 ++++-- .../Formats/Webp/Lossless/HuffmanTree.cs | 9 +- .../Formats/Webp/Lossless/LosslessUtils.cs | 2 +- .../Formats/Webp/Lossless/PixOrCopy.cs | 6 +- .../Formats/Webp/Lossless/PredictorEncoder.cs | 123 +++++++++++++----- .../Formats/Webp/Lossless/Vp8LEncoder.cs | 29 ++++- .../Formats/Webp/Lossless/Vp8LHistogram.cs | 57 ++++---- .../Formats/Webp/Lossless/Vp8LStreaks.cs | 9 ++ .../Webp/Lossless/WebpLosslessDecoder.cs | 3 +- .../Formats/Webp/Lossy/LossyUtils.cs | 51 ++++---- src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs | 86 +++++++----- .../Formats/Webp/Lossy/Vp8EncIterator.cs | 27 ++-- .../Formats/Webp/Lossy/Vp8Encoder.cs | 18 ++- .../Formats/Webp/Lossy/Vp8Encoding.cs | 54 ++++---- .../Formats/Webp/Lossy/Vp8Histogram.cs | 23 ++-- .../Formats/Webp/Lossy/Vp8ModeScore.cs | 18 +++ .../Formats/Webp/Lossy/Vp8Residual.cs | 5 +- .../Formats/Webp/Lossy/WebpLossyDecoder.cs | 30 +++-- .../Formats/WebP/PredictorEncoderTests.cs | 6 +- 20 files changed, 390 insertions(+), 217 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossless/BackwardReferenceEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/BackwardReferenceEncoder.cs index 70c4efb990..dc546f8ac2 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/BackwardReferenceEncoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/BackwardReferenceEncoder.cs @@ -49,6 +49,8 @@ public static Vp8LBackwardRefs GetBackwardReferences( double bitCostBest = -1; int cacheBitsInitial = cacheBits; Vp8LHashChain hashChainBox = null; + var stats = new Vp8LStreaks(); + var bitsEntropy = new Vp8LBitEntropy(); for (int lz77Type = 1; lz77TypesToTry > 0; lz77TypesToTry &= ~lz77Type, lz77Type <<= 1) { int cacheBitsTmp = cacheBitsInitial; @@ -81,7 +83,7 @@ public static Vp8LBackwardRefs GetBackwardReferences( // Keep the best backward references. var histo = new Vp8LHistogram(worst, cacheBitsTmp); - double bitCost = histo.EstimateBits(); + double bitCost = histo.EstimateBits(stats, bitsEntropy); if (lz77TypeBest == 0 || bitCost < bitCostBest) { @@ -100,7 +102,7 @@ public static Vp8LBackwardRefs GetBackwardReferences( Vp8LHashChain hashChainTmp = lz77TypeBest == (int)Vp8LLz77Type.Lz77Standard ? hashChain : hashChainBox; BackwardReferencesTraceBackwards(width, height, bgra, cacheBits, hashChainTmp, best, worst); var histo = new Vp8LHistogram(worst, cacheBits); - double bitCostTrace = histo.EstimateBits(); + double bitCostTrace = histo.EstimateBits(stats, bitsEntropy); if (bitCostTrace < bitCostBest) { best = worst; @@ -214,9 +216,11 @@ private static int CalculateBestCacheSize(ReadOnlySpan bgra, int quality, } } + var stats = new Vp8LStreaks(); + var bitsEntropy = new Vp8LBitEntropy(); for (int i = 0; i <= cacheBitsMax; i++) { - double entropy = histos[i].EstimateBits(); + double entropy = histos[i].EstimateBits(stats, bitsEntropy); if (i == 0 || entropy < entropyMin) { entropyMin = entropy; diff --git a/src/ImageSharp/Formats/Webp/Lossless/HistogramEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/HistogramEncoder.cs index f2d4fb189f..5d407d73c1 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/HistogramEncoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/HistogramEncoder.cs @@ -152,10 +152,12 @@ private static void HistogramAnalyzeEntropyBin(List histograms, u private static int HistogramCopyAndAnalyze(List origHistograms, List histograms, ushort[] histogramSymbols) { + var stats = new Vp8LStreaks(); + var bitsEntropy = new Vp8LBitEntropy(); for (int clusterId = 0, i = 0; i < origHistograms.Count; i++) { Vp8LHistogram origHistogram = origHistograms[i]; - origHistogram.UpdateHistogramCost(); + origHistogram.UpdateHistogramCost(stats, bitsEntropy); // Skip the histogram if it is completely empty, which can happen for tiles with no information (when they are skipped because of LZ77). if (!origHistogram.IsUsed[0] && !origHistogram.IsUsed[1] && !origHistogram.IsUsed[2] && !origHistogram.IsUsed[3] && !origHistogram.IsUsed[4]) @@ -175,7 +177,14 @@ private static int HistogramCopyAndAnalyze(List origHistograms, L return numUsed; } - private static void HistogramCombineEntropyBin(List histograms, ushort[] clusters, ushort[] clusterMappings, Vp8LHistogram curCombo, ushort[] binMap, int numBins, double combineCostFactor) + private static void HistogramCombineEntropyBin( + List histograms, + ushort[] clusters, + ushort[] clusterMappings, + Vp8LHistogram curCombo, + ushort[] binMap, + int numBins, + double combineCostFactor) { var binInfo = new HistogramBinInfo[BinSize]; for (int idx = 0; idx < numBins; idx++) @@ -191,6 +200,8 @@ private static void HistogramCombineEntropyBin(List histograms, u } var indicesToRemove = new List(); + var stats = new Vp8LStreaks(); + var bitsEntropy = new Vp8LBitEntropy(); for (int idx = 0; idx < histograms.Count; idx++) { if (histograms[idx] == null) @@ -209,7 +220,7 @@ private static void HistogramCombineEntropyBin(List histograms, u // Try to merge #idx into #first (both share the same binId) double bitCost = histograms[idx].BitCost; double bitCostThresh = -bitCost * combineCostFactor; - double currCostDiff = histograms[first].AddEval(histograms[idx], bitCostThresh, curCombo); + double currCostDiff = histograms[first].AddEval(histograms[idx], stats, bitsEntropy, bitCostThresh, curCombo); if (currCostDiff < bitCostThresh) { @@ -308,6 +319,8 @@ private static bool HistogramCombineStochastic(List histograms, i int numUsed = histograms.Count(h => h != null); int outerIters = numUsed; int numTriesNoSuccess = outerIters / 2; + var stats = new Vp8LStreaks(); + var bitsEntropy = new Vp8LBitEntropy(); if (numUsed < minClusterSize) { @@ -354,7 +367,7 @@ private static bool HistogramCombineStochastic(List histograms, i idx2 = mappings[idx2]; // Calculate cost reduction on combination. - double currCost = HistoPriorityListPush(histoPriorityList, maxSize, histograms, idx1, idx2, bestCost); + double currCost = HistoPriorityListPush(histoPriorityList, maxSize, histograms, idx1, idx2, bestCost, stats, bitsEntropy); // Found a better pair? if (currCost < 0) @@ -428,7 +441,7 @@ private static bool HistogramCombineStochastic(List histograms, i if (doEval) { // Re-evaluate the cost of an updated pair. - HistoListUpdatePair(histograms[p.Idx1], histograms[p.Idx2], 0.0d, p); + HistoListUpdatePair(histograms[p.Idx1], histograms[p.Idx2], stats, bitsEntropy, 0.0d, p); if (p.CostDiff >= 0.0d) { histoPriorityList[j] = histoPriorityList[histoPriorityList.Count - 1]; @@ -456,6 +469,8 @@ private static void HistogramCombineGreedy(List histograms) // Priority list of histogram pairs. var histoPriorityList = new List(); int maxSize = histoSize * histoSize; + var stats = new Vp8LStreaks(); + var bitsEntropy = new Vp8LBitEntropy(); for (int i = 0; i < histoSize; i++) { @@ -471,7 +486,7 @@ private static void HistogramCombineGreedy(List histograms) continue; } - HistoPriorityListPush(histoPriorityList, maxSize, histograms, i, j, 0.0d); + HistoPriorityListPush(histoPriorityList, maxSize, histograms, i, j, 0.0d, stats, bitsEntropy); } } @@ -510,7 +525,7 @@ private static void HistogramCombineGreedy(List histograms) continue; } - HistoPriorityListPush(histoPriorityList, maxSize, histograms, idx1, i, 0.0d); + HistoPriorityListPush(histoPriorityList, maxSize, histograms, idx1, i, 0.0d, stats, bitsEntropy); } } } @@ -519,6 +534,8 @@ private static void HistogramRemap(List input, List 1) { for (int i = 0; i < inSize; i++) @@ -534,7 +551,7 @@ private static void HistogramRemap(List input, List input, List /// The cost of the pair, or 0 if it superior to threshold. - private static double HistoPriorityListPush(List histoList, int maxSize, List histograms, int idx1, int idx2, double threshold) + private static double HistoPriorityListPush(List histoList, int maxSize, List histograms, int idx1, int idx2, double threshold, Vp8LStreaks stats, Vp8LBitEntropy bitsEntropy) { var pair = new HistogramPair(); @@ -598,7 +615,7 @@ private static double HistoPriorityListPush(List histoList, int m Vp8LHistogram h1 = histograms[idx1]; Vp8LHistogram h2 = histograms[idx2]; - HistoListUpdatePair(h1, h2, threshold, pair); + HistoListUpdatePair(h1, h2, stats, bitsEntropy, threshold, pair); // Do not even consider the pair if it does not improve the entropy. if (pair.CostDiff >= threshold) @@ -616,11 +633,11 @@ private static double HistoPriorityListPush(List histoList, int m /// /// Update the cost diff and combo of a pair of histograms. This needs to be called when the the histograms have been merged with a third one. /// - private static void HistoListUpdatePair(Vp8LHistogram h1, Vp8LHistogram h2, double threshold, HistogramPair pair) + private static void HistoListUpdatePair(Vp8LHistogram h1, Vp8LHistogram h2, Vp8LStreaks stats, Vp8LBitEntropy bitsEntropy, double threshold, HistogramPair pair) { double sumCost = h1.BitCost + h2.BitCost; pair.CostCombo = 0.0d; - h1.GetCombinedHistogramEntropy(h2, sumCost + threshold, costInitial: pair.CostCombo, out double cost); + h1.GetCombinedHistogramEntropy(h2, stats, bitsEntropy, sumCost + threshold, costInitial: pair.CostCombo, out double cost); pair.CostCombo = cost; pair.CostDiff = pair.CostCombo - sumCost; } diff --git a/src/ImageSharp/Formats/Webp/Lossless/HuffmanTree.cs b/src/ImageSharp/Formats/Webp/Lossless/HuffmanTree.cs index cd8be9aac3..0376311ed9 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/HuffmanTree.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/HuffmanTree.cs @@ -49,14 +49,13 @@ public static int Compare(HuffmanTree t1, HuffmanTree t2) { return -1; } - else if (t1.TotalCount < t2.TotalCount) + + if (t1.TotalCount < t2.TotalCount) { return 1; } - else - { - return t1.Value < t2.Value ? -1 : 1; - } + + return t1.Value < t2.Value ? -1 : 1; } public IDeepCloneable DeepClone() => new HuffmanTree(this); diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs index b7f94415be..06204ae913 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs @@ -704,7 +704,7 @@ public static void BundleColorMap(Span row, int width, int xBits, Span /// Shanon entropy. - public static float CombinedShannonEntropy(int[] x, int[] y) + public static float CombinedShannonEntropy(Span x, Span y) { double retVal = 0.0d; uint sumX = 0, sumXY = 0; diff --git a/src/ImageSharp/Formats/Webp/Lossless/PixOrCopy.cs b/src/ImageSharp/Formats/Webp/Lossless/PixOrCopy.cs index 2d71a7af64..6cd109121d 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/PixOrCopy.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/PixOrCopy.cs @@ -15,7 +15,7 @@ internal class PixOrCopy public uint BgraOrDistance { get; set; } public static PixOrCopy CreateCacheIdx(int idx) => - new PixOrCopy() + new() { Mode = PixOrCopyMode.CacheIdx, BgraOrDistance = (uint)idx, @@ -23,14 +23,14 @@ public static PixOrCopy CreateCacheIdx(int idx) => }; public static PixOrCopy CreateLiteral(uint bgra) => - new PixOrCopy() + new() { Mode = PixOrCopyMode.Literal, BgraOrDistance = bgra, Len = 1 }; - public static PixOrCopy CreateCopy(uint distance, ushort len) => new PixOrCopy() + public static PixOrCopy CreateCopy(uint distance, ushort len) => new() { Mode = PixOrCopyMode.Copy, BgraOrDistance = distance, diff --git a/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs index 671e9a043e..713fc79194 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs @@ -17,6 +17,13 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless /// internal static unsafe class PredictorEncoder { + private static readonly sbyte[] DeltaLut = { 16, 16, 8, 4, 2, 2, 2 }; + + private static readonly sbyte[][] Offset = + { + new sbyte[] { 0, -1 }, new sbyte[] { 0, 1 }, new sbyte[] { -1, 0 }, new sbyte[] { 1, 0 }, new sbyte[] { -1, -1 }, new sbyte[] { -1, 1 }, new sbyte[] { 1, -1 }, new sbyte[] { 1, 1 } + }; + private const int GreenRedToBlueNumAxis = 8; private const int GreenRedToBlueMaxIters = 7; @@ -41,6 +48,8 @@ public static void ResidualImage( Span bgra, Span bgraScratch, Span image, + int[][] histoArgb, + int[][] bestHisto, bool nearLossless, int nearLosslessQuality, WebpTransparentColorMode transparentColorMode, @@ -80,6 +89,8 @@ public static void ResidualImage( histo, bgraScratch, bgra, + histoArgb, + bestHisto, maxQuantization, transparentColorMode, usedSubtractGreen, @@ -105,7 +116,7 @@ public static void ResidualImage( lowEffort); } - public static void ColorSpaceTransform(int width, int height, int bits, int quality, Span bgra, Span image) + public static void ColorSpaceTransform(int width, int height, int bits, int quality, Span bgra, Span image, Span scratch) { int maxTileSize = 1 << bits; int tileXSize = LosslessUtils.SubSampleSize(width, bits); @@ -139,7 +150,8 @@ public static void ColorSpaceTransform(int width, int height, int bits, int qual height, accumulatedRedHisto, accumulatedBlueHisto, - bgra); + bgra, + scratch); image[offset] = MultipliersToColorCode(prevX); CopyTileWithColorTransform(width, height, tileXOffset, tileYOffset, maxTileSize, prevX, bgra); @@ -188,6 +200,8 @@ private static int GetBestPredictorForTile( int[][] accumulated, Span argbScratch, Span argb, + int[][] histoArgb, + int[][] bestHisto, int maxQuantization, WebpTransparentColorMode transparentColorMode, bool usedSubtractGreen, @@ -222,21 +236,14 @@ private static int GetBestPredictorForTile( float bestDiff = MaxDiffCost; int bestMode = 0; uint[] residuals = new uint[1 << WebpConstants.MaxTransformBits]; - int[][] histoArgb = new int[4][]; - int[][] bestHisto = new int[4][]; for (int i = 0; i < 4; i++) { - histoArgb[i] = new int[256]; - bestHisto[i] = new int[256]; + histoArgb[i].AsSpan().Clear(); + bestHisto[i].AsSpan().Clear(); } for (int mode = 0; mode < numPredModes; mode++) { - for (int i = 0; i < 4; i++) - { - histoArgb[i].AsSpan().Fill(0); - } - if (startY > 0) { // Read the row above the tile which will become the first upper_row. @@ -300,6 +307,11 @@ private static int GetBestPredictorForTile( bestDiff = curDiff; bestMode = mode; } + + for (int i = 0; i < 4; i++) + { + histoArgb[i].AsSpan().Clear(); + } } for (int i = 0; i < 4; i++) @@ -819,7 +831,19 @@ private static void CopyTileWithColorTransform(int xSize, int ySize, int tileX, } } - private static Vp8LMultipliers GetBestColorTransformForTile(int tileX, int tileY, int bits, Vp8LMultipliers prevX, Vp8LMultipliers prevY, int quality, int xSize, int ySize, int[] accumulatedRedHisto, int[] accumulatedBlueHisto, Span argb) + private static Vp8LMultipliers GetBestColorTransformForTile( + int tileX, + int tileY, + int bits, + Vp8LMultipliers prevX, + Vp8LMultipliers prevY, + int quality, + int xSize, + int ySize, + int[] accumulatedRedHisto, + int[] accumulatedBlueHisto, + Span argb, + Span scratch) { int maxTileSize = 1 << bits; int tileYOffset = tileY * maxTileSize; @@ -832,18 +856,28 @@ private static Vp8LMultipliers GetBestColorTransformForTile(int tileX, int tileY var bestTx = default(Vp8LMultipliers); - GetBestGreenToRed(tileArgb, xSize, tileWidth, tileHeight, prevX, prevY, quality, accumulatedRedHisto, ref bestTx); + GetBestGreenToRed(tileArgb, xSize, scratch, tileWidth, tileHeight, prevX, prevY, quality, accumulatedRedHisto, ref bestTx); - GetBestGreenRedToBlue(tileArgb, xSize, tileWidth, tileHeight, prevX, prevY, quality, accumulatedBlueHisto, ref bestTx); + GetBestGreenRedToBlue(tileArgb, xSize, scratch, tileWidth, tileHeight, prevX, prevY, quality, accumulatedBlueHisto, ref bestTx); return bestTx; } - private static void GetBestGreenToRed(Span argb, int stride, int tileWidth, int tileHeight, Vp8LMultipliers prevX, Vp8LMultipliers prevY, int quality, int[] accumulatedRedHisto, ref Vp8LMultipliers bestTx) + private static void GetBestGreenToRed( + Span argb, + int stride, + Span scratch, + int tileWidth, + int tileHeight, + Vp8LMultipliers prevX, + Vp8LMultipliers prevY, + int quality, + int[] accumulatedRedHisto, + ref Vp8LMultipliers bestTx) { int maxIters = 4 + ((7 * quality) >> 8); // in range [4..6] int greenToRedBest = 0; - double bestDiff = GetPredictionCostCrossColorRed(argb, stride, tileWidth, tileHeight, prevX, prevY, greenToRedBest, accumulatedRedHisto); + double bestDiff = GetPredictionCostCrossColorRed(argb, stride, scratch, tileWidth, tileHeight, prevX, prevY, greenToRedBest, accumulatedRedHisto); for (int iter = 0; iter < maxIters; iter++) { // ColorTransformDelta is a 3.5 bit fixed point, so 32 is equal to @@ -855,7 +889,7 @@ private static void GetBestGreenToRed(Span argb, int stride, int tileWidth for (int offset = -delta; offset <= delta; offset += 2 * delta) { int greenToRedCur = offset + greenToRedBest; - double curDiff = GetPredictionCostCrossColorRed(argb, stride, tileWidth, tileHeight, prevX, prevY, greenToRedCur, accumulatedRedHisto); + double curDiff = GetPredictionCostCrossColorRed(argb, stride, scratch, tileWidth, tileHeight, prevX, prevY, greenToRedCur, accumulatedRedHisto); if (curDiff < bestDiff) { bestDiff = curDiff; @@ -867,24 +901,22 @@ private static void GetBestGreenToRed(Span argb, int stride, int tileWidth bestTx.GreenToRed = (byte)(greenToRedBest & 0xff); } - private static void GetBestGreenRedToBlue(Span argb, int stride, int tileWidth, int tileHeight, Vp8LMultipliers prevX, Vp8LMultipliers prevY, int quality, int[] accumulatedBlueHisto, ref Vp8LMultipliers bestTx) + private static void GetBestGreenRedToBlue(Span argb, int stride, Span scratch, int tileWidth, int tileHeight, Vp8LMultipliers prevX, Vp8LMultipliers prevY, int quality, int[] accumulatedBlueHisto, ref Vp8LMultipliers bestTx) { int iters = (quality < 25) ? 1 : (quality > 50) ? GreenRedToBlueMaxIters : 4; int greenToBlueBest = 0; int redToBlueBest = 0; - sbyte[][] offset = { new sbyte[] { 0, -1 }, new sbyte[] { 0, 1 }, new sbyte[] { -1, 0 }, new sbyte[] { 1, 0 }, new sbyte[] { -1, -1 }, new sbyte[] { -1, 1 }, new sbyte[] { 1, -1 }, new sbyte[] { 1, 1 } }; - sbyte[] deltaLut = { 16, 16, 8, 4, 2, 2, 2 }; // Initial value at origin: - double bestDiff = GetPredictionCostCrossColorBlue(argb, stride, tileWidth, tileHeight, prevX, prevY, greenToBlueBest, redToBlueBest, accumulatedBlueHisto); + double bestDiff = GetPredictionCostCrossColorBlue(argb, stride, scratch, tileWidth, tileHeight, prevX, prevY, greenToBlueBest, redToBlueBest, accumulatedBlueHisto); for (int iter = 0; iter < iters; iter++) { - int delta = deltaLut[iter]; + int delta = DeltaLut[iter]; for (int axis = 0; axis < GreenRedToBlueNumAxis; axis++) { - int greenToBlueCur = (offset[axis][0] * delta) + greenToBlueBest; - int redToBlueCur = (offset[axis][1] * delta) + redToBlueBest; - double curDiff = GetPredictionCostCrossColorBlue(argb, stride, tileWidth, tileHeight, prevX, prevY, greenToBlueCur, redToBlueCur, accumulatedBlueHisto); + int greenToBlueCur = (Offset[axis][0] * delta) + greenToBlueBest; + int redToBlueCur = (Offset[axis][1] * delta) + redToBlueBest; + double curDiff = GetPredictionCostCrossColorBlue(argb, stride, scratch, tileWidth, tileHeight, prevX, prevY, greenToBlueCur, redToBlueCur, accumulatedBlueHisto); if (curDiff < bestDiff) { bestDiff = curDiff; @@ -910,9 +942,19 @@ private static void GetBestGreenRedToBlue(Span argb, int stride, int tileW bestTx.RedToBlue = (byte)(redToBlueBest & 0xff); } - private static double GetPredictionCostCrossColorRed(Span argb, int stride, int tileWidth, int tileHeight, Vp8LMultipliers prevX, Vp8LMultipliers prevY, int greenToRed, int[] accumulatedRedHisto) + private static double GetPredictionCostCrossColorRed( + Span argb, + int stride, + Span scratch, + int tileWidth, + int tileHeight, + Vp8LMultipliers prevX, + Vp8LMultipliers prevY, + int greenToRed, + int[] accumulatedRedHisto) { - int[] histo = new int[256]; + Span histo = scratch.Slice(0, 256); + histo.Clear(); CollectColorRedTransforms(argb, stride, tileWidth, tileHeight, greenToRed, histo); double curDiff = PredictionCostCrossColor(accumulatedRedHisto, histo); @@ -937,9 +979,20 @@ private static double GetPredictionCostCrossColorRed(Span argb, int stride return curDiff; } - private static double GetPredictionCostCrossColorBlue(Span argb, int stride, int tileWidth, int tileHeight, Vp8LMultipliers prevX, Vp8LMultipliers prevY, int greenToBlue, int redToBlue, int[] accumulatedBlueHisto) + private static double GetPredictionCostCrossColorBlue( + Span argb, + int stride, + Span scratch, + int tileWidth, + int tileHeight, + Vp8LMultipliers prevX, + Vp8LMultipliers prevY, + int greenToBlue, + int redToBlue, + int[] accumulatedBlueHisto) { - int[] histo = new int[256]; + Span histo = scratch.Slice(0, 256); + histo.Clear(); CollectColorBlueTransforms(argb, stride, tileWidth, tileHeight, greenToBlue, redToBlue, histo); double curDiff = PredictionCostCrossColor(accumulatedBlueHisto, histo); @@ -980,7 +1033,7 @@ private static double GetPredictionCostCrossColorBlue(Span argb, int strid return curDiff; } - private static void CollectColorRedTransforms(Span bgra, int stride, int tileWidth, int tileHeight, int greenToRed, int[] histo) + private static void CollectColorRedTransforms(Span bgra, int stride, int tileWidth, int tileHeight, int greenToRed, Span histo) { #if SUPPORTS_RUNTIME_INTRINSICS if (Sse41.IsSupported) @@ -1036,7 +1089,7 @@ private static void CollectColorRedTransforms(Span bgra, int stride, int t } } - private static void CollectColorRedTransformsNoneVectorized(Span bgra, int stride, int tileWidth, int tileHeight, int greenToRed, int[] histo) + private static void CollectColorRedTransformsNoneVectorized(Span bgra, int stride, int tileWidth, int tileHeight, int greenToRed, Span histo) { int pos = 0; while (tileHeight-- > 0) @@ -1051,7 +1104,7 @@ private static void CollectColorRedTransformsNoneVectorized(Span bgra, int } } - private static void CollectColorBlueTransforms(Span bgra, int stride, int tileWidth, int tileHeight, int greenToBlue, int redToBlue, int[] histo) + private static void CollectColorBlueTransforms(Span bgra, int stride, int tileWidth, int tileHeight, int greenToBlue, int redToBlue, Span histo) { #if SUPPORTS_RUNTIME_INTRINSICS if (Sse41.IsSupported) @@ -1114,7 +1167,7 @@ private static void CollectColorBlueTransforms(Span bgra, int stride, int } } - private static void CollectColorBlueTransformsNoneVectorized(Span bgra, int stride, int tileWidth, int tileHeight, int greenToBlue, int redToBlue, int[] histo) + private static void CollectColorBlueTransformsNoneVectorized(Span bgra, int stride, int tileWidth, int tileHeight, int greenToBlue, int redToBlue, Span histo) { int pos = 0; while (tileHeight-- > 0) @@ -1143,7 +1196,7 @@ private static float PredictionCostSpatialHistogram(int[][] accumulated, int[][] } [MethodImpl(InliningOptions.ShortMethod)] - private static double PredictionCostCrossColor(int[] accumulated, int[] counts) + private static double PredictionCostCrossColor(int[] accumulated, Span counts) { // Favor low entropy, locally and globally. // Favor small absolute values for PredictionCostSpatial. @@ -1152,7 +1205,7 @@ private static double PredictionCostCrossColor(int[] accumulated, int[] counts) } [MethodImpl(InliningOptions.ShortMethod)] - private static float PredictionCostSpatial(int[] counts, int weight0, double expVal) + private static float PredictionCostSpatial(Span counts, int weight0, double expVal) { int significantSymbols = 256 >> 4; double expDecayFactor = 0.6; diff --git a/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs index 693585637c..818488696e 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs @@ -19,6 +19,15 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless /// internal class Vp8LEncoder : IDisposable { + /// + /// Scratch buffer to reduce allocations. + /// + private readonly int[] scratch = new int[256]; + + private int[][] histoArgb = { new int[256], new int[256], new int[256], new int[256] }; + + private int[][] bestHisto = { new int[256], new int[256], new int[256], new int[256] }; + /// /// The to use for buffer allocations. /// @@ -76,6 +85,8 @@ internal class Vp8LEncoder : IDisposable private const int PaletteInvSize = 1 << PaletteInvSizeBits; + private static readonly byte[] Order = { 1, 2, 0, 3 }; + /// /// Initializes a new instance of the class. /// @@ -675,6 +686,8 @@ private void ApplyPredictFilter(int width, int height, bool lowEffort) this.EncodedData.GetSpan(), this.BgraScratch.GetSpan(), this.TransformData.GetSpan(), + this.histoArgb, + this.bestHisto, this.nearLossless, nearLosslessStrength, this.transparentColorMode, @@ -694,7 +707,7 @@ private void ApplyCrossColorFilter(int width, int height, bool lowEffort) int transformWidth = LosslessUtils.SubSampleSize(width, colorTransformBits); int transformHeight = LosslessUtils.SubSampleSize(height, colorTransformBits); - PredictorEncoder.ColorSpaceTransform(width, height, colorTransformBits, this.quality, this.EncodedData.GetSpan(), this.TransformData.GetSpan()); + PredictorEncoder.ColorSpaceTransform(width, height, colorTransformBits, this.quality, this.EncodedData.GetSpan(), this.TransformData.GetSpan(), this.scratch); this.bitWriter.PutBits(WebpConstants.TransformPresent, 1); this.bitWriter.PutBits((uint)Vp8LTransformType.CrossColorTransform, 2); @@ -736,7 +749,7 @@ private void EncodeImageNoHuffman(Span bgra, Vp8LHashChain hashChain, Vp8L var histogramImage = new List() { - new Vp8LHistogram(cacheBits) + new(cacheBits) }; // Build histogram image and symbols from backward references. @@ -780,7 +793,8 @@ private void EncodeImageNoHuffman(Span bgra, Vp8LHashChain hashChain, Vp8L private void StoreHuffmanCode(HuffmanTree[] huffTree, HuffmanTreeToken[] tokens, HuffmanTreeCode huffmanCode) { int count = 0; - int[] symbols = { 0, 0 }; + Span symbols = this.scratch.AsSpan(0, 2); + symbols.Clear(); int maxBits = 8; int maxSymbol = 1 << maxBits; @@ -973,10 +987,9 @@ private void StoreImageToBitMask(int width, int histoBits, Vp8LBackwardRefs back if (v.IsLiteral()) { - byte[] order = { 1, 2, 0, 3 }; for (int k = 0; k < 4; k++) { - int code = (int)v.Literal(order[k]); + int code = (int)v.Literal(Order[k]); this.bitWriter.WriteHuffmanCode(codes[k], code); } } @@ -1092,9 +1105,10 @@ private EntropyIx AnalyzeEntropy(ReadOnlySpan bgra, int width, int height, histo[(int)HistoIx.HistoBluePred * 256]++; histo[(int)HistoIx.HistoAlphaPred * 256]++; + var bitEntropy = new Vp8LBitEntropy(); for (int j = 0; j < (int)HistoIx.HistoTotal; j++) { - var bitEntropy = new Vp8LBitEntropy(); + bitEntropy.Init(); Span curHisto = histo.Slice(j * 256, 256); bitEntropy.BitsEntropyUnrefined(curHisto, 256); entropyComp[j] = bitEntropy.BitsEntropyRefine(); @@ -1447,7 +1461,8 @@ private static int SearchColorNoIdx(uint[] sorted, uint color, int hi) { return mid; } - else if (sorted[mid] < color) + + if (sorted[mid] < color) { low = mid; } diff --git a/src/ImageSharp/Formats/Webp/Lossless/Vp8LHistogram.cs b/src/ImageSharp/Formats/Webp/Lossless/Vp8LHistogram.cs index 42260e2b25..8b02015687 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/Vp8LHistogram.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/Vp8LHistogram.cs @@ -157,29 +157,30 @@ public void AddSinglePixOrCopy(PixOrCopy v, bool useDistanceModifier, int xSize /// Estimate how many bits the combined entropy of literals and distance approximately maps to. /// /// Estimated bits. - public double EstimateBits() + public double EstimateBits(Vp8LStreaks stats, Vp8LBitEntropy bitsEntropy) { uint notUsed = 0; return - PopulationCost(this.Literal, this.NumCodes(), ref notUsed, ref this.IsUsed[0]) - + PopulationCost(this.Red, WebpConstants.NumLiteralCodes, ref notUsed, ref this.IsUsed[1]) - + PopulationCost(this.Blue, WebpConstants.NumLiteralCodes, ref notUsed, ref this.IsUsed[2]) - + PopulationCost(this.Alpha, WebpConstants.NumLiteralCodes, ref notUsed, ref this.IsUsed[3]) - + PopulationCost(this.Distance, WebpConstants.NumDistanceCodes, ref notUsed, ref this.IsUsed[4]) + PopulationCost(this.Literal, this.NumCodes(), ref notUsed, ref this.IsUsed[0], stats, bitsEntropy) + + PopulationCost(this.Red, WebpConstants.NumLiteralCodes, ref notUsed, ref this.IsUsed[1], stats, bitsEntropy) + + PopulationCost(this.Blue, WebpConstants.NumLiteralCodes, ref notUsed, ref this.IsUsed[2], stats, bitsEntropy) + + PopulationCost(this.Alpha, WebpConstants.NumLiteralCodes, ref notUsed, ref this.IsUsed[3], stats, bitsEntropy) + + PopulationCost(this.Distance, WebpConstants.NumDistanceCodes, ref notUsed, ref this.IsUsed[4], stats, bitsEntropy) + ExtraCost(this.Literal.AsSpan(WebpConstants.NumLiteralCodes), WebpConstants.NumLengthCodes) + ExtraCost(this.Distance, WebpConstants.NumDistanceCodes); } - public void UpdateHistogramCost() + public void UpdateHistogramCost(Vp8LStreaks stats, Vp8LBitEntropy bitsEntropy) { uint alphaSym = 0, redSym = 0, blueSym = 0; uint notUsed = 0; - double alphaCost = PopulationCost(this.Alpha, WebpConstants.NumLiteralCodes, ref alphaSym, ref this.IsUsed[3]); - double distanceCost = PopulationCost(this.Distance, WebpConstants.NumDistanceCodes, ref notUsed, ref this.IsUsed[4]) + ExtraCost(this.Distance, WebpConstants.NumDistanceCodes); + + double alphaCost = PopulationCost(this.Alpha, WebpConstants.NumLiteralCodes, ref alphaSym, ref this.IsUsed[3], stats, bitsEntropy); + double distanceCost = PopulationCost(this.Distance, WebpConstants.NumDistanceCodes, ref notUsed, ref this.IsUsed[4], stats, bitsEntropy) + ExtraCost(this.Distance, WebpConstants.NumDistanceCodes); int numCodes = this.NumCodes(); - this.LiteralCost = PopulationCost(this.Literal, numCodes, ref notUsed, ref this.IsUsed[0]) + ExtraCost(this.Literal.AsSpan(WebpConstants.NumLiteralCodes), WebpConstants.NumLengthCodes); - this.RedCost = PopulationCost(this.Red, WebpConstants.NumLiteralCodes, ref redSym, ref this.IsUsed[1]); - this.BlueCost = PopulationCost(this.Blue, WebpConstants.NumLiteralCodes, ref blueSym, ref this.IsUsed[2]); + this.LiteralCost = PopulationCost(this.Literal, numCodes, ref notUsed, ref this.IsUsed[0], stats, bitsEntropy) + ExtraCost(this.Literal.AsSpan(WebpConstants.NumLiteralCodes), WebpConstants.NumLengthCodes); + this.RedCost = PopulationCost(this.Red, WebpConstants.NumLiteralCodes, ref redSym, ref this.IsUsed[1], stats, bitsEntropy); + this.BlueCost = PopulationCost(this.Blue, WebpConstants.NumLiteralCodes, ref blueSym, ref this.IsUsed[2], stats, bitsEntropy); this.BitCost = this.LiteralCost + this.RedCost + this.BlueCost + alphaCost + distanceCost; if ((alphaSym | redSym | blueSym) == NonTrivialSym) { @@ -198,11 +199,11 @@ public void UpdateHistogramCost() /// Since the previous score passed is 'costThreshold', we only need to compare /// the partial cost against 'costThreshold + C(a) + C(b)' to possibly bail-out early. /// - public double AddEval(Vp8LHistogram b, double costThreshold, Vp8LHistogram output) + public double AddEval(Vp8LHistogram b, Vp8LStreaks stats, Vp8LBitEntropy bitsEntropy, double costThreshold, Vp8LHistogram output) { double sumCost = this.BitCost + b.BitCost; costThreshold += sumCost; - if (this.GetCombinedHistogramEntropy(b, costThreshold, costInitial: 0, out double cost)) + if (this.GetCombinedHistogramEntropy(b, stats, bitsEntropy, costThreshold, costInitial: 0, out double cost)) { this.Add(b, output); output.BitCost = cost; @@ -212,10 +213,10 @@ public double AddEval(Vp8LHistogram b, double costThreshold, Vp8LHistogram outpu return cost - sumCost; } - public double AddThresh(Vp8LHistogram b, double costThreshold) + public double AddThresh(Vp8LHistogram b, Vp8LStreaks stats, Vp8LBitEntropy bitsEntropy, double costThreshold) { double costInitial = -this.BitCost; - this.GetCombinedHistogramEntropy(b, costThreshold, costInitial, out double cost); + this.GetCombinedHistogramEntropy(b, stats, bitsEntropy, costThreshold, costInitial, out double cost); return cost; } @@ -239,12 +240,12 @@ public void Add(Vp8LHistogram b, Vp8LHistogram output) : NonTrivialSym; } - public bool GetCombinedHistogramEntropy(Vp8LHistogram b, double costThreshold, double costInitial, out double cost) + public bool GetCombinedHistogramEntropy(Vp8LHistogram b, Vp8LStreaks stats, Vp8LBitEntropy bitEntropy, double costThreshold, double costInitial, out double cost) { bool trivialAtEnd = false; cost = costInitial; - cost += GetCombinedEntropy(this.Literal, b.Literal, this.NumCodes(), this.IsUsed[0], b.IsUsed[0], false); + cost += GetCombinedEntropy(this.Literal, b.Literal, this.NumCodes(), this.IsUsed[0], b.IsUsed[0], false, stats, bitEntropy); cost += ExtraCostCombined(this.Literal.AsSpan(WebpConstants.NumLiteralCodes), b.Literal.AsSpan(WebpConstants.NumLiteralCodes), WebpConstants.NumLengthCodes); @@ -267,25 +268,25 @@ public bool GetCombinedHistogramEntropy(Vp8LHistogram b, double costThreshold, d } } - cost += GetCombinedEntropy(this.Red, b.Red, WebpConstants.NumLiteralCodes, this.IsUsed[1], b.IsUsed[1], trivialAtEnd); + cost += GetCombinedEntropy(this.Red, b.Red, WebpConstants.NumLiteralCodes, this.IsUsed[1], b.IsUsed[1], trivialAtEnd, stats, bitEntropy); if (cost > costThreshold) { return false; } - cost += GetCombinedEntropy(this.Blue, b.Blue, WebpConstants.NumLiteralCodes, this.IsUsed[2], b.IsUsed[2], trivialAtEnd); + cost += GetCombinedEntropy(this.Blue, b.Blue, WebpConstants.NumLiteralCodes, this.IsUsed[2], b.IsUsed[2], trivialAtEnd, stats, bitEntropy); if (cost > costThreshold) { return false; } - cost += GetCombinedEntropy(this.Alpha, b.Alpha, WebpConstants.NumLiteralCodes, this.IsUsed[3], b.IsUsed[3], trivialAtEnd); + cost += GetCombinedEntropy(this.Alpha, b.Alpha, WebpConstants.NumLiteralCodes, this.IsUsed[3], b.IsUsed[3], trivialAtEnd, stats, bitEntropy); if (cost > costThreshold) { return false; } - cost += GetCombinedEntropy(this.Distance, b.Distance, WebpConstants.NumDistanceCodes, this.IsUsed[4], b.IsUsed[4], false); + cost += GetCombinedEntropy(this.Distance, b.Distance, WebpConstants.NumDistanceCodes, this.IsUsed[4], b.IsUsed[4], false, stats, bitEntropy); if (cost > costThreshold) { return false; @@ -415,9 +416,10 @@ private void AddDistance(Vp8LHistogram b, Vp8LHistogram output, int size) } } - private static double GetCombinedEntropy(uint[] x, uint[] y, int length, bool isXUsed, bool isYUsed, bool trivialAtEnd) + private static double GetCombinedEntropy(uint[] x, uint[] y, int length, bool isXUsed, bool isYUsed, bool trivialAtEnd, Vp8LStreaks stats, Vp8LBitEntropy bitEntropy) { - var stats = new Vp8LStreaks(); + stats.Clear(); + bitEntropy.Init(); if (trivialAtEnd) { // This configuration is due to palettization that transforms an indexed @@ -435,7 +437,6 @@ private static double GetCombinedEntropy(uint[] x, uint[] y, int length, bool is return stats.FinalHuffmanCost(); } - var bitEntropy = new Vp8LBitEntropy(); if (isXUsed) { if (isYUsed) @@ -479,10 +480,10 @@ private static double ExtraCostCombined(Span x, Span y, int length) /// /// Get the symbol entropy for the distribution 'population'. /// - private static double PopulationCost(uint[] population, int length, ref uint trivialSym, ref bool isUsed) + private static double PopulationCost(uint[] population, int length, ref uint trivialSym, ref bool isUsed, Vp8LStreaks stats, Vp8LBitEntropy bitEntropy) { - var bitEntropy = new Vp8LBitEntropy(); - var stats = new Vp8LStreaks(); + bitEntropy.Init(); + stats.Clear(); bitEntropy.BitsEntropyUnrefined(population, length, stats); trivialSym = (bitEntropy.NoneZeros == 1) ? bitEntropy.NoneZeroCode : NonTrivialSym; diff --git a/src/ImageSharp/Formats/Webp/Lossless/Vp8LStreaks.cs b/src/ImageSharp/Formats/Webp/Lossless/Vp8LStreaks.cs index 27ddcfd434..df9f064426 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/Vp8LStreaks.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/Vp8LStreaks.cs @@ -1,6 +1,8 @@ // Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. +using System; + namespace SixLabors.ImageSharp.Formats.Webp.Lossless { internal class Vp8LStreaks @@ -28,6 +30,13 @@ public Vp8LStreaks() /// public int[][] Streaks { get; } + public void Clear() + { + this.Counts.AsSpan().Clear(); + this.Streaks[0].AsSpan().Clear(); + this.Streaks[1].AsSpan().Clear(); + } + public double FinalHuffmanCost() { // The constants in this function are experimental and got rounded from diff --git a/src/ImageSharp/Formats/Webp/Lossless/WebpLosslessDecoder.cs b/src/ImageSharp/Formats/Webp/Lossless/WebpLosslessDecoder.cs index 768365e44e..4f7a4eb3d8 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/WebpLosslessDecoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/WebpLosslessDecoder.cs @@ -418,6 +418,7 @@ private void ReadHuffmanCodes(Vp8LDecoder decoder, int xSize, int ySize, int col var huffmanTables = new HuffmanCode[numHTreeGroups * tableSize]; var hTreeGroups = new HTreeGroup[numHTreeGroups]; Span huffmanTable = huffmanTables.AsSpan(); + int[] codeLengths = new int[maxAlphabetSize]; for (int i = 0; i < numHTreeGroupsMax; i++) { hTreeGroups[i] = new HTreeGroup(HuffmanUtils.HuffmanPackedTableSize); @@ -425,7 +426,7 @@ private void ReadHuffmanCodes(Vp8LDecoder decoder, int xSize, int ySize, int col int totalSize = 0; bool isTrivialLiteral = true; int maxBits = 0; - int[] codeLengths = new int[maxAlphabetSize]; + codeLengths.AsSpan().Clear(); for (int j = 0; j < WebpConstants.HuffmanCodesPerMetaCode; j++) { int alphabetSize = WebpConstants.AlphabetSize[j]; diff --git a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs index 1584237b0c..d31857d53b 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs @@ -58,14 +58,14 @@ public static void Copy(Span src, Span dst, int w, int h) } [MethodImpl(InliningOptions.ShortMethod)] - public static int Vp8Disto16X16(Span a, Span b, Span w) + public static int Vp8Disto16X16(Span a, Span b, Span w, Span scratch) { int d = 0; for (int y = 0; y < 16 * WebpConstants.Bps; y += 4 * WebpConstants.Bps) { for (int x = 0; x < 16; x += 4) { - d += Vp8Disto4X4(a.Slice(x + y), b.Slice(x + y), w); + d += Vp8Disto4X4(a.Slice(x + y), b.Slice(x + y), w, scratch); } } @@ -73,10 +73,10 @@ public static int Vp8Disto16X16(Span a, Span b, Span w) } [MethodImpl(InliningOptions.ShortMethod)] - public static int Vp8Disto4X4(Span a, Span b, Span w) + public static int Vp8Disto4X4(Span a, Span b, Span w, Span scratch) { - int sum1 = TTransform(a, w); - int sum2 = TTransform(b, w); + int sum1 = TTransform(a, w, scratch); + int sum2 = TTransform(b, w, scratch); return Math.Abs(sum2 - sum1) >> 5; } @@ -252,18 +252,14 @@ public static void DC4(Span dst, Span yuv, int offset) [MethodImpl(InliningOptions.ShortMethod)] public static void TM4(Span dst, Span yuv, int offset) => TrueMotion(dst, yuv, offset, 4); - public static void VE4(Span dst, Span yuv, int offset) + public static void VE4(Span dst, Span yuv, int offset, Span vals) { // vertical int topOffset = offset - WebpConstants.Bps; - byte[] vals = - { - Avg3(yuv[topOffset - 1], yuv[topOffset], yuv[topOffset + 1]), - Avg3(yuv[topOffset], yuv[topOffset + 1], yuv[topOffset + 2]), - Avg3(yuv[topOffset + 1], yuv[topOffset + 2], yuv[topOffset + 3]), - Avg3(yuv[topOffset + 2], yuv[topOffset + 3], yuv[topOffset + 4]) - }; - + vals[0] = Avg3(yuv[topOffset - 1], yuv[topOffset], yuv[topOffset + 1]); + vals[1] = Avg3(yuv[topOffset], yuv[topOffset + 1], yuv[topOffset + 2]); + vals[2] = Avg3(yuv[topOffset + 1], yuv[topOffset + 2], yuv[topOffset + 3]); + vals[3] = Avg3(yuv[topOffset + 2], yuv[topOffset + 3], yuv[topOffset + 4]); int endIdx = 4 * WebpConstants.Bps; for (int i = 0; i < endIdx; i += WebpConstants.Bps) { @@ -504,9 +500,10 @@ public static void HU4(Span dst, Span yuv, int offset) /// /// Paragraph 14.3: Implementation of the Walsh-Hadamard transform inversion. /// - public static void TransformWht(Span input, Span output) + public static void TransformWht(Span input, Span output, Span scratch) { - int[] tmp = new int[16]; + Span tmp = scratch.Slice(0, 16); + tmp.Clear(); for (int i = 0; i < 4; i++) { int iPlus4 = 4 + i; @@ -544,10 +541,11 @@ public static void TransformWht(Span input, Span output) /// Returns the weighted sum of the absolute value of transformed coefficients. /// w[] contains a row-major 4 by 4 symmetric matrix. /// - public static int TTransform(Span input, Span w) + public static int TTransform(Span input, Span w, Span scratch) { int sum = 0; - int[] tmp = new int[16]; + Span tmp = scratch.Slice(0, 16); + tmp.Clear(); // horizontal pass. int inputOffset = 0; @@ -591,15 +589,16 @@ public static int TTransform(Span input, Span w) return sum; } - public static void TransformTwo(Span src, Span dst) + public static void TransformTwo(Span src, Span dst, Span scratch) { - TransformOne(src, dst); - TransformOne(src.Slice(16), dst.Slice(4)); + TransformOne(src, dst, scratch); + TransformOne(src.Slice(16), dst.Slice(4), scratch); } - public static void TransformOne(Span src, Span dst) + public static void TransformOne(Span src, Span dst, Span scratch) { - Span tmp = stackalloc int[4 * 4]; + Span tmp = scratch.Slice(0, 16); + tmp.Clear(); int tmpOffset = 0; for (int srcOffset = 0; srcOffset < 4; srcOffset++) { @@ -671,10 +670,10 @@ public static void TransformAc3(Span src, Span dst) Store2(dst, 3, a - d4, d1, c1); } - public static void TransformUv(Span src, Span dst) + public static void TransformUv(Span src, Span dst, Span scratch) { - TransformTwo(src.Slice(0 * 16), dst); - TransformTwo(src.Slice(2 * 16), dst.Slice(4 * WebpConstants.Bps)); + TransformTwo(src.Slice(0 * 16), dst, scratch); + TransformTwo(src.Slice(2 * 16), dst.Slice(4 * WebpConstants.Bps), scratch); } public static void TransformDcuv(Span src, Span dst) diff --git a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs index 2ed4381660..18d7494f0f 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs @@ -31,7 +31,9 @@ public static void PickBestIntra16(Vp8EncIterator it, ref Vp8ModeScore rd, Vp8Se int lambda = dqm.LambdaI16; int tlambda = dqm.TLambda; Span src = it.YuvIn.AsSpan(Vp8EncIterator.YOffEnc); + Span scratch = it.Scratch3; var rdTmp = new Vp8ModeScore(); + var res = new Vp8Residual(); Vp8ModeScore rdCur = rdTmp; Vp8ModeScore rdBest = rd; int mode; @@ -39,7 +41,7 @@ public static void PickBestIntra16(Vp8EncIterator it, ref Vp8ModeScore rd, Vp8Se rd.ModeI16 = -1; for (mode = 0; mode < WebpConstants.NumPredModes; ++mode) { - // scratch buffer. + // Scratch buffer. Span tmpDst = it.YuvOut2.AsSpan(Vp8EncIterator.YOffEnc); rdCur.ModeI16 = mode; @@ -48,9 +50,9 @@ public static void PickBestIntra16(Vp8EncIterator it, ref Vp8ModeScore rd, Vp8Se // Measure RD-score. rdCur.D = LossyUtils.Vp8Sse16X16(src, tmpDst); - rdCur.SD = tlambda != 0 ? Mult8B(tlambda, LossyUtils.Vp8Disto16X16(src, tmpDst, WeightY)) : 0; + rdCur.SD = tlambda != 0 ? Mult8B(tlambda, LossyUtils.Vp8Disto16X16(src, tmpDst, WeightY, scratch)) : 0; rdCur.H = WebpConstants.Vp8FixedCostsI16[mode]; - rdCur.R = it.GetCostLuma16(rdCur, proba); + rdCur.R = it.GetCostLuma16(rdCur, proba, res); if (isFlat) { @@ -101,6 +103,7 @@ public static bool PickBestIntra4(Vp8EncIterator it, ref Vp8ModeScore rd, Vp8Seg int tlambda = dqm.TLambda; Span src0 = it.YuvIn.AsSpan(Vp8EncIterator.YOffEnc); Span bestBlocks = it.YuvOut2.AsSpan(Vp8EncIterator.YOffEnc); + Span scratch = it.Scratch3; int totalHeaderBits = 0; var rdBest = new Vp8ModeScore(); @@ -113,31 +116,35 @@ public static bool PickBestIntra4(Vp8EncIterator it, ref Vp8ModeScore rd, Vp8Seg rdBest.H = 211; // '211' is the value of VP8BitCost(0, 145) rdBest.SetRdScore(dqm.LambdaMode); it.StartI4(); + var rdi4 = new Vp8ModeScore(); + var rdTmp = new Vp8ModeScore(); + var res = new Vp8Residual(); + Span tmpLevels = new short[16]; do { int numBlocks = 1; - var rdi4 = new Vp8ModeScore(); + rdi4.Clear(); int mode; int bestMode = -1; Span src = src0.Slice(WebpLookupTables.Vp8Scan[it.I4]); short[] modeCosts = it.GetCostModeI4(rd.ModesI4); Span bestBlock = bestBlocks.Slice(WebpLookupTables.Vp8Scan[it.I4]); Span tmpDst = it.Scratch.AsSpan(); - tmpDst.Fill(0); + tmpDst.Clear(); rdi4.InitScore(); it.MakeIntra4Preds(); for (mode = 0; mode < WebpConstants.NumBModes; ++mode) { - var rdTmp = new Vp8ModeScore(); - short[] tmpLevels = new short[16]; + rdTmp.Clear(); + tmpLevels.Clear(); // Reconstruct. rdTmp.Nz = (uint)ReconstructIntra4(it, dqm, tmpLevels, src, tmpDst, mode); // Compute RD-score. rdTmp.D = LossyUtils.Vp8Sse4X4(src, tmpDst); - rdTmp.SD = tlambda != 0 ? Mult8B(tlambda, LossyUtils.Vp8Disto4X4(src, tmpDst, WeightY)) : 0; + rdTmp.SD = tlambda != 0 ? Mult8B(tlambda, LossyUtils.Vp8Disto4X4(src, tmpDst, WeightY, scratch)) : 0; rdTmp.H = modeCosts[mode]; // Add flatness penalty, to avoid flat area to be mispredicted by a complex mode. @@ -150,15 +157,15 @@ public static bool PickBestIntra4(Vp8EncIterator it, ref Vp8ModeScore rd, Vp8Seg rdTmp.R = 0; } - // early-out check. + // Early-out check. rdTmp.SetRdScore(lambda); if (bestMode >= 0 && rdTmp.Score >= rdi4.Score) { continue; } - // finish computing score. - rdTmp.R += it.GetCostLuma4(tmpLevels, proba); + // Finish computing score. + rdTmp.R += it.GetCostLuma4(tmpLevels, proba, res); rdTmp.SetRdScore(lambda); if (bestMode < 0 || rdTmp.Score < rdi4.Score) @@ -213,13 +220,15 @@ public static void PickBestUv(Vp8EncIterator it, ref Vp8ModeScore rd, Vp8Segment Span dst0 = it.YuvOut.AsSpan(Vp8EncIterator.UOffEnc); Span dst = dst0; var rdBest = new Vp8ModeScore(); + var rdUv = new Vp8ModeScore(); + var res = new Vp8Residual(); int mode; rd.ModeUv = -1; rdBest.InitScore(); for (mode = 0; mode < WebpConstants.NumPredModes; ++mode) { - var rdUv = new Vp8ModeScore(); + rdUv.Clear(); // Reconstruct rdUv.Nz = (uint)ReconstructUv(it, dqm, rdUv, tmpDst, mode); @@ -228,7 +237,7 @@ public static void PickBestUv(Vp8EncIterator it, ref Vp8ModeScore rd, Vp8Segment rdUv.D = LossyUtils.Vp8Sse16X8(src, tmpDst); rdUv.SD = 0; // not calling TDisto here: it tends to flatten areas. rdUv.H = WebpConstants.Vp8FixedCostsUv[mode]; - rdUv.R = it.GetCostUv(rdUv, proba); + rdUv.R = it.GetCostUv(rdUv, proba, res); if (mode > 0 && IsFlat(rdUv.UvLevels, numBlocks, WebpConstants.FlatnessLimitIUv)) { rdUv.R += WebpConstants.FlatnessPenality * numBlocks; @@ -271,16 +280,24 @@ public static int ReconstructIntra16(Vp8EncIterator it, Vp8SegmentInfo dqm, Vp8M Span src = it.YuvIn.AsSpan(Vp8EncIterator.YOffEnc); int nz = 0; int n; - short[] dcTmp = new short[16]; - short[] tmp = new short[16 * 16]; - Span tmpSpan = tmp.AsSpan(); + Span shortScratchSpan = it.Scratch2.AsSpan(); + Span scratch = it.Scratch3.AsSpan(0, 16); + shortScratchSpan.Clear(); + scratch.Clear(); + Span dcTmp = shortScratchSpan.Slice(0, 16); + Span tmp = shortScratchSpan.Slice(16, 16 * 16); for (n = 0; n < 16; n += 2) { - Vp8Encoding.FTransform2(src.Slice(WebpLookupTables.Vp8Scan[n]), reference.Slice(WebpLookupTables.Vp8Scan[n]), tmpSpan.Slice(n * 16, 16), tmpSpan.Slice((n + 1) * 16, 16)); + Vp8Encoding.FTransform2( + src.Slice(WebpLookupTables.Vp8Scan[n]), + reference.Slice(WebpLookupTables.Vp8Scan[n]), + tmp.Slice(n * 16, 16), + tmp.Slice((n + 1) * 16, 16), + scratch); } - Vp8Encoding.FTransformWht(tmp, dcTmp); + Vp8Encoding.FTransformWht(tmp, dcTmp, scratch); nz |= QuantizeBlock(dcTmp, rd.YDcLevels, dqm.Y2) << 24; for (n = 0; n < 16; n += 2) @@ -288,14 +305,14 @@ public static int ReconstructIntra16(Vp8EncIterator it, Vp8SegmentInfo dqm, Vp8M // Zero-out the first coeff, so that: a) nz is correct below, and // b) finding 'last' non-zero coeffs in SetResidualCoeffs() is simplified. tmp[n * 16] = tmp[(n + 1) * 16] = 0; - nz |= Quantize2Blocks(tmpSpan.Slice(n * 16, 32), rd.YAcLevels.AsSpan(n * 16, 32), dqm.Y1) << n; + nz |= Quantize2Blocks(tmp.Slice(n * 16, 32), rd.YAcLevels.AsSpan(n * 16, 32), dqm.Y1) << n; } // Transform back. - LossyUtils.TransformWht(dcTmp, tmpSpan); + LossyUtils.TransformWht(dcTmp, tmp, scratch); for (n = 0; n < 16; n += 2) { - Vp8Encoding.ITransform(reference.Slice(WebpLookupTables.Vp8Scan[n]), tmpSpan.Slice(n * 16, 32), yuvOut.Slice(WebpLookupTables.Vp8Scan[n]), true); + Vp8Encoding.ITransform(reference.Slice(WebpLookupTables.Vp8Scan[n]), tmp.Slice(n * 16, 32), yuvOut.Slice(WebpLookupTables.Vp8Scan[n]), true, scratch); } return nz; @@ -304,10 +321,13 @@ public static int ReconstructIntra16(Vp8EncIterator it, Vp8SegmentInfo dqm, Vp8M public static int ReconstructIntra4(Vp8EncIterator it, Vp8SegmentInfo dqm, Span levels, Span src, Span yuvOut, int mode) { Span reference = it.YuvP.AsSpan(Vp8Encoding.Vp8I4ModeOffsets[mode]); - short[] tmp = new short[16]; - Vp8Encoding.FTransform(src, reference, tmp); + Span tmp = it.Scratch2.AsSpan(0, 16); + Span scratch = it.Scratch3.AsSpan(0, 16); + tmp.Clear(); + scratch.Clear(); + Vp8Encoding.FTransform(src, reference, tmp, scratch); int nz = QuantizeBlock(tmp, levels, dqm.Y1); - Vp8Encoding.ITransform(reference, tmp, yuvOut, false); + Vp8Encoding.ITransform(reference, tmp, yuvOut, false, scratch); return nz; } @@ -318,27 +338,31 @@ public static int ReconstructUv(Vp8EncIterator it, Vp8SegmentInfo dqm, Vp8ModeSc Span src = it.YuvIn.AsSpan(Vp8EncIterator.UOffEnc); int nz = 0; int n; - short[] tmp = new short[8 * 16]; + Span tmp = it.Scratch2.AsSpan(0, 8 * 16); + Span scratch = it.Scratch3.AsSpan(0, 16); + tmp.Clear(); + scratch.Clear(); for (n = 0; n < 8; n += 2) { Vp8Encoding.FTransform2( src.Slice(WebpLookupTables.Vp8ScanUv[n]), reference.Slice(WebpLookupTables.Vp8ScanUv[n]), - tmp.AsSpan(n * 16, 16), - tmp.AsSpan((n + 1) * 16, 16)); + tmp.Slice(n * 16, 16), + tmp.Slice((n + 1) * 16, 16), + scratch); } CorrectDcValues(it, dqm.Uv, tmp, rd); for (n = 0; n < 8; n += 2) { - nz |= Quantize2Blocks(tmp.AsSpan(n * 16, 32), rd.UvLevels.AsSpan(n * 16, 32), dqm.Uv) << n; + nz |= Quantize2Blocks(tmp.Slice(n * 16, 32), rd.UvLevels.AsSpan(n * 16, 32), dqm.Uv) << n; } for (n = 0; n < 8; n += 2) { - Vp8Encoding.ITransform(reference.Slice(WebpLookupTables.Vp8ScanUv[n]), tmp.AsSpan(n * 16, 32), yuvOut.Slice(WebpLookupTables.Vp8ScanUv[n]), true); + Vp8Encoding.ITransform(reference.Slice(WebpLookupTables.Vp8ScanUv[n]), tmp.Slice(n * 16, 32), yuvOut.Slice(WebpLookupTables.Vp8ScanUv[n]), true, scratch); } return nz << 16; @@ -556,7 +580,7 @@ public static int QuantizeSingle(Span v, Vp8Matrix mtx) return (sign ? -v0 : v0) >> DSCALE; } - public static void CorrectDcValues(Vp8EncIterator it, Vp8Matrix mtx, short[] tmp, Vp8ModeScore rd) + public static void CorrectDcValues(Vp8EncIterator it, Vp8Matrix mtx, Span tmp, Vp8ModeScore rd) { #pragma warning disable SA1005 // Single line comments should begin with single space // | top[0] | top[1] @@ -571,7 +595,7 @@ public static void CorrectDcValues(Vp8EncIterator it, Vp8Matrix mtx, short[] tmp { Span top = it.TopDerr.AsSpan((it.X * 4) + ch, 2); Span left = it.LeftDerr.AsSpan(ch, 2); - Span c = tmp.AsSpan(ch * 4 * 16, 4 * 16); + Span c = tmp.Slice(ch * 4 * 16, 4 * 16); c[0] += (short)(((C1 * top[0]) + (C2 * left[0])) >> (DSHIFT - DSCALE)); int err0 = QuantizeSingle(c, mtx); c[1 * 16] += (short)(((C1 * top[1]) + (C2 * err0)) >> (DSHIFT - DSCALE)); diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8EncIterator.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8EncIterator.cs index ca3f8481e2..79fd8d8543 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8EncIterator.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8EncIterator.cs @@ -81,6 +81,8 @@ public Vp8EncIterator(byte[] yTop, byte[] uvTop, uint[] nz, Vp8MacroBlockInfo[] this.I4Boundary = new byte[37]; this.BitCount = new long[4, 3]; this.Scratch = new byte[WebpConstants.Bps * 16]; + this.Scratch2 = new short[17 * 16]; + this.Scratch3 = new int[16]; // To match the C initial values of the reference implementation, initialize all with 204. byte defaultInitVal = 204; @@ -216,10 +218,20 @@ public Vp8EncIterator(byte[] yTop, byte[] uvTop, uint[] nz, Vp8MacroBlockInfo[] public int CountDown { get; set; } /// - /// Gets the scratch buffer. + /// Gets the byte scratch buffer. /// public byte[] Scratch { get; } + /// + /// Gets the short scratch buffer. + /// + public short[] Scratch2 { get; } + + /// + /// Gets the int scratch buffer. + /// + public int[] Scratch3 { get; } + public Vp8MacroBlockInfo CurrentMacroBlockInfo => this.Mb[this.currentMbIdx]; private Vp8MacroBlockInfo[] Mb { get; } @@ -380,7 +392,7 @@ public int MbAnalyzeBestIntra16Mode() int bestMode = 0; this.MakeLuma16Preds(); - for (mode = 0; mode < maxMode; ++mode) + for (mode = 0; mode < maxMode; mode++) { var histo = new Vp8Histogram(); histo.CollectHistogram(this.YuvIn.AsSpan(YOffEnc), this.YuvP.AsSpan(Vp8Encoding.Vp8I16ModeOffsets[mode]), 0, 16); @@ -499,9 +511,8 @@ public void SetIntra4Mode(byte[] modes) this.CurrentMacroBlockInfo.MacroBlockType = Vp8MacroBlockType.I4X4; } - public int GetCostLuma16(Vp8ModeScore rd, Vp8EncProba proba) + public int GetCostLuma16(Vp8ModeScore rd, Vp8EncProba proba, Vp8Residual res) { - var res = new Vp8Residual(); int r = 0; // re-import the non-zero context. @@ -539,11 +550,10 @@ public short[] GetCostModeI4(byte[] modes) return WebpLookupTables.Vp8FixedCostsI4[top, left]; } - public int GetCostLuma4(short[] levels, Vp8EncProba proba) + public int GetCostLuma4(Span levels, Vp8EncProba proba, Vp8Residual res) { int x = this.I4 & 3; int y = this.I4 >> 2; - var res = new Vp8Residual(); int r = 0; res.Init(0, 3, proba); @@ -553,9 +563,8 @@ public int GetCostLuma4(short[] levels, Vp8EncProba proba) return r; } - public int GetCostUv(Vp8ModeScore rd, Vp8EncProba proba) + public int GetCostUv(Vp8ModeScore rd, Vp8EncProba proba, Vp8Residual res) { - var res = new Vp8Residual(); int r = 0; // re-import the non-zero context. @@ -741,7 +750,7 @@ public void MakeChroma8Preds() Vp8Encoding.EncPredChroma8(this.YuvP, left, top); } - public void MakeIntra4Preds() => Vp8Encoding.EncPredLuma4(this.YuvP, this.I4Boundary, this.I4BoundaryIdx); + public void MakeIntra4Preds() => Vp8Encoding.EncPredLuma4(this.YuvP, this.I4Boundary, this.I4BoundaryIdx, this.Scratch.AsSpan(0, 4)); public void SwapOut() { diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs index 37808d56c2..1a9d3a6e34 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs @@ -70,6 +70,11 @@ internal class Vp8Encoder : IDisposable /// private int uvAlpha; + /// + /// Scratch buffer to reduce allocations. + /// + private readonly int[] scratch = new int[16]; + private readonly byte[] averageBytesPerMb = { 50, 24, 16, 9, 7, 5, 3, 2 }; private const int NumMbSegments = 4; @@ -321,18 +326,19 @@ public void Encode(Image image, Stream stream) this.StatLoop(width, height, yStride, uvStride); it.Init(); it.InitFilter(); + var info = new Vp8ModeScore(); + var residual = new Vp8Residual(); do { bool dontUseSkip = !this.Proba.UseSkipProba; - - var info = new Vp8ModeScore(); + info.Clear(); it.Import(y, u, v, yStride, uvStride, width, height, false); // Warning! order is important: first call VP8Decimate() and // *then* decide how to code the skip decision if there's one. if (!this.Decimate(it, ref info, this.rdOptLevel) || dontUseSkip) { - this.CodeResiduals(it, info); + this.CodeResiduals(it, info, residual); } else { @@ -447,9 +453,10 @@ private long OneStatPass(int width, int height, int yStride, int uvStride, Vp8Rd it.Init(); this.SetLoopParams(stats.Q); + var info = new Vp8ModeScore(); do { - var info = new Vp8ModeScore(); + info.Clear(); it.Import(y, u, v, yStride, uvStride, width, height, false); if (this.Decimate(it, ref info, rdOpt)) { @@ -930,10 +937,9 @@ private bool Decimate(Vp8EncIterator it, ref Vp8ModeScore rd, Vp8RdLevel rdOpt) return isSkipped; } - private void CodeResiduals(Vp8EncIterator it, Vp8ModeScore rd) + private void CodeResiduals(Vp8EncIterator it, Vp8ModeScore rd, Vp8Residual residual) { int x, y, ch; - var residual = new Vp8Residual(); bool i16 = it.CurrentMacroBlockInfo.MacroBlockType == Vp8MacroBlockType.I16X16; int segment = it.CurrentMacroBlockInfo.Segment; diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs index f8b4853e2a..0567a0f27d 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs @@ -68,22 +68,20 @@ static Vp8Encoding() } } - public static void ITransform(Span reference, Span input, Span dst, bool doTwo) + public static void ITransform(Span reference, Span input, Span dst, bool doTwo, Span scratch) { - ITransformOne(reference, input, dst); + ITransformOne(reference, input, dst, scratch); if (doTwo) { - ITransformOne(reference.Slice(4), input.Slice(16), dst.Slice(4)); + ITransformOne(reference.Slice(4), input.Slice(16), dst.Slice(4), scratch); } } - public static void ITransformOne(Span reference, Span input, Span dst) + public static void ITransformOne(Span reference, Span input, Span dst, Span scratch) { int i; -#pragma warning disable SA1312 // Variable names should begin with lower-case letter - int[] C = new int[4 * 4]; -#pragma warning restore SA1312 // Variable names should begin with lower-case letter - Span tmp = C.AsSpan(); + Span tmp = scratch.Slice(0, 16); + tmp.Clear(); for (i = 0; i < 4; i++) { // vertical pass. @@ -99,7 +97,7 @@ public static void ITransformOne(Span reference, Span input, Span reference, Span input, Span src, Span reference, Span output, Span output2) + public static void FTransform2(Span src, Span reference, Span output, Span output2, Span scratch) { - FTransform(src, reference, output); - FTransform(src.Slice(4), reference.Slice(4), output2); + FTransform(src, reference, output, scratch); + FTransform(src.Slice(4), reference.Slice(4), output2, scratch); } - public static void FTransform(Span src, Span reference, Span output) + public static void FTransform(Span src, Span reference, Span output, Span scratch) { int i; - int[] tmp = new int[16]; + Span tmp = scratch.Slice(0, 16); + tmp.Clear(); + int srcIdx = 0; int refIdx = 0; for (i = 0; i < 4; i++) @@ -160,9 +160,11 @@ public static void FTransform(Span src, Span reference, Span } } - public static void FTransformWht(Span input, Span output) + public static void FTransformWht(Span input, Span output, Span scratch) { - int[] tmp = new int[16]; + Span tmp = scratch.Slice(0, 16); + tmp.Clear(); + int i; int inputIdx = 0; for (i = 0; i < 4; i++) @@ -234,11 +236,11 @@ public static void EncPredChroma8(Span dst, Span left, Span to // Left samples are top[-5 .. -2], top_left is top[-1], top are // located at top[0..3], and top right is top[4..7] - public static void EncPredLuma4(Span dst, Span top, int topOffset) + public static void EncPredLuma4(Span dst, Span top, int topOffset, Span vals) { Dc4(dst.Slice(I4DC4), top, topOffset); Tm4(dst.Slice(I4TM4), top, topOffset); - Ve4(dst.Slice(I4VE4), top, topOffset); + Ve4(dst.Slice(I4VE4), top, topOffset, vals); He4(dst.Slice(I4HE4), top, topOffset); Rd4(dst.Slice(I4RD4), top, topOffset); Vr4(dst.Slice(I4VR4), top, topOffset); @@ -395,20 +397,16 @@ private static void Tm4(Span dst, Span top, int topOffset) } } - private static void Ve4(Span dst, Span top, int topOffset) + private static void Ve4(Span dst, Span top, int topOffset, Span vals) { // vertical - byte[] vals = - { - LossyUtils.Avg3(top[topOffset - 1], top[topOffset], top[topOffset + 1]), - LossyUtils.Avg3(top[topOffset], top[topOffset + 1], top[topOffset + 2]), - LossyUtils.Avg3(top[topOffset + 1], top[topOffset + 2], top[topOffset + 3]), - LossyUtils.Avg3(top[topOffset + 2], top[topOffset + 3], top[topOffset + 4]) - }; - + vals[0] = LossyUtils.Avg3(top[topOffset - 1], top[topOffset], top[topOffset + 1]); + vals[1] = LossyUtils.Avg3(top[topOffset], top[topOffset + 1], top[topOffset + 2]); + vals[2] = LossyUtils.Avg3(top[topOffset + 1], top[topOffset + 2], top[topOffset + 3]); + vals[3] = LossyUtils.Avg3(top[topOffset + 2], top[topOffset + 3], top[topOffset + 4]); for (int i = 0; i < 4; i++) { - vals.AsSpan().CopyTo(dst.Slice(i * WebpConstants.Bps)); + vals.CopyTo(dst.Slice(i * WebpConstants.Bps)); } } diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Histogram.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Histogram.cs index 5d048514ea..7192fa2d05 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Histogram.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Histogram.cs @@ -8,6 +8,12 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy { internal class Vp8Histogram { + private readonly int[] scratch = new int[16]; + + private readonly short[] output = new short[16]; + + private readonly int[] distribution = new int[MaxCoeffThresh + 1]; + /// /// Size of histogram used by CollectHistogram. /// @@ -40,23 +46,22 @@ public int GetAlpha() public void CollectHistogram(Span reference, Span pred, int startBlock, int endBlock) { int j; - int[] distribution = new int[MaxCoeffThresh + 1]; + this.distribution.AsSpan().Clear(); for (j = startBlock; j < endBlock; j++) { - short[] output = new short[16]; - - this.Vp8FTransform(reference.Slice(WebpLookupTables.Vp8DspScan[j]), pred.Slice(WebpLookupTables.Vp8DspScan[j]), output); + this.output.AsSpan().Clear(); + this.Vp8FTransform(reference.Slice(WebpLookupTables.Vp8DspScan[j]), pred.Slice(WebpLookupTables.Vp8DspScan[j]), this.output); // Convert coefficients to bin. for (int k = 0; k < 16; ++k) { - int v = Math.Abs(output[k]) >> 3; + int v = Math.Abs(this.output[k]) >> 3; int clippedValue = ClipMax(v, MaxCoeffThresh); - ++distribution[clippedValue]; + ++this.distribution[clippedValue]; } } - this.SetHistogramData(distribution); + this.SetHistogramData(this.distribution); } public void Merge(Vp8Histogram other) @@ -97,7 +102,9 @@ private void SetHistogramData(int[] distribution) private void Vp8FTransform(Span src, Span reference, Span output) { int i; - int[] tmp = new int[16]; + Span tmp = this.scratch; + tmp.Clear(); + for (i = 0; i < 4; i++) { int d0 = src[0] - reference[0]; // 9bit dynamic range ([-255,255]) diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8ModeScore.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8ModeScore.cs index 7182f60210..1c92a9d2d9 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8ModeScore.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8ModeScore.cs @@ -1,6 +1,8 @@ // Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. +using System; + namespace SixLabors.ImageSharp.Formats.Webp.Lossy { /// @@ -93,6 +95,22 @@ public Vp8ModeScore() /// public int[,] Derr { get; } + public void Clear() + { + this.YDcLevels.AsSpan().Clear(); + this.YAcLevels.AsSpan().Clear(); + this.UvLevels.AsSpan().Clear(); + this.ModesI4.AsSpan().Clear(); + + for (int i = 0; i < 2; i++) + { + for (int j = 0; j < 3; j++) + { + this.Derr[i, j] = 0; + } + } + } + public void InitScore() { this.D = 0; diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Residual.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Residual.cs index 93d76e2836..2962ebbabc 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Residual.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Residual.cs @@ -16,7 +16,7 @@ internal class Vp8Residual public int CoeffType { get; set; } - public short[] Coeffs { get; set; } + public short[] Coeffs { get; } = new short[16]; public Vp8BandProbas[] Prob { get; set; } @@ -31,6 +31,7 @@ public void Init(int first, int coeffType, Vp8EncProba prob) this.Prob = prob.Coeffs[this.CoeffType]; this.Stats = prob.Stats[this.CoeffType]; this.Costs = prob.RemappedCosts[this.CoeffType]; + this.Coeffs.AsSpan().Clear(); } public void SetCoeffs(Span coeffs) @@ -46,7 +47,7 @@ public void SetCoeffs(Span coeffs) } } - this.Coeffs = coeffs.Slice(0, 16).ToArray(); + coeffs.Slice(0, 16).CopyTo(this.Coeffs); } // Simulate block coding, but only record statistics. diff --git a/src/ImageSharp/Formats/Webp/Lossy/WebpLossyDecoder.cs b/src/ImageSharp/Formats/Webp/Lossy/WebpLossyDecoder.cs index ebb0b0aa4a..4f283f9f53 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/WebpLossyDecoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/WebpLossyDecoder.cs @@ -34,6 +34,16 @@ internal sealed class WebpLossyDecoder /// private readonly Configuration configuration; + /// + /// Scratch buffer to reduce allocations. + /// + private readonly int[] scratch = new int[16]; + + /// + /// Another scratch buffer to reduce allocations. + /// + private readonly byte[] scratchBytes = new byte[4]; + /// /// Initializes a new instance of the class. /// @@ -395,7 +405,7 @@ private void ReconstructRow(Vp8Decoder dec) LossyUtils.TM4(dst, yuv, offset); break; case 2: - LossyUtils.VE4(dst, yuv, offset); + LossyUtils.VE4(dst, yuv, offset, this.scratchBytes); break; case 3: LossyUtils.HE4(dst, yuv, offset); @@ -420,7 +430,7 @@ private void ReconstructRow(Vp8Decoder dec) break; } - this.DoTransform(bits, coeffs.AsSpan(n * 16), dst); + this.DoTransform(bits, coeffs.AsSpan(n * 16), dst, this.scratch); } } else @@ -456,7 +466,7 @@ private void ReconstructRow(Vp8Decoder dec) { for (int n = 0; n < 16; ++n, bits <<= 2) { - this.DoTransform(bits, coeffs.AsSpan(n * 16), yDst.Slice(WebpConstants.Scan[n])); + this.DoTransform(bits, coeffs.AsSpan(n * 16), yDst.Slice(WebpConstants.Scan[n]), this.scratch); } } } @@ -496,8 +506,8 @@ private void ReconstructRow(Vp8Decoder dec) break; } - this.DoUVTransform(bitsUv, coeffs.AsSpan(16 * 16), uDst); - this.DoUVTransform(bitsUv >> 8, coeffs.AsSpan(20 * 16), vDst); + this.DoUVTransform(bitsUv, coeffs.AsSpan(16 * 16), uDst, this.scratch); + this.DoUVTransform(bitsUv >> 8, coeffs.AsSpan(20 * 16), vDst, this.scratch); // Stash away top samples for next block. if (mby < dec.MbHeight - 1) @@ -787,12 +797,12 @@ private void UpSample(Span topY, Span bottomY, Span topU, Span } } - private void DoTransform(uint bits, Span src, Span dst) + private void DoTransform(uint bits, Span src, Span dst, Span scratch) { switch (bits >> 30) { case 3: - LossyUtils.TransformOne(src, dst); + LossyUtils.TransformOne(src, dst, scratch); break; case 2: LossyUtils.TransformAc3(src, dst); @@ -803,7 +813,7 @@ private void DoTransform(uint bits, Span src, Span dst) } } - private void DoUVTransform(uint bits, Span src, Span dst) + private void DoUVTransform(uint bits, Span src, Span dst, Span scratch) { // any non-zero coeff at all? if ((bits & 0xff) > 0) @@ -811,7 +821,7 @@ private void DoUVTransform(uint bits, Span src, Span dst) // any non-zero AC coefficient? if ((bits & 0xaa) > 0) { - LossyUtils.TransformUv(src, dst); // note we don't use the AC3 variant for U/V. + LossyUtils.TransformUv(src, dst, scratch); // note we don't use the AC3 variant for U/V. } else { @@ -884,7 +894,7 @@ private bool ParseResiduals(Vp8Decoder dec, Vp8BitReader br, Vp8MacroBlock mb) if (nz > 1) { // More than just the DC -> perform the full transform. - LossyUtils.TransformWht(dc, dst); + LossyUtils.TransformWht(dc, dst, this.scratch); } else { diff --git a/tests/ImageSharp.Tests/Formats/WebP/PredictorEncoderTests.cs b/tests/ImageSharp.Tests/Formats/WebP/PredictorEncoderTests.cs index b480201989..d78f7e2f2a 100644 --- a/tests/ImageSharp.Tests/Formats/WebP/PredictorEncoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/WebP/PredictorEncoderTests.cs @@ -90,9 +90,10 @@ private static void RunColorSpaceTransformTestWithPeakImage() int transformWidth = LosslessUtils.SubSampleSize(image.Width, colorTransformBits); int transformHeight = LosslessUtils.SubSampleSize(image.Height, colorTransformBits); uint[] transformData = new uint[transformWidth * transformHeight]; + int[] scratch = new int[256]; // act - PredictorEncoder.ColorSpaceTransform(image.Width, image.Height, colorTransformBits, 75, bgra, transformData); + PredictorEncoder.ColorSpaceTransform(image.Width, image.Height, colorTransformBits, 75, bgra, transformData, scratch); // assert Assert.Equal(expectedData, transformData); @@ -119,9 +120,10 @@ private static void RunColorSpaceTransformTestWithBikeImage() int transformWidth = LosslessUtils.SubSampleSize(image.Width, colorTransformBits); int transformHeight = LosslessUtils.SubSampleSize(image.Height, colorTransformBits); uint[] transformData = new uint[transformWidth * transformHeight]; + int[] scratch = new int[256]; // act - PredictorEncoder.ColorSpaceTransform(image.Width, image.Height, colorTransformBits, 75, bgra, transformData); + PredictorEncoder.ColorSpaceTransform(image.Width, image.Height, colorTransformBits, 75, bgra, transformData, scratch); // assert Assert.Equal(expectedData, transformData); From ed8d2afcb07d7f56e48f1b59351d229389aaea3a Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Sun, 31 Oct 2021 13:26:31 +0100 Subject: [PATCH 09/85] Use Span version of Sort() to reduce allocations --- src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs | 5 +++++ src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs index f2321d6813..6320983bab 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs @@ -202,9 +202,14 @@ public static void GenerateOptimalTree(HuffmanTree[] tree, uint[] histogram, int } // Build the Huffman tree. +#if NET5_0_OR_GREATER + Span treeSlice = tree.AsSpan().Slice(0, treeSize); + treeSlice.Sort(HuffmanTree.Compare); +#else HuffmanTree[] treeCopy = tree.AsSpan().Slice(0, treeSize).ToArray(); Array.Sort(treeCopy, HuffmanTree.Compare); treeCopy.AsSpan().CopyTo(tree); +#endif if (treeSize > 1) { diff --git a/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs index 818488696e..29dbde8b03 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs @@ -1204,9 +1204,14 @@ private bool AnalyzeAndCreatePalette(ReadOnlySpan bgra, int width, int hei return false; } +#if NET5_0_OR_GREATER + var paletteSlice = palette.Slice(0, this.PaletteSize); + paletteSlice.Sort(); +#else uint[] paletteArray = palette.Slice(0, this.PaletteSize).ToArray(); Array.Sort(paletteArray); paletteArray.CopyTo(palette); +#endif if (PaletteHasNonMonotonousDeltas(palette, this.PaletteSize)) { From 15a10126d29f5e6b9c42544bc0cb4388cf32bdfe Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Sun, 31 Oct 2021 14:10:21 +0100 Subject: [PATCH 10/85] Define sse and avx masks as static readonly --- .../Formats/Webp/Lossless/LosslessUtils.cs | 65 +++++++++++-------- .../Formats/Webp/Lossless/PredictorEncoder.cs | 43 +++++++----- .../Formats/Webp/WebpCommonUtils.cs | 56 ++++++++-------- 3 files changed, 93 insertions(+), 71 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs index 06204ae913..c195eb0fe1 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs @@ -27,6 +27,30 @@ internal static unsafe class LosslessUtils private const double Log2Reciprocal = 1.44269504088896338700465094007086; +#if SUPPORTS_RUNTIME_INTRINSICS + private static readonly Vector256 AddGreenToBlueAndRedMaskAvx2 = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255); + + private static readonly Vector128 AddGreenToBlueAndRedMaskSsse3 = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255); + + private static readonly byte AddGreenToBlueAndRedShuffleMask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0); + + private static readonly Vector256 SubtractGreenFromBlueAndRedMaskAvx2 = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255); + + private static readonly Vector128 SubtractGreenFromBlueAndRedMaskSsse3 = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255); + + private static readonly byte SubtractGreenFromBlueAndRedShuffleMask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0); + + private static readonly Vector128 TransformColorAlphaGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); + + private static readonly Vector128 TransformColorRedBlueMask = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0); + + private static readonly byte TransformColorShuffleMask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0); + + private static readonly Vector128 TransformColorInverseAlphaGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); + + private static readonly byte TransformColorInverseShuffleMask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0); +#endif + /// /// Returns the exact index where array1 and array2 are different. For an index /// inferior or equal to bestLenMatch, the return value just has to be strictly @@ -97,7 +121,6 @@ public static void AddGreenToBlueAndRed(Span pixelData) #if SUPPORTS_RUNTIME_INTRINSICS if (Avx2.IsSupported) { - var mask = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255); int numPixels = pixelData.Length; fixed (uint* p = pixelData) { @@ -106,7 +129,7 @@ public static void AddGreenToBlueAndRed(Span pixelData) { uint* idx = p + i; Vector256 input = Avx.LoadVector256((ushort*)idx).AsByte(); - Vector256 in0g0g = Avx2.Shuffle(input, mask); + Vector256 in0g0g = Avx2.Shuffle(input, AddGreenToBlueAndRedMaskAvx2); Vector256 output = Avx2.Add(input, in0g0g); Avx.Store((byte*)idx, output); } @@ -119,7 +142,6 @@ public static void AddGreenToBlueAndRed(Span pixelData) } else if (Ssse3.IsSupported) { - var mask = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255); int numPixels = pixelData.Length; fixed (uint* p = pixelData) { @@ -128,7 +150,7 @@ public static void AddGreenToBlueAndRed(Span pixelData) { uint* idx = p + i; Vector128 input = Sse2.LoadVector128((ushort*)idx).AsByte(); - Vector128 in0g0g = Ssse3.Shuffle(input, mask); + Vector128 in0g0g = Ssse3.Shuffle(input, AddGreenToBlueAndRedMaskSsse3); Vector128 output = Sse2.Add(input, in0g0g); Sse2.Store((byte*)idx, output.AsByte()); } @@ -141,7 +163,6 @@ public static void AddGreenToBlueAndRed(Span pixelData) } else if (Sse2.IsSupported) { - byte mask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0); int numPixels = pixelData.Length; fixed (uint* p = pixelData) { @@ -151,8 +172,8 @@ public static void AddGreenToBlueAndRed(Span pixelData) uint* idx = p + i; Vector128 input = Sse2.LoadVector128((ushort*)idx); Vector128 a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g - Vector128 b = Sse2.ShuffleLow(a, mask); - Vector128 c = Sse2.ShuffleHigh(b, mask); // 0g0g + Vector128 b = Sse2.ShuffleLow(a, AddGreenToBlueAndRedShuffleMask); + Vector128 c = Sse2.ShuffleHigh(b, AddGreenToBlueAndRedShuffleMask); // 0g0g Vector128 output = Sse2.Add(input.AsByte(), c.AsByte()); Sse2.Store((byte*)idx, output); } @@ -189,7 +210,6 @@ public static void SubtractGreenFromBlueAndRed(Span pixelData) #if SUPPORTS_RUNTIME_INTRINSICS if (Avx2.IsSupported) { - var mask = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255); int numPixels = pixelData.Length; fixed (uint* p = pixelData) { @@ -198,7 +218,7 @@ public static void SubtractGreenFromBlueAndRed(Span pixelData) { uint* idx = p + i; Vector256 input = Avx.LoadVector256((ushort*)idx).AsByte(); - Vector256 in0g0g = Avx2.Shuffle(input, mask); + Vector256 in0g0g = Avx2.Shuffle(input, SubtractGreenFromBlueAndRedMaskAvx2); Vector256 output = Avx2.Subtract(input, in0g0g); Avx.Store((byte*)idx, output); } @@ -211,7 +231,6 @@ public static void SubtractGreenFromBlueAndRed(Span pixelData) } else if (Ssse3.IsSupported) { - var mask = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255); int numPixels = pixelData.Length; fixed (uint* p = pixelData) { @@ -220,7 +239,7 @@ public static void SubtractGreenFromBlueAndRed(Span pixelData) { uint* idx = p + i; Vector128 input = Sse2.LoadVector128((ushort*)idx).AsByte(); - Vector128 in0g0g = Ssse3.Shuffle(input, mask); + Vector128 in0g0g = Ssse3.Shuffle(input, SubtractGreenFromBlueAndRedMaskSsse3); Vector128 output = Sse2.Subtract(input, in0g0g); Sse2.Store((byte*)idx, output.AsByte()); } @@ -233,7 +252,6 @@ public static void SubtractGreenFromBlueAndRed(Span pixelData) } else if (Sse2.IsSupported) { - byte mask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0); int numPixels = pixelData.Length; fixed (uint* p = pixelData) { @@ -243,8 +261,8 @@ public static void SubtractGreenFromBlueAndRed(Span pixelData) uint* idx = p + i; Vector128 input = Sse2.LoadVector128((ushort*)idx); Vector128 a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g - Vector128 b = Sse2.ShuffleLow(a, mask); - Vector128 c = Sse2.ShuffleHigh(b, mask); // 0g0g + Vector128 b = Sse2.ShuffleLow(a, SubtractGreenFromBlueAndRedShuffleMask); + Vector128 c = Sse2.ShuffleHigh(b, SubtractGreenFromBlueAndRedShuffleMask); // 0g0g Vector128 output = Sse2.Subtract(input.AsByte(), c.AsByte()); Sse2.Store((byte*)idx, output); } @@ -394,9 +412,6 @@ public static void TransformColor(Vp8LMultipliers m, Span data, int numPix { Vector128 multsrb = MkCst16(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue)); Vector128 multsb2 = MkCst16(Cst5b(m.RedToBlue), 0); - var maskalphagreen = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); - var maskredblue = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0); - byte shufflemask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0); fixed (uint* src = data) { int idx; @@ -404,15 +419,15 @@ public static void TransformColor(Vp8LMultipliers m, Span data, int numPix { uint* pos = src + idx; Vector128 input = Sse2.LoadVector128(pos); - Vector128 a = Sse2.And(input.AsByte(), maskalphagreen); - Vector128 b = Sse2.ShuffleLow(a.AsInt16(), shufflemask); - Vector128 c = Sse2.ShuffleHigh(b.AsInt16(), shufflemask); + Vector128 a = Sse2.And(input.AsByte(), TransformColorAlphaGreenMask); + Vector128 b = Sse2.ShuffleLow(a.AsInt16(), TransformColorShuffleMask); + Vector128 c = Sse2.ShuffleHigh(b.AsInt16(), TransformColorShuffleMask); Vector128 d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16()); Vector128 e = Sse2.ShiftLeftLogical(input.AsInt16(), 8); Vector128 f = Sse2.MultiplyHigh(e.AsInt16(), multsb2.AsInt16()); Vector128 g = Sse2.ShiftRightLogical(f.AsInt32(), 16); Vector128 h = Sse2.Add(g.AsByte(), d.AsByte()); - Vector128 i = Sse2.And(h, maskredblue); + Vector128 i = Sse2.And(h, TransformColorRedBlueMask); Vector128 output = Sse2.Subtract(input.AsByte(), i); Sse2.Store((byte*)pos, output); } @@ -460,8 +475,6 @@ public static void TransformColorInverse(Vp8LMultipliers m, Span pixelData { Vector128 multsrb = MkCst16(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue)); Vector128 multsb2 = MkCst16(Cst5b(m.RedToBlue), 0); - var maskalphagreen = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); - byte shufflemask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0); fixed (uint* src = pixelData) { int idx; @@ -469,9 +482,9 @@ public static void TransformColorInverse(Vp8LMultipliers m, Span pixelData { uint* pos = src + idx; Vector128 input = Sse2.LoadVector128(pos); - Vector128 a = Sse2.And(input.AsByte(), maskalphagreen); - Vector128 b = Sse2.ShuffleLow(a.AsInt16(), shufflemask); - Vector128 c = Sse2.ShuffleHigh(b.AsInt16(), shufflemask); + Vector128 a = Sse2.And(input.AsByte(), TransformColorInverseAlphaGreenMask); + Vector128 b = Sse2.ShuffleLow(a.AsInt16(), TransformColorInverseShuffleMask); + Vector128 c = Sse2.ShuffleHigh(b.AsInt16(), TransformColorInverseShuffleMask); Vector128 d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16()); Vector128 e = Sse2.Add(input.AsByte(), d.AsByte()); Vector128 f = Sse2.ShiftLeftLogical(e.AsInt16(), 8); diff --git a/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs index 713fc79194..abb7274472 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs @@ -36,6 +36,22 @@ internal static unsafe class PredictorEncoder private const int PredLowEffort = 11; +#if SUPPORTS_RUNTIME_INTRINSICS + private static readonly Vector128 CollectColorRedTransformsGreenMask = Vector128.Create(0x00ff00).AsByte(); + + private static readonly Vector128 CollectColorRedTransformsAndMask = Vector128.Create((short)0xff).AsByte(); + + private static readonly Vector128 CollectColorBlueTransformsGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); + + private static readonly Vector128 CollectColorBlueTransformsGreenBlueMask = Vector128.Create(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0); + + private static readonly Vector128 CollectColorBlueTransformsBlueMask = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0); + + private static readonly Vector128 CollectColorBlueTransformsShuffleLowMask = Vector128.Create(255, 2, 255, 6, 255, 10, 255, 14, 255, 255, 255, 255, 255, 255, 255, 255); + + private static readonly Vector128 CollectColorBlueTransformsShuffleHighMask = Vector128.Create(255, 255, 255, 255, 255, 255, 255, 255, 255, 2, 255, 6, 255, 10, 255, 14); +#endif + /// /// Finds the best predictor for each tile, and converts the image to residuals /// with respect to predictions. If nearLosslessQuality < 100, applies @@ -1039,9 +1055,6 @@ private static void CollectColorRedTransforms(Span bgra, int stride, int t if (Sse41.IsSupported) { var multsg = Vector128.Create(LosslessUtils.Cst5b(greenToRed)); - var maskgreen = Vector128.Create(0x00ff00); - var mask = Vector128.Create((short)0xff); - const int span = 8; Span values = stackalloc ushort[span]; for (int y = 0; y < tileHeight; y++) @@ -1057,15 +1070,15 @@ private static void CollectColorRedTransforms(Span bgra, int stride, int t uint* input1Idx = src + x + (span / 2); Vector128 input0 = Sse2.LoadVector128((ushort*)input0Idx).AsByte(); Vector128 input1 = Sse2.LoadVector128((ushort*)input1Idx).AsByte(); - Vector128 g0 = Sse2.And(input0, maskgreen.AsByte()); // 0 0 | g 0 - Vector128 g1 = Sse2.And(input1, maskgreen.AsByte()); + Vector128 g0 = Sse2.And(input0, CollectColorRedTransformsGreenMask); // 0 0 | g 0 + Vector128 g1 = Sse2.And(input1, CollectColorRedTransformsGreenMask); Vector128 g = Sse41.PackUnsignedSaturate(g0.AsInt32(), g1.AsInt32()); // g 0 Vector128 a0 = Sse2.ShiftRightLogical(input0.AsInt32(), 16); // 0 0 | x r Vector128 a1 = Sse2.ShiftRightLogical(input1.AsInt32(), 16); Vector128 a = Sse41.PackUnsignedSaturate(a0, a1); // x r Vector128 b = Sse2.MultiplyHigh(g.AsInt16(), multsg); // x dr Vector128 c = Sse2.Subtract(a.AsByte(), b.AsByte()); // x r' - Vector128 d = Sse2.And(c, mask.AsByte()); // 0 r' + Vector128 d = Sse2.And(c, CollectColorRedTransformsAndMask); // 0 r' Sse2.Store(dst, d.AsUInt16()); for (int i = 0; i < span; i++) { @@ -1113,12 +1126,6 @@ private static void CollectColorBlueTransforms(Span bgra, int stride, int Span values = stackalloc ushort[span]; var multsr = Vector128.Create(LosslessUtils.Cst5b(redToBlue)); var multsg = Vector128.Create(LosslessUtils.Cst5b(greenToBlue)); - var maskgreen = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); - var maskgreenblue = Vector128.Create(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0); - var maskblue = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0); - var shufflerLow = Vector128.Create(255, 2, 255, 6, 255, 10, 255, 14, 255, 255, 255, 255, 255, 255, 255, 255); - var shufflerHigh = Vector128.Create(255, 255, 255, 255, 255, 255, 255, 255, 255, 2, 255, 6, 255, 10, 255, 14); - for (int y = 0; y < tileHeight; y++) { Span srcSpan = bgra.Slice(y * stride); @@ -1132,18 +1139,18 @@ private static void CollectColorBlueTransforms(Span bgra, int stride, int uint* input1Idx = src + x + (span / 2); Vector128 input0 = Sse2.LoadVector128((ushort*)input0Idx).AsByte(); Vector128 input1 = Sse2.LoadVector128((ushort*)input1Idx).AsByte(); - Vector128 r0 = Ssse3.Shuffle(input0, shufflerLow); - Vector128 r1 = Ssse3.Shuffle(input1, shufflerHigh); + Vector128 r0 = Ssse3.Shuffle(input0, CollectColorBlueTransformsShuffleLowMask); + Vector128 r1 = Ssse3.Shuffle(input1, CollectColorBlueTransformsShuffleHighMask); Vector128 r = Sse2.Or(r0, r1); - Vector128 gb0 = Sse2.And(input0, maskgreenblue); - Vector128 gb1 = Sse2.And(input1, maskgreenblue); + Vector128 gb0 = Sse2.And(input0, CollectColorBlueTransformsGreenBlueMask); + Vector128 gb1 = Sse2.And(input1, CollectColorBlueTransformsGreenBlueMask); Vector128 gb = Sse41.PackUnsignedSaturate(gb0.AsInt32(), gb1.AsInt32()); - Vector128 g = Sse2.And(gb.AsByte(), maskgreen); + Vector128 g = Sse2.And(gb.AsByte(), CollectColorBlueTransformsGreenMask); Vector128 a = Sse2.MultiplyHigh(r.AsInt16(), multsr); Vector128 b = Sse2.MultiplyHigh(g.AsInt16(), multsg); Vector128 c = Sse2.Subtract(gb.AsByte(), b.AsByte()); Vector128 d = Sse2.Subtract(c, a.AsByte()); - Vector128 e = Sse2.And(d, maskblue); + Vector128 e = Sse2.And(d, CollectColorBlueTransformsBlueMask); Sse2.Store(dst, e.AsUInt16()); for (int i = 0; i < span; i++) { diff --git a/src/ImageSharp/Formats/Webp/WebpCommonUtils.cs b/src/ImageSharp/Formats/Webp/WebpCommonUtils.cs index d6e8d0a068..4251af7428 100644 --- a/src/ImageSharp/Formats/Webp/WebpCommonUtils.cs +++ b/src/ImageSharp/Formats/Webp/WebpCommonUtils.cs @@ -16,6 +16,16 @@ namespace SixLabors.ImageSharp.Formats.Webp /// internal static class WebpCommonUtils { +#if SUPPORTS_RUNTIME_INTRINSICS + private static readonly Vector256 AlphaMaskVector256 = Vector256.Create(0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255); + + private static readonly Vector256 All0x80Vector256 = Vector256.Create((byte)0x80).AsByte(); + + private static readonly Vector128 AlphaMask = Vector128.Create(0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255); + + private static readonly Vector128 All0x80 = Vector128.Create((byte)0x80).AsByte(); +#endif + /// /// Checks if the pixel row is not opaque. /// @@ -27,11 +37,6 @@ public static unsafe bool CheckNonOpaque(Span row) if (Avx2.IsSupported) { ReadOnlySpan rowBytes = MemoryMarshal.AsBytes(row); - var alphaMaskVector256 = Vector256.Create(0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255); - Vector256 all0x80Vector256 = Vector256.Create((byte)0x80).AsByte(); - var alphaMask = Vector128.Create(0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255); - Vector128 all0x80 = Vector128.Create((byte)0x80).AsByte(); - int i = 0; int length = (row.Length * 4) - 3; fixed (byte* src = rowBytes) @@ -42,14 +47,14 @@ public static unsafe bool CheckNonOpaque(Span row) Vector256 a1 = Avx.LoadVector256(src + i + 32).AsByte(); Vector256 a2 = Avx.LoadVector256(src + i + 64).AsByte(); Vector256 a3 = Avx.LoadVector256(src + i + 96).AsByte(); - Vector256 b0 = Avx2.And(a0, alphaMaskVector256).AsInt32(); - Vector256 b1 = Avx2.And(a1, alphaMaskVector256).AsInt32(); - Vector256 b2 = Avx2.And(a2, alphaMaskVector256).AsInt32(); - Vector256 b3 = Avx2.And(a3, alphaMaskVector256).AsInt32(); + Vector256 b0 = Avx2.And(a0, AlphaMaskVector256).AsInt32(); + Vector256 b1 = Avx2.And(a1, AlphaMaskVector256).AsInt32(); + Vector256 b2 = Avx2.And(a2, AlphaMaskVector256).AsInt32(); + Vector256 b3 = Avx2.And(a3, AlphaMaskVector256).AsInt32(); Vector256 c0 = Avx2.PackSignedSaturate(b0, b1).AsInt16(); Vector256 c1 = Avx2.PackSignedSaturate(b2, b3).AsInt16(); Vector256 d = Avx2.PackSignedSaturate(c0, c1).AsByte(); - Vector256 bits = Avx2.CompareEqual(d, all0x80Vector256); + Vector256 bits = Avx2.CompareEqual(d, All0x80Vector256); int mask = Avx2.MoveMask(bits); if (mask != -1) { @@ -59,7 +64,7 @@ public static unsafe bool CheckNonOpaque(Span row) for (; i + 64 <= length; i += 64) { - if (IsNoneOpaque64Bytes(src, i, alphaMask, all0x80)) + if (IsNoneOpaque64Bytes(src, i)) { return true; } @@ -67,7 +72,7 @@ public static unsafe bool CheckNonOpaque(Span row) for (; i + 32 <= length; i += 32) { - if (IsNoneOpaque32Bytes(src, i, alphaMask, all0x80)) + if (IsNoneOpaque32Bytes(src, i)) { return true; } @@ -85,16 +90,13 @@ public static unsafe bool CheckNonOpaque(Span row) else if (Sse2.IsSupported) { ReadOnlySpan rowBytes = MemoryMarshal.AsBytes(row); - var alphaMask = Vector128.Create(0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255); - Vector128 all0x80 = Vector128.Create((byte)0x80).AsByte(); - int i = 0; int length = (row.Length * 4) - 3; fixed (byte* src = rowBytes) { for (; i + 64 <= length; i += 64) { - if (IsNoneOpaque64Bytes(src, i, alphaMask, all0x80)) + if (IsNoneOpaque64Bytes(src, i)) { return true; } @@ -102,7 +104,7 @@ public static unsafe bool CheckNonOpaque(Span row) for (; i + 32 <= length; i += 32) { - if (IsNoneOpaque32Bytes(src, i, alphaMask, all0x80)) + if (IsNoneOpaque32Bytes(src, i)) { return true; } @@ -133,20 +135,20 @@ public static unsafe bool CheckNonOpaque(Span row) } #if SUPPORTS_RUNTIME_INTRINSICS - private static unsafe bool IsNoneOpaque64Bytes(byte* src, int i, Vector128 alphaMask, Vector128 all0x80) + private static unsafe bool IsNoneOpaque64Bytes(byte* src, int i) { Vector128 a0 = Sse2.LoadVector128(src + i).AsByte(); Vector128 a1 = Sse2.LoadVector128(src + i + 16).AsByte(); Vector128 a2 = Sse2.LoadVector128(src + i + 32).AsByte(); Vector128 a3 = Sse2.LoadVector128(src + i + 48).AsByte(); - Vector128 b0 = Sse2.And(a0, alphaMask).AsInt32(); - Vector128 b1 = Sse2.And(a1, alphaMask).AsInt32(); - Vector128 b2 = Sse2.And(a2, alphaMask).AsInt32(); - Vector128 b3 = Sse2.And(a3, alphaMask).AsInt32(); + Vector128 b0 = Sse2.And(a0, AlphaMask).AsInt32(); + Vector128 b1 = Sse2.And(a1, AlphaMask).AsInt32(); + Vector128 b2 = Sse2.And(a2, AlphaMask).AsInt32(); + Vector128 b3 = Sse2.And(a3, AlphaMask).AsInt32(); Vector128 c0 = Sse2.PackSignedSaturate(b0, b1).AsInt16(); Vector128 c1 = Sse2.PackSignedSaturate(b2, b3).AsInt16(); Vector128 d = Sse2.PackSignedSaturate(c0, c1).AsByte(); - Vector128 bits = Sse2.CompareEqual(d, all0x80); + Vector128 bits = Sse2.CompareEqual(d, All0x80); int mask = Sse2.MoveMask(bits); if (mask != 0xFFFF) { @@ -156,15 +158,15 @@ private static unsafe bool IsNoneOpaque64Bytes(byte* src, int i, Vector128 return false; } - private static unsafe bool IsNoneOpaque32Bytes(byte* src, int i, Vector128 alphaMask, Vector128 all0x80) + private static unsafe bool IsNoneOpaque32Bytes(byte* src, int i) { Vector128 a0 = Sse2.LoadVector128(src + i).AsByte(); Vector128 a1 = Sse2.LoadVector128(src + i + 16).AsByte(); - Vector128 b0 = Sse2.And(a0, alphaMask).AsInt32(); - Vector128 b1 = Sse2.And(a1, alphaMask).AsInt32(); + Vector128 b0 = Sse2.And(a0, AlphaMask).AsInt32(); + Vector128 b1 = Sse2.And(a1, AlphaMask).AsInt32(); Vector128 c = Sse2.PackSignedSaturate(b0, b1).AsInt16(); Vector128 d = Sse2.PackSignedSaturate(c, c).AsByte(); - Vector128 bits = Sse2.CompareEqual(d, all0x80); + Vector128 bits = Sse2.CompareEqual(d, All0x80); int mask = Sse2.MoveMask(bits); if (mask != 0xFFFF) { From e51f5008c3a53f203d0d9f21957146f95a6bf17b Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Sun, 31 Oct 2021 16:51:37 +0100 Subject: [PATCH 11/85] Add AggressiveInlining to LevelCosts --- src/ImageSharp/Formats/Webp/Lossy/Vp8Residual.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Residual.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Residual.cs index 2962ebbabc..4eeeedd376 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Residual.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Residual.cs @@ -2,6 +2,7 @@ // Licensed under the Apache License, Version 2.0. using System; +using System.Runtime.CompilerServices; namespace SixLabors.ImageSharp.Formats.Webp.Lossy { @@ -151,6 +152,7 @@ public int GetResidualCost(int ctx0) return cost; } + [MethodImpl(InliningOptions.ShortMethod)] private static int LevelCost(Span table, int level) => WebpLookupTables.Vp8LevelFixedCosts[level] + table[level > WebpConstants.MaxVariableLevel ? WebpConstants.MaxVariableLevel : level]; From e4352b9e0bcb160732fa63b88e1bd7dcf05c0dd6 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Sun, 31 Oct 2021 19:29:59 +0100 Subject: [PATCH 12/85] Use byte arrays instead of Dictionary's for lookups --- .../Formats/Webp/Lossy/LossyUtils.cs | 48 ++-- .../Formats/Webp/WebpLookupTables.cs | 243 +++++++++++++++--- 2 files changed, 234 insertions(+), 57 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs index 1584237b0c..1a6ace16fa 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs @@ -934,11 +934,11 @@ private static void DoFilter2(Span p, int offset, int step) int p0 = p[offset - step]; int q0 = p[offset]; int q1 = p[offset + step]; - int a = (3 * (q0 - p0)) + WebpLookupTables.Sclip1[p1 - q1]; - int a1 = WebpLookupTables.Sclip2[(a + 4) >> 3]; - int a2 = WebpLookupTables.Sclip2[(a + 3) >> 3]; - p[offset - step] = WebpLookupTables.Clip1[p0 + a2]; - p[offset] = WebpLookupTables.Clip1[q0 - a1]; + int a = (3 * (q0 - p0)) + WebpLookupTables.Sclip1[p1 - q1 + 1020]; + int a1 = WebpLookupTables.Sclip2[((a + 4) >> 3) + 112]; + int a2 = WebpLookupTables.Sclip2[((a + 3) >> 3) + 112]; + p[offset - step] = WebpLookupTables.Clip1[p0 + a2 + 255]; + p[offset] = WebpLookupTables.Clip1[q0 - a1 + 255]; } private static void DoFilter4(Span p, int offset, int step) @@ -950,13 +950,13 @@ private static void DoFilter4(Span p, int offset, int step) int q0 = p[offset]; int q1 = p[offset + step]; int a = 3 * (q0 - p0); - int a1 = WebpLookupTables.Sclip2[(a + 4) >> 3]; - int a2 = WebpLookupTables.Sclip2[(a + 3) >> 3]; + int a1 = WebpLookupTables.Sclip2[((a + 4) >> 3) + 112]; + int a2 = WebpLookupTables.Sclip2[((a + 3) >> 3) + 112]; int a3 = (a1 + 1) >> 1; - p[offsetMinus2Step] = WebpLookupTables.Clip1[p1 + a3]; - p[offset - step] = WebpLookupTables.Clip1[p0 + a2]; - p[offset] = WebpLookupTables.Clip1[q0 - a1]; - p[offset + step] = WebpLookupTables.Clip1[q1 - a3]; + p[offsetMinus2Step] = WebpLookupTables.Clip1[p1 + a3 + 255]; + p[offset - step] = WebpLookupTables.Clip1[p0 + a2 + 255]; + p[offset] = WebpLookupTables.Clip1[q0 - a1 + 255]; + p[offset + step] = WebpLookupTables.Clip1[q1 - a3 + 255]; } private static void DoFilter6(Span p, int offset, int step) @@ -971,18 +971,18 @@ private static void DoFilter6(Span p, int offset, int step) int q0 = p[offset]; int q1 = p[offset + step]; int q2 = p[offset + step2]; - int a = WebpLookupTables.Sclip1[(3 * (q0 - p0)) + WebpLookupTables.Sclip1[p1 - q1]]; + int a = WebpLookupTables.Sclip1[(3 * (q0 - p0)) + WebpLookupTables.Sclip1[p1 - q1 + 1020] + 1020]; // a is in [-128,127], a1 in [-27,27], a2 in [-18,18] and a3 in [-9,9] int a1 = ((27 * a) + 63) >> 7; // eq. to ((3 * a + 7) * 9) >> 7 int a2 = ((18 * a) + 63) >> 7; // eq. to ((2 * a + 7) * 9) >> 7 int a3 = ((9 * a) + 63) >> 7; // eq. to ((1 * a + 7) * 9) >> 7 - p[offset - step3] = WebpLookupTables.Clip1[p2 + a3]; - p[offset - step2] = WebpLookupTables.Clip1[p1 + a2]; - p[offsetMinusStep] = WebpLookupTables.Clip1[p0 + a1]; - p[offset] = WebpLookupTables.Clip1[q0 - a1]; - p[offset + step] = WebpLookupTables.Clip1[q1 - a2]; - p[offset + step2] = WebpLookupTables.Clip1[q2 - a3]; + p[offset - step3] = WebpLookupTables.Clip1[p2 + a3 + 255]; + p[offset - step2] = WebpLookupTables.Clip1[p1 + a2 + 255]; + p[offsetMinusStep] = WebpLookupTables.Clip1[p0 + a1 + 255]; + p[offset] = WebpLookupTables.Clip1[q0 - a1 + 255]; + p[offset + step] = WebpLookupTables.Clip1[q1 - a2 + 255]; + p[offset + step2] = WebpLookupTables.Clip1[q2 - a3 + 255]; } [MethodImpl(InliningOptions.ShortMethod)] @@ -992,7 +992,7 @@ private static bool NeedsFilter(Span p, int offset, int step, int t) int p0 = p[offset - step]; int q0 = p[offset]; int q1 = p[offset + step]; - return (4 * WebpLookupTables.Abs0[p0 - q0]) + WebpLookupTables.Abs0[p1 - q1] <= t; + return (4 * WebpLookupTables.Abs0[p0 - q0 + 255]) + WebpLookupTables.Abs0[p1 - q1 + 255] <= t; } private static bool NeedsFilter2(Span p, int offset, int step, int t, int it) @@ -1007,14 +1007,14 @@ private static bool NeedsFilter2(Span p, int offset, int step, int t, int int q1 = p[offset + step]; int q2 = p[offset + step2]; int q3 = p[offset + step3]; - if ((4 * WebpLookupTables.Abs0[p0 - q0]) + WebpLookupTables.Abs0[p1 - q1] > t) + if ((4 * WebpLookupTables.Abs0[p0 - q0 + 255]) + WebpLookupTables.Abs0[p1 - q1 + 255] > t) { return false; } - return WebpLookupTables.Abs0[p3 - p2] <= it && WebpLookupTables.Abs0[p2 - p1] <= it && - WebpLookupTables.Abs0[p1 - p0] <= it && WebpLookupTables.Abs0[q3 - q2] <= it && - WebpLookupTables.Abs0[q2 - q1] <= it && WebpLookupTables.Abs0[q1 - q0] <= it; + return WebpLookupTables.Abs0[p3 - p2 + 255] <= it && WebpLookupTables.Abs0[p2 - p1 + 255] <= it && + WebpLookupTables.Abs0[p1 - p0 + 255] <= it && WebpLookupTables.Abs0[q3 - q2 + 255] <= it && + WebpLookupTables.Abs0[q2 - q1 + 255] <= it && WebpLookupTables.Abs0[q1 - q0 + 255] <= it; } [MethodImpl(InliningOptions.ShortMethod)] @@ -1024,7 +1024,7 @@ private static bool Hev(Span p, int offset, int step, int thresh) int p0 = p[offset - step]; int q0 = p[offset]; int q1 = p[offset + step]; - return WebpLookupTables.Abs0[p1 - p0] > thresh || WebpLookupTables.Abs0[q1 - q0] > thresh; + return WebpLookupTables.Abs0[p1 - p0 + 255] > thresh || WebpLookupTables.Abs0[q1 - q0 + 255] > thresh; } [MethodImpl(InliningOptions.ShortMethod)] diff --git a/src/ImageSharp/Formats/Webp/WebpLookupTables.cs b/src/ImageSharp/Formats/Webp/WebpLookupTables.cs index 57b5739c79..768f4a8da3 100644 --- a/src/ImageSharp/Formats/Webp/WebpLookupTables.cs +++ b/src/ImageSharp/Formats/Webp/WebpLookupTables.cs @@ -2,21 +2,12 @@ // Licensed under the Apache License, Version 2.0. using System; -using System.Collections.Generic; namespace SixLabors.ImageSharp.Formats.Webp { #pragma warning disable SA1201 // Elements should appear in the correct order internal static class WebpLookupTables { - public static readonly Dictionary Abs0; - - public static readonly Dictionary Clip1; - - public static readonly Dictionary Sclip1; - - public static readonly Dictionary Sclip2; - public static readonly byte[,][] ModesProba = new byte[10, 10][]; public static readonly ushort[] GammaToLinearTab = new ushort[256]; @@ -54,6 +45,216 @@ internal static class WebpLookupTables 8 + (0 * WebpConstants.Bps), 12 + (0 * WebpConstants.Bps), 8 + (4 * WebpConstants.Bps), 12 + (4 * WebpConstants.Bps) // V }; + public static readonly byte[] Abs0 = + { + 0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xf8, 0xf7, 0xf6, 0xf5, 0xf4, 0xf3, 0xf2, 0xf1, 0xf0, 0xef, + 0xee, 0xed, 0xec, 0xeb, 0xea, 0xe9, 0xe8, 0xe7, 0xe6, 0xe5, 0xe4, 0xe3, 0xe2, 0xe1, 0xe0, 0xdf, 0xde, + 0xdd, 0xdc, 0xdb, 0xda, 0xd9, 0xd8, 0xd7, 0xd6, 0xd5, 0xd4, 0xd3, 0xd2, 0xd1, 0xd0, 0xcf, 0xce, 0xcd, + 0xcc, 0xcb, 0xca, 0xc9, 0xc8, 0xc7, 0xc6, 0xc5, 0xc4, 0xc3, 0xc2, 0xc1, 0xc0, 0xbf, 0xbe, 0xbd, 0xbc, + 0xbb, 0xba, 0xb9, 0xb8, 0xb7, 0xb6, 0xb5, 0xb4, 0xb3, 0xb2, 0xb1, 0xb0, 0xaf, 0xae, 0xad, 0xac, 0xab, + 0xaa, 0xa9, 0xa8, 0xa7, 0xa6, 0xa5, 0xa4, 0xa3, 0xa2, 0xa1, 0xa0, 0x9f, 0x9e, 0x9d, 0x9c, 0x9b, 0x9a, + 0x99, 0x98, 0x97, 0x96, 0x95, 0x94, 0x93, 0x92, 0x91, 0x90, 0x8f, 0x8e, 0x8d, 0x8c, 0x8b, 0x8a, 0x89, + 0x88, 0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81, 0x80, 0x7f, 0x7e, 0x7d, 0x7c, 0x7b, 0x7a, 0x79, 0x78, + 0x77, 0x76, 0x75, 0x74, 0x73, 0x72, 0x71, 0x70, 0x6f, 0x6e, 0x6d, 0x6c, 0x6b, 0x6a, 0x69, 0x68, 0x67, + 0x66, 0x65, 0x64, 0x63, 0x62, 0x61, 0x60, 0x5f, 0x5e, 0x5d, 0x5c, 0x5b, 0x5a, 0x59, 0x58, 0x57, 0x56, + 0x55, 0x54, 0x53, 0x52, 0x51, 0x50, 0x4f, 0x4e, 0x4d, 0x4c, 0x4b, 0x4a, 0x49, 0x48, 0x47, 0x46, 0x45, + 0x44, 0x43, 0x42, 0x41, 0x40, 0x3f, 0x3e, 0x3d, 0x3c, 0x3b, 0x3a, 0x39, 0x38, 0x37, 0x36, 0x35, 0x34, + 0x33, 0x32, 0x31, 0x30, 0x2f, 0x2e, 0x2d, 0x2c, 0x2b, 0x2a, 0x29, 0x28, 0x27, 0x26, 0x25, 0x24, 0x23, + 0x22, 0x21, 0x20, 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, + 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, + 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, + 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, + 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, + 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, + 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, + 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, + 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, + 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, + 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, + 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, + 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, + 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, + 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, + 0xff + }; + + public static readonly sbyte[] Sclip1 = + { + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -127, -126, -125, -124, -123, -122, -121, -120, + -119, -118, -117, -116, -115, -114, -113, -112, -111, -110, -109, -108, -107, -106, -105, -104, -103, + -102, -101, -100, -99, -98, -97, -96, -95, -94, -93, -92, -91, -90, -89, -88, -87, -86, -85, -84, -83, + -82, -81, -80, -79, -78, -77, -76, -75, -74, -73, -72, -71, -70, -69, -68, -67, -66, -65, -64, -63, -62, + -61, -60, -59, -58, -57, -56, -55, -54, -53, -52, -51, -50, -49, -48, -47, -46, -45, -44, -43, -42, -41, + -40, -39, -38, -37, -36, -35, -34, -33, -32, -31, -30, -29, -28, -27, -26, -25, -24, -23, -22, -21, -20, + -19, -18, -17, -16, -15, -14, -13, -12, -11, -10, -9, -8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, + 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, + 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, + 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, + 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127 + }; + + public static readonly sbyte[] Sclip2 = + { + -112, -111, -110, -109, -108, -107, -106, -105, -104, -103, -102, -101, -100, -99, -98, -97, -96, -95, + -94, -93, -92, -91, -90, -89, -88, -87, -86, -85, -84, -83, -82, -81, -80, -79, -78, -77, -76, -75, -74, + -73, -72, -71, -70, -69, -68, -67, -66, -65, -64, -63, -62, -61, -60, -59, -58, -57, -56, -55, -54, -53, + -52, -51, -50, -49, -48, -47, -46, -45, -44, -43, -42, -41, -40, -39, -38, -37, -36, -35, -34, -33, -32, + -31, -30, -29, -28, -27, -26, -25, -24, -23, -22, -21, -20, -19, -18, -17, -16, -15, -14, -13, -12, -11, + -10, -9, -8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, + 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, + 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112 + }; + + public static readonly byte[] Clip1 = + { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, + 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, + 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, + 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, + 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, + 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, + 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, + 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, + 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, + 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, + 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, + 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, + 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, + 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff + }; + // fixed costs for coding levels, deduce from the coding tree. // This is only the part that doesn't depend on the probability state. public static readonly short[] Vp8LevelFixedCosts = @@ -1233,30 +1434,6 @@ static WebpLookupTables() LinearToGammaTab[v] = (int)((255.0d * Math.Pow(scale * v, 1.0d / WebpConstants.Gamma)) + .5); } - Abs0 = new Dictionary(); - for (int i = -255; i <= 255; i++) - { - Abs0[i] = (byte)((i < 0) ? -i : i); - } - - Clip1 = new Dictionary(); - for (int i = -255; i <= 255 + 255; i++) - { - Clip1[i] = (byte)(i < 0 ? 0 : i > 255 ? 255 : i); - } - - Sclip1 = new Dictionary(); - for (int i = -1020; i <= 1020; i++) - { - Sclip1[i] = (sbyte)(i < -128 ? -128 : i > 127 ? 127 : i); - } - - Sclip2 = new Dictionary(); - for (int i = -112; i <= 112; i++) - { - Sclip2[i] = (sbyte)(i < -16 ? -16 : i > 15 ? 15 : i); - } - InitializeModesProbabilities(); InitializeFixedCostsI4(); } From 414e4a861db47a81482786abd9ebe6fff3748d58 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Sun, 31 Oct 2021 20:00:39 +0100 Subject: [PATCH 13/85] Fix Sclip2 values --- .../Formats/Webp/WebpLookupTables.cs | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/WebpLookupTables.cs b/src/ImageSharp/Formats/Webp/WebpLookupTables.cs index 768f4a8da3..98cf3029fa 100644 --- a/src/ImageSharp/Formats/Webp/WebpLookupTables.cs +++ b/src/ImageSharp/Formats/Webp/WebpLookupTables.cs @@ -193,16 +193,16 @@ internal static class WebpLookupTables public static readonly sbyte[] Sclip2 = { - -112, -111, -110, -109, -108, -107, -106, -105, -104, -103, -102, -101, -100, -99, -98, -97, -96, -95, - -94, -93, -92, -91, -90, -89, -88, -87, -86, -85, -84, -83, -82, -81, -80, -79, -78, -77, -76, -75, -74, - -73, -72, -71, -70, -69, -68, -67, -66, -65, -64, -63, -62, -61, -60, -59, -58, -57, -56, -55, -54, -53, - -52, -51, -50, -49, -48, -47, -46, -45, -44, -43, -42, -41, -40, -39, -38, -37, -36, -35, -34, -33, -32, - -31, -30, -29, -28, -27, -26, -25, -24, -23, -22, -21, -20, -19, -18, -17, -16, -15, -14, -13, -12, -11, - -10, -9, -8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, - 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, - 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, - 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, - 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112 + -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, + -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, + -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, + -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, + -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -15, -14, -13, -12, -11, -10, -9, -8, + -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15 }; public static readonly byte[] Clip1 = From ef90575a119335314ea69c4cbd556469d91f032f Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 1 Nov 2021 21:42:32 +1100 Subject: [PATCH 14/85] Revert "Use RgbaVector for color backing" This reverts commit 257ff1929e341e5b1af94d9adf557e5296ece957. --- src/ImageSharp/Color/Color.Conversions.cs | 87 +++---------------- src/ImageSharp/Color/Color.cs | 74 ++++++++-------- .../Color/ColorTests.CastFrom.cs | 17 +--- .../Color/ColorTests.ConstructFrom.cs | 4 +- 4 files changed, 57 insertions(+), 125 deletions(-) diff --git a/src/ImageSharp/Color/Color.Conversions.cs b/src/ImageSharp/Color/Color.Conversions.cs index abcb54b807..0455fd26a4 100644 --- a/src/ImageSharp/Color/Color.Conversions.cs +++ b/src/ImageSharp/Color/Color.Conversions.cs @@ -17,90 +17,56 @@ public readonly partial struct Color /// /// The containing the color information. [MethodImpl(InliningOptions.ShortMethod)] - public Color(Rgba64 pixel) - { - RgbaVector vector = default; - vector.FromRgba64(pixel); - this.data = vector; - } + public Color(Rgba64 pixel) => this.data = pixel; /// /// Initializes a new instance of the struct. /// /// The containing the color information. [MethodImpl(InliningOptions.ShortMethod)] - public Color(Rgba32 pixel) - { - RgbaVector vector = default; - vector.FromRgba32(pixel); - this.data = vector; - } + public Color(Rgba32 pixel) => this.data = new Rgba64(pixel); /// /// Initializes a new instance of the struct. /// /// The containing the color information. [MethodImpl(InliningOptions.ShortMethod)] - public Color(Argb32 pixel) - { - RgbaVector vector = default; - vector.FromArgb32(pixel); - this.data = vector; - } + public Color(Argb32 pixel) => this.data = new Rgba64(pixel); /// /// Initializes a new instance of the struct. /// /// The containing the color information. [MethodImpl(InliningOptions.ShortMethod)] - public Color(Bgra32 pixel) - { - RgbaVector vector = default; - vector.FromBgra32(pixel); - this.data = vector; - } + public Color(Bgra32 pixel) => this.data = new Rgba64(pixel); /// /// Initializes a new instance of the struct. /// /// The containing the color information. [MethodImpl(InliningOptions.ShortMethod)] - public Color(Rgb24 pixel) - { - RgbaVector vector = default; - vector.FromRgb24(pixel); - this.data = vector; - } + public Color(Rgb24 pixel) => this.data = new Rgba64(pixel); /// /// Initializes a new instance of the struct. /// /// The containing the color information. [MethodImpl(InliningOptions.ShortMethod)] - public Color(Bgr24 pixel) - { - RgbaVector vector = default; - vector.FromBgr24(pixel); - this.data = vector; - } + public Color(Bgr24 pixel) => this.data = new Rgba64(pixel); /// /// Initializes a new instance of the struct. /// /// The containing the color information. [MethodImpl(InliningOptions.ShortMethod)] - public Color(Vector4 vector) - { - vector = Numerics.Clamp(vector, Vector4.Zero, Vector4.One); - this.data = new RgbaVector(vector.X, vector.Y, vector.Z, vector.W); - } + public Color(Vector4 vector) => this.data = new Rgba64(vector); /// /// Converts a to . /// /// The . /// The . - public static explicit operator Vector4(Color color) => color.data.ToScaledVector4(); + public static explicit operator Vector4(Color color) => color.data.ToVector4(); /// /// Converts an to . @@ -108,47 +74,22 @@ public Color(Vector4 vector) /// The . /// The . [MethodImpl(InliningOptions.ShortMethod)] - public static explicit operator Color(Vector4 source) => new(source); + public static explicit operator Color(Vector4 source) => new Color(source); [MethodImpl(InliningOptions.ShortMethod)] - internal Rgba32 ToRgba32() - { - Rgba32 result = default; - result.FromScaledVector4(this.data.ToScaledVector4()); - return result; - } + internal Rgba32 ToRgba32() => this.data.ToRgba32(); [MethodImpl(InliningOptions.ShortMethod)] - internal Bgra32 ToBgra32() - { - Bgra32 result = default; - result.FromScaledVector4(this.data.ToScaledVector4()); - return result; - } + internal Bgra32 ToBgra32() => this.data.ToBgra32(); [MethodImpl(InliningOptions.ShortMethod)] - internal Argb32 ToArgb32() - { - Argb32 result = default; - result.FromScaledVector4(this.data.ToScaledVector4()); - return result; - } + internal Argb32 ToArgb32() => this.data.ToArgb32(); [MethodImpl(InliningOptions.ShortMethod)] - internal Rgb24 ToRgb24() - { - Rgb24 result = default; - result.FromScaledVector4(this.data.ToScaledVector4()); - return result; - } + internal Rgb24 ToRgb24() => this.data.ToRgb24(); [MethodImpl(InliningOptions.ShortMethod)] - internal Bgr24 ToBgr24() - { - Bgr24 result = default; - result.FromScaledVector4(this.data.ToScaledVector4()); - return result; - } + internal Bgr24 ToBgr24() => this.data.ToBgr24(); [MethodImpl(InliningOptions.ShortMethod)] internal Vector4 ToVector4() => this.data.ToVector4(); diff --git a/src/ImageSharp/Color/Color.cs b/src/ImageSharp/Color/Color.cs index 9a4df4e629..d5eedc160b 100644 --- a/src/ImageSharp/Color/Color.cs +++ b/src/ImageSharp/Color/Color.cs @@ -20,22 +20,26 @@ namespace SixLabors.ImageSharp /// public readonly partial struct Color : IEquatable { - private readonly RgbaVector data; + private readonly Rgba64 data; [MethodImpl(InliningOptions.ShortMethod)] private Color(byte r, byte g, byte b, byte a) { - RgbaVector vector = default; - vector.FromRgba32(new(r, g, b, a)); - this.data = vector; + this.data = new Rgba64( + ColorNumerics.UpscaleFrom8BitTo16Bit(r), + ColorNumerics.UpscaleFrom8BitTo16Bit(g), + ColorNumerics.UpscaleFrom8BitTo16Bit(b), + ColorNumerics.UpscaleFrom8BitTo16Bit(a)); } [MethodImpl(InliningOptions.ShortMethod)] private Color(byte r, byte g, byte b) { - RgbaVector vector = default; - vector.FromRgba32(new(r, g, b)); - this.data = vector; + this.data = new Rgba64( + ColorNumerics.UpscaleFrom8BitTo16Bit(r), + ColorNumerics.UpscaleFrom8BitTo16Bit(g), + ColorNumerics.UpscaleFrom8BitTo16Bit(b), + ushort.MaxValue); } /// @@ -48,7 +52,10 @@ private Color(byte r, byte g, byte b) /// otherwise, false. /// [MethodImpl(InliningOptions.ShortMethod)] - public static bool operator ==(Color left, Color right) => left.Equals(right); + public static bool operator ==(Color left, Color right) + { + return left.Equals(right); + } /// /// Checks whether two structures are equal. @@ -60,7 +67,10 @@ private Color(byte r, byte g, byte b) /// otherwise, false. /// [MethodImpl(InliningOptions.ShortMethod)] - public static bool operator !=(Color left, Color right) => !left.Equals(right); + public static bool operator !=(Color left, Color right) + { + return !left.Equals(right); + } /// /// Creates a from RGBA bytes. @@ -71,7 +81,7 @@ private Color(byte r, byte g, byte b) /// The alpha component (0-255). /// The . [MethodImpl(InliningOptions.ShortMethod)] - public static Color FromRgba(byte r, byte g, byte b, byte a) => new(r, g, b, a); + public static Color FromRgba(byte r, byte g, byte b, byte a) => new Color(r, g, b, a); /// /// Creates a from RGB bytes. @@ -81,17 +91,7 @@ private Color(byte r, byte g, byte b) /// The blue component (0-255). /// The . [MethodImpl(InliningOptions.ShortMethod)] - public static Color FromRgb(byte r, byte g, byte b) => new(r, g, b); - - /// - /// Creates a from the given . - /// - /// The pixel to convert from. - /// The pixel format. - /// The . - [MethodImpl(InliningOptions.ShortMethod)] - public static Color FromPixel(TPixel pixel) - where TPixel : unmanaged, IPixel => new(pixel.ToScaledVector4()); + public static Color FromRgb(byte r, byte g, byte b) => new Color(r, g, b); /// /// Creates a new instance of the struct @@ -207,18 +207,13 @@ public Color WithAlpha(float alpha) /// /// A hexadecimal string representation of the value. [MethodImpl(InliningOptions.ShortMethod)] - public string ToHex() - { - Rgba32 rgba = default; - this.data.ToRgba32(ref rgba); - return rgba.ToHex(); - } + public string ToHex() => this.data.ToRgba32().ToHex(); /// public override string ToString() => this.ToHex(); /// - /// Converts the color instance to a specified type. + /// Converts the color instance to a specified type. /// /// The pixel type to convert to. /// The pixel value. @@ -227,12 +222,12 @@ public TPixel ToPixel() where TPixel : unmanaged, IPixel { TPixel pixel = default; - pixel.FromScaledVector4(this.data.ToScaledVector4()); + pixel.FromRgba64(this.data); return pixel; } /// - /// Bulk converts a span of to a span of a specified type. + /// Bulk converts a span of to a span of a specified type. /// /// The pixel type to convert to. /// The configuration. @@ -245,19 +240,28 @@ public static void ToPixel( Span destination) where TPixel : unmanaged, IPixel { - ReadOnlySpan rgbaSpan = MemoryMarshal.Cast(source); - PixelOperations.Instance.From(configuration, rgbaSpan, destination); + ReadOnlySpan rgba64Span = MemoryMarshal.Cast(source); + PixelOperations.Instance.FromRgba64(configuration, rgba64Span, destination); } /// [MethodImpl(InliningOptions.ShortMethod)] - public bool Equals(Color other) => this.data.Equals(other.data); + public bool Equals(Color other) + { + return this.data.PackedValue == other.data.PackedValue; + } /// - public override bool Equals(object obj) => obj is Color other && this.Equals(other); + public override bool Equals(object obj) + { + return obj is Color other && this.Equals(other); + } /// [MethodImpl(InliningOptions.ShortMethod)] - public override int GetHashCode() => this.data.GetHashCode(); + public override int GetHashCode() + { + return this.data.PackedValue.GetHashCode(); + } } } diff --git a/tests/ImageSharp.Tests/Color/ColorTests.CastFrom.cs b/tests/ImageSharp.Tests/Color/ColorTests.CastFrom.cs index 356ef7351e..38b94f486c 100644 --- a/tests/ImageSharp.Tests/Color/ColorTests.CastFrom.cs +++ b/tests/ImageSharp.Tests/Color/ColorTests.CastFrom.cs @@ -66,7 +66,7 @@ public void Bgra32() [Fact] public void Rgb24() { - var source = new Rgb24(1, 22, 231); + var source = new Rgb24(1, 22, 231); // Act: Color color = source; @@ -79,7 +79,7 @@ public void Rgb24() [Fact] public void Bgr24() { - var source = new Bgr24(1, 22, 231); + var source = new Bgr24(1, 22, 231); // Act: Color color = source; @@ -88,19 +88,6 @@ public void Bgr24() Bgr24 data = color.ToPixel(); Assert.Equal(source, data); } - - [Fact] - public void TPixel() - { - var source = new RgbaVector(1, .1F, .133F, .864F); - - // Act: - var color = Color.FromPixel(source); - - // Assert: - RgbaVector data = color.ToPixel(); - Assert.Equal(source, data); - } } } } diff --git a/tests/ImageSharp.Tests/Color/ColorTests.ConstructFrom.cs b/tests/ImageSharp.Tests/Color/ColorTests.ConstructFrom.cs index dd51f3a6c2..89276014b0 100644 --- a/tests/ImageSharp.Tests/Color/ColorTests.ConstructFrom.cs +++ b/tests/ImageSharp.Tests/Color/ColorTests.ConstructFrom.cs @@ -66,7 +66,7 @@ public void Bgra32() [Fact] public void Rgb24() { - var source = new Rgb24(1, 22, 231); + var source = new Rgb24(1, 22, 231); // Act: var color = new Color(source); @@ -79,7 +79,7 @@ public void Rgb24() [Fact] public void Bgr24() { - var source = new Bgr24(1, 22, 231); + var source = new Bgr24(1, 22, 231); // Act: var color = new Color(source); From 2ec17e7c6a31b31fafb75cfd85613681fa4125d6 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 1 Nov 2021 22:39:20 +1100 Subject: [PATCH 15/85] Use box pixel for high precision --- src/ImageSharp/Color/Color.Conversions.cs | 117 +++++++++++++++--- src/ImageSharp/Color/Color.cs | 77 ++++++++---- .../Color/ColorTests.CastTo.cs | 17 ++- 3 files changed, 171 insertions(+), 40 deletions(-) diff --git a/src/ImageSharp/Color/Color.Conversions.cs b/src/ImageSharp/Color/Color.Conversions.cs index 0455fd26a4..424b7dcdfe 100644 --- a/src/ImageSharp/Color/Color.Conversions.cs +++ b/src/ImageSharp/Color/Color.Conversions.cs @@ -17,56 +17,85 @@ public readonly partial struct Color /// /// The containing the color information. [MethodImpl(InliningOptions.ShortMethod)] - public Color(Rgba64 pixel) => this.data = pixel; + public Color(Rgba64 pixel) + { + this.data = pixel; + this.boxedHighPrecisionPixel = null; + } /// /// Initializes a new instance of the struct. /// /// The containing the color information. [MethodImpl(InliningOptions.ShortMethod)] - public Color(Rgba32 pixel) => this.data = new Rgba64(pixel); + public Color(Rgba32 pixel) + { + this.data = new Rgba64(pixel); + this.boxedHighPrecisionPixel = null; + } /// /// Initializes a new instance of the struct. /// /// The containing the color information. [MethodImpl(InliningOptions.ShortMethod)] - public Color(Argb32 pixel) => this.data = new Rgba64(pixel); + public Color(Argb32 pixel) + { + this.data = new Rgba64(pixel); + this.boxedHighPrecisionPixel = null; + } /// /// Initializes a new instance of the struct. /// /// The containing the color information. [MethodImpl(InliningOptions.ShortMethod)] - public Color(Bgra32 pixel) => this.data = new Rgba64(pixel); + public Color(Bgra32 pixel) + { + this.data = new Rgba64(pixel); + this.boxedHighPrecisionPixel = null; + } /// /// Initializes a new instance of the struct. /// /// The containing the color information. [MethodImpl(InliningOptions.ShortMethod)] - public Color(Rgb24 pixel) => this.data = new Rgba64(pixel); + public Color(Rgb24 pixel) + { + this.data = new Rgba64(pixel); + this.boxedHighPrecisionPixel = null; + } /// /// Initializes a new instance of the struct. /// /// The containing the color information. [MethodImpl(InliningOptions.ShortMethod)] - public Color(Bgr24 pixel) => this.data = new Rgba64(pixel); + public Color(Bgr24 pixel) + { + this.data = new Rgba64(pixel); + this.boxedHighPrecisionPixel = null; + } /// /// Initializes a new instance of the struct. /// /// The containing the color information. [MethodImpl(InliningOptions.ShortMethod)] - public Color(Vector4 vector) => this.data = new Rgba64(vector); + public Color(Vector4 vector) + { + vector = Numerics.Clamp(vector, Vector4.Zero, Vector4.One); + this.boxedHighPrecisionPixel = new RgbaVector(vector.X, vector.Y, vector.Z, vector.W); + this.data = default; + } /// /// Converts a to . /// /// The . /// The . - public static explicit operator Vector4(Color color) => color.data.ToVector4(); + public static explicit operator Vector4(Color color) => color.ToVector4(); /// /// Converts an to . @@ -74,24 +103,82 @@ public readonly partial struct Color /// The . /// The . [MethodImpl(InliningOptions.ShortMethod)] - public static explicit operator Color(Vector4 source) => new Color(source); + public static explicit operator Color(Vector4 source) => new(source); [MethodImpl(InliningOptions.ShortMethod)] - internal Rgba32 ToRgba32() => this.data.ToRgba32(); + internal Rgba32 ToRgba32() + { + if (this.boxedHighPrecisionPixel is null) + { + return this.data.ToRgba32(); + } + + Rgba32 value = default; + this.boxedHighPrecisionPixel.ToRgba32(ref value); + return value; + } [MethodImpl(InliningOptions.ShortMethod)] - internal Bgra32 ToBgra32() => this.data.ToBgra32(); + internal Bgra32 ToBgra32() + { + if (this.boxedHighPrecisionPixel is null) + { + return this.data.ToBgra32(); + } + + Bgra32 value = default; + value.FromScaledVector4(this.boxedHighPrecisionPixel.ToScaledVector4()); + return value; + } [MethodImpl(InliningOptions.ShortMethod)] - internal Argb32 ToArgb32() => this.data.ToArgb32(); + internal Argb32 ToArgb32() + { + if (this.boxedHighPrecisionPixel is null) + { + return this.data.ToArgb32(); + } + + Argb32 value = default; + value.FromScaledVector4(this.boxedHighPrecisionPixel.ToScaledVector4()); + return value; + } [MethodImpl(InliningOptions.ShortMethod)] - internal Rgb24 ToRgb24() => this.data.ToRgb24(); + internal Rgb24 ToRgb24() + { + if (this.boxedHighPrecisionPixel is null) + { + return this.data.ToRgb24(); + } + + Rgb24 value = default; + value.FromScaledVector4(this.boxedHighPrecisionPixel.ToScaledVector4()); + return value; + } [MethodImpl(InliningOptions.ShortMethod)] - internal Bgr24 ToBgr24() => this.data.ToBgr24(); + internal Bgr24 ToBgr24() + { + if (this.boxedHighPrecisionPixel is null) + { + return this.data.ToBgr24(); + } + + Bgr24 value = default; + value.FromScaledVector4(this.boxedHighPrecisionPixel.ToScaledVector4()); + return value; + } [MethodImpl(InliningOptions.ShortMethod)] - internal Vector4 ToVector4() => this.data.ToVector4(); + internal Vector4 ToVector4() + { + if (this.boxedHighPrecisionPixel is null) + { + return this.data.ToScaledVector4(); + } + + return this.boxedHighPrecisionPixel.ToScaledVector4(); + } } } diff --git a/src/ImageSharp/Color/Color.cs b/src/ImageSharp/Color/Color.cs index d5eedc160b..fe66efcfb5 100644 --- a/src/ImageSharp/Color/Color.cs +++ b/src/ImageSharp/Color/Color.cs @@ -4,7 +4,6 @@ using System; using System.Numerics; using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; using SixLabors.ImageSharp.PixelFormats; namespace SixLabors.ImageSharp @@ -21,6 +20,7 @@ namespace SixLabors.ImageSharp public readonly partial struct Color : IEquatable { private readonly Rgba64 data; + private readonly IPixel boxedHighPrecisionPixel; [MethodImpl(InliningOptions.ShortMethod)] private Color(byte r, byte g, byte b, byte a) @@ -30,6 +30,8 @@ private Color(byte r, byte g, byte b, byte a) ColorNumerics.UpscaleFrom8BitTo16Bit(g), ColorNumerics.UpscaleFrom8BitTo16Bit(b), ColorNumerics.UpscaleFrom8BitTo16Bit(a)); + + this.boxedHighPrecisionPixel = null; } [MethodImpl(InliningOptions.ShortMethod)] @@ -40,6 +42,15 @@ private Color(byte r, byte g, byte b) ColorNumerics.UpscaleFrom8BitTo16Bit(g), ColorNumerics.UpscaleFrom8BitTo16Bit(b), ushort.MaxValue); + + this.boxedHighPrecisionPixel = null; + } + + [MethodImpl(InliningOptions.ShortMethod)] + private Color(IPixel pixel) + { + this.boxedHighPrecisionPixel = pixel; + this.data = default; } /// @@ -52,13 +63,10 @@ private Color(byte r, byte g, byte b) /// otherwise, false. /// [MethodImpl(InliningOptions.ShortMethod)] - public static bool operator ==(Color left, Color right) - { - return left.Equals(right); - } + public static bool operator ==(Color left, Color right) => left.Equals(right); /// - /// Checks whether two structures are equal. + /// Checks whether two structures are not equal. /// /// The left hand operand. /// The right hand operand. @@ -67,10 +75,7 @@ private Color(byte r, byte g, byte b) /// otherwise, false. /// [MethodImpl(InliningOptions.ShortMethod)] - public static bool operator !=(Color left, Color right) - { - return !left.Equals(right); - } + public static bool operator !=(Color left, Color right) => !left.Equals(right); /// /// Creates a from RGBA bytes. @@ -81,7 +86,7 @@ private Color(byte r, byte g, byte b) /// The alpha component (0-255). /// The . [MethodImpl(InliningOptions.ShortMethod)] - public static Color FromRgba(byte r, byte g, byte b, byte a) => new Color(r, g, b, a); + public static Color FromRgba(byte r, byte g, byte b, byte a) => new(r, g, b, a); /// /// Creates a from RGB bytes. @@ -91,7 +96,18 @@ private Color(byte r, byte g, byte b) /// The blue component (0-255). /// The . [MethodImpl(InliningOptions.ShortMethod)] - public static Color FromRgb(byte r, byte g, byte b) => new Color(r, g, b); + public static Color FromRgb(byte r, byte g, byte b) => new(r, g, b); + + /// + /// Creates a from the given . + /// + /// The pixel to convert from. + /// The pixel format. + /// The . + [MethodImpl(InliningOptions.ShortMethod)] + public static Color FromPixel(TPixel pixel) + where TPixel : unmanaged, IPixel + => new(pixel); /// /// Creates a new instance of the struct @@ -213,7 +229,7 @@ public Color WithAlpha(float alpha) public override string ToString() => this.ToHex(); /// - /// Converts the color instance to a specified type. + /// Converts the color instance to a specified type. /// /// The pixel type to convert to. /// The pixel value. @@ -221,13 +237,18 @@ public Color WithAlpha(float alpha) public TPixel ToPixel() where TPixel : unmanaged, IPixel { - TPixel pixel = default; + if (this.boxedHighPrecisionPixel is TPixel pixel) + { + return pixel; + } + + pixel = default; pixel.FromRgba64(this.data); return pixel; } /// - /// Bulk converts a span of to a span of a specified type. + /// Bulk converts a span of to a span of a specified type. /// /// The pixel type to convert to. /// The configuration. @@ -240,28 +261,38 @@ public static void ToPixel( Span destination) where TPixel : unmanaged, IPixel { - ReadOnlySpan rgba64Span = MemoryMarshal.Cast(source); - PixelOperations.Instance.FromRgba64(configuration, rgba64Span, destination); + Guard.DestinationShouldNotBeTooShort(source, destination, nameof(destination)); + for (int i = 0; i < source.Length; i++) + { + destination[i] = source[i].ToPixel(); + } } /// [MethodImpl(InliningOptions.ShortMethod)] public bool Equals(Color other) { - return this.data.PackedValue == other.data.PackedValue; + if (this.boxedHighPrecisionPixel is null && other.boxedHighPrecisionPixel is null) + { + return this.data.PackedValue == other.data.PackedValue; + } + + return this.ToVector4().Equals(other.ToVector4()); } /// - public override bool Equals(object obj) - { - return obj is Color other && this.Equals(other); - } + public override bool Equals(object obj) => obj is Color other && this.Equals(other); /// [MethodImpl(InliningOptions.ShortMethod)] public override int GetHashCode() { - return this.data.PackedValue.GetHashCode(); + if (this.boxedHighPrecisionPixel is null) + { + return this.data.PackedValue.GetHashCode(); + } + + return this.boxedHighPrecisionPixel.GetHashCode(); } } } diff --git a/tests/ImageSharp.Tests/Color/ColorTests.CastTo.cs b/tests/ImageSharp.Tests/Color/ColorTests.CastTo.cs index ee1820de77..d3f3cf126e 100644 --- a/tests/ImageSharp.Tests/Color/ColorTests.CastTo.cs +++ b/tests/ImageSharp.Tests/Color/ColorTests.CastTo.cs @@ -66,7 +66,7 @@ public void Bgra32() [Fact] public void Rgb24() { - var source = new Rgb24(1, 22, 231); + var source = new Rgb24(1, 22, 231); // Act: var color = new Color(source); @@ -79,7 +79,7 @@ public void Rgb24() [Fact] public void Bgr24() { - var source = new Bgr24(1, 22, 231); + var source = new Bgr24(1, 22, 231); // Act: var color = new Color(source); @@ -88,6 +88,19 @@ public void Bgr24() Bgr24 data = color; Assert.Equal(source, data); } + + [Fact] + public void TPixel() + { + var source = new RgbaVector(1, .1F, .133F, .864F); + + // Act: + var color = Color.FromPixel(source); + + // Assert: + RgbaVector data = color.ToPixel(); + Assert.Equal(source, data); + } } } } From 67fd2d0427290e6a76eec0e49fb133986efbf3b6 Mon Sep 17 00:00:00 2001 From: Brian Popow <38701097+brianpopow@users.noreply.github.com> Date: Mon, 1 Nov 2021 13:07:39 +0100 Subject: [PATCH 16/85] Use ReadOnlySpan MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Günther Foidl --- src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs | 3 ++- src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs index abb7274472..c6dc6b8b23 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs @@ -17,7 +17,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless /// internal static unsafe class PredictorEncoder { - private static readonly sbyte[] DeltaLut = { 16, 16, 8, 4, 2, 2, 2 }; + // This uses C#'s compiler optimization to refer to assembly's static data directly. + private static ReadOnlySpan DeltaLut => new sbyte[] { 16, 16, 8, 4, 2, 2, 2 }; private static readonly sbyte[][] Offset = { diff --git a/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs index c46e7193f2..1a9036ec95 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs @@ -85,7 +85,8 @@ internal class Vp8LEncoder : IDisposable private const int PaletteInvSize = 1 << PaletteInvSizeBits; - private static readonly byte[] Order = { 1, 2, 0, 3 }; + // This uses C#'s compiler optimization to refer to assembly's static data directly. + private static ReadOnlySpan Order => new byte[] { 1, 2, 0, 3 }; /// /// Initializes a new instance of the class. From 86f4903c827635170e43cae57730bea4b951d6c7 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Mon, 1 Nov 2021 13:35:39 +0100 Subject: [PATCH 17/85] Fix build errors --- src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs | 6 +++--- src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs index c6dc6b8b23..89c930561c 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs @@ -17,9 +17,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless /// internal static unsafe class PredictorEncoder { - // This uses C#'s compiler optimization to refer to assembly's static data directly. - private static ReadOnlySpan DeltaLut => new sbyte[] { 16, 16, 8, 4, 2, 2, 2 }; - private static readonly sbyte[][] Offset = { new sbyte[] { 0, -1 }, new sbyte[] { 0, 1 }, new sbyte[] { -1, 0 }, new sbyte[] { 1, 0 }, new sbyte[] { -1, -1 }, new sbyte[] { -1, 1 }, new sbyte[] { 1, -1 }, new sbyte[] { 1, 1 } @@ -53,6 +50,9 @@ internal static unsafe class PredictorEncoder private static readonly Vector128 CollectColorBlueTransformsShuffleHighMask = Vector128.Create(255, 255, 255, 255, 255, 255, 255, 255, 255, 2, 255, 6, 255, 10, 255, 14); #endif + // This uses C#'s compiler optimization to refer to assembly's static data directly. + private static ReadOnlySpan DeltaLut => new sbyte[] { 16, 16, 8, 4, 2, 2, 2 }; + /// /// Finds the best predictor for each tile, and converts the image to residuals /// with respect to predictions. If nearLosslessQuality < 100, applies diff --git a/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs index 1a9036ec95..6a0a3184ed 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs @@ -85,9 +85,6 @@ internal class Vp8LEncoder : IDisposable private const int PaletteInvSize = 1 << PaletteInvSizeBits; - // This uses C#'s compiler optimization to refer to assembly's static data directly. - private static ReadOnlySpan Order => new byte[] { 1, 2, 0, 3 }; - /// /// Initializes a new instance of the class. /// @@ -140,6 +137,9 @@ public Vp8LEncoder( } } + // This uses C#'s compiler optimization to refer to assembly's static data directly. + private static ReadOnlySpan Order => new byte[] { 1, 2, 0, 3 }; + /// /// Gets the memory for the image data as packed bgra values. /// From 94df8fc1ad8833c912e19f642df78d49cca091b8 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Mon, 1 Nov 2021 14:33:46 +0100 Subject: [PATCH 18/85] Small bitreader improvements: - Make bitmask static readonly - Add aggresive inlining - Change Guard to DebugGuard in ReadValue --- .../Formats/Webp/BitReader/Vp8LBitReader.cs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/BitReader/Vp8LBitReader.cs b/src/ImageSharp/Formats/Webp/BitReader/Vp8LBitReader.cs index 601336fa4b..07423e3127 100644 --- a/src/ImageSharp/Formats/Webp/BitReader/Vp8LBitReader.cs +++ b/src/ImageSharp/Formats/Webp/BitReader/Vp8LBitReader.cs @@ -28,7 +28,7 @@ internal class Vp8LBitReader : BitReaderBase /// private const int Wbits = 32; - private readonly uint[] bitMask = + private static readonly uint[] BitMask = { 0, 0x000001, 0x000003, 0x000007, 0x00000f, @@ -125,13 +125,14 @@ public Vp8LBitReader(Stream inputStream, uint imageDataSize, MemoryAllocator mem /// /// The number of bits to read (should not exceed 16). /// A ushort value. + [MethodImpl(InliningOptions.ShortMethod)] public uint ReadValue(int nBits) { - Guard.MustBeGreaterThan(nBits, 0, nameof(nBits)); + DebugGuard.MustBeGreaterThan(nBits, 0, nameof(nBits)); if (!this.Eos && nBits <= Vp8LMaxNumBitRead) { - ulong val = this.PrefetchBits() & this.bitMask[nBits]; + ulong val = this.PrefetchBits() & BitMask[nBits]; this.bitPos += nBits; this.ShiftBytes(); return (uint)val; @@ -169,6 +170,7 @@ public bool ReadBit() /// /// Advances the read buffer by 4 bytes to make room for reading next 32 bits. /// + [MethodImpl(InliningOptions.ShortMethod)] public void FillBitWindow() { if (this.bitPos >= Wbits) @@ -181,7 +183,8 @@ public void FillBitWindow() /// Returns true if there was an attempt at reading bit past the end of the buffer. /// /// True, if end of buffer was reached. - public bool IsEndOfStream() => this.Eos || ((this.pos == this.len) && (this.bitPos > Lbits)); + [MethodImpl(InliningOptions.ShortMethod)] + public bool IsEndOfStream() => this.Eos || (this.pos == this.len && this.bitPos > Lbits); [MethodImpl(InliningOptions.ShortMethod)] private void DoFillBitWindow() => this.ShiftBytes(); @@ -189,6 +192,7 @@ public void FillBitWindow() /// /// If not at EOS, reload up to Vp8LLbits byte-by-byte. /// + [MethodImpl(InliningOptions.ShortMethod)] private void ShiftBytes() { System.Span dataSpan = this.Data.Memory.Span; From 7d4fd642de5f08a87318fc19058dcbd9547e488a Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Mon, 1 Nov 2021 17:20:35 +0100 Subject: [PATCH 19/85] Use helper methods to access clip tables --- .../Formats/Webp/Lossy/LossyUtils.cs | 48 +- .../Formats/Webp/WebpLookupTables.cs | 425 +++++++++--------- 2 files changed, 243 insertions(+), 230 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs index 1a6ace16fa..04ff80b2d9 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs @@ -934,11 +934,11 @@ private static void DoFilter2(Span p, int offset, int step) int p0 = p[offset - step]; int q0 = p[offset]; int q1 = p[offset + step]; - int a = (3 * (q0 - p0)) + WebpLookupTables.Sclip1[p1 - q1 + 1020]; - int a1 = WebpLookupTables.Sclip2[((a + 4) >> 3) + 112]; - int a2 = WebpLookupTables.Sclip2[((a + 3) >> 3) + 112]; - p[offset - step] = WebpLookupTables.Clip1[p0 + a2 + 255]; - p[offset] = WebpLookupTables.Clip1[q0 - a1 + 255]; + int a = (3 * (q0 - p0)) + WebpLookupTables.Sclip1(p1 - q1); + int a1 = WebpLookupTables.Sclip2((a + 4) >> 3); + int a2 = WebpLookupTables.Sclip2((a + 3) >> 3); + p[offset - step] = WebpLookupTables.Clip1(p0 + a2); + p[offset] = WebpLookupTables.Clip1(q0 - a1); } private static void DoFilter4(Span p, int offset, int step) @@ -950,13 +950,13 @@ private static void DoFilter4(Span p, int offset, int step) int q0 = p[offset]; int q1 = p[offset + step]; int a = 3 * (q0 - p0); - int a1 = WebpLookupTables.Sclip2[((a + 4) >> 3) + 112]; - int a2 = WebpLookupTables.Sclip2[((a + 3) >> 3) + 112]; + int a1 = WebpLookupTables.Sclip2((a + 4) >> 3); + int a2 = WebpLookupTables.Sclip2((a + 3) >> 3); int a3 = (a1 + 1) >> 1; - p[offsetMinus2Step] = WebpLookupTables.Clip1[p1 + a3 + 255]; - p[offset - step] = WebpLookupTables.Clip1[p0 + a2 + 255]; - p[offset] = WebpLookupTables.Clip1[q0 - a1 + 255]; - p[offset + step] = WebpLookupTables.Clip1[q1 - a3 + 255]; + p[offsetMinus2Step] = WebpLookupTables.Clip1(p1 + a3); + p[offset - step] = WebpLookupTables.Clip1(p0 + a2); + p[offset] = WebpLookupTables.Clip1(q0 - a1); + p[offset + step] = WebpLookupTables.Clip1(q1 - a3); } private static void DoFilter6(Span p, int offset, int step) @@ -971,18 +971,18 @@ private static void DoFilter6(Span p, int offset, int step) int q0 = p[offset]; int q1 = p[offset + step]; int q2 = p[offset + step2]; - int a = WebpLookupTables.Sclip1[(3 * (q0 - p0)) + WebpLookupTables.Sclip1[p1 - q1 + 1020] + 1020]; + int a = WebpLookupTables.Sclip1((3 * (q0 - p0)) + WebpLookupTables.Sclip1(p1 - q1)); // a is in [-128,127], a1 in [-27,27], a2 in [-18,18] and a3 in [-9,9] int a1 = ((27 * a) + 63) >> 7; // eq. to ((3 * a + 7) * 9) >> 7 int a2 = ((18 * a) + 63) >> 7; // eq. to ((2 * a + 7) * 9) >> 7 int a3 = ((9 * a) + 63) >> 7; // eq. to ((1 * a + 7) * 9) >> 7 - p[offset - step3] = WebpLookupTables.Clip1[p2 + a3 + 255]; - p[offset - step2] = WebpLookupTables.Clip1[p1 + a2 + 255]; - p[offsetMinusStep] = WebpLookupTables.Clip1[p0 + a1 + 255]; - p[offset] = WebpLookupTables.Clip1[q0 - a1 + 255]; - p[offset + step] = WebpLookupTables.Clip1[q1 - a2 + 255]; - p[offset + step2] = WebpLookupTables.Clip1[q2 - a3 + 255]; + p[offset - step3] = WebpLookupTables.Clip1(p2 + a3); + p[offset - step2] = WebpLookupTables.Clip1(p1 + a2); + p[offsetMinusStep] = WebpLookupTables.Clip1(p0 + a1); + p[offset] = WebpLookupTables.Clip1(q0 - a1); + p[offset + step] = WebpLookupTables.Clip1(q1 - a2); + p[offset + step2] = WebpLookupTables.Clip1(q2 - a3); } [MethodImpl(InliningOptions.ShortMethod)] @@ -992,7 +992,7 @@ private static bool NeedsFilter(Span p, int offset, int step, int t) int p0 = p[offset - step]; int q0 = p[offset]; int q1 = p[offset + step]; - return (4 * WebpLookupTables.Abs0[p0 - q0 + 255]) + WebpLookupTables.Abs0[p1 - q1 + 255] <= t; + return (4 * WebpLookupTables.Abs0(p0 - q0)) + WebpLookupTables.Abs0(p1 - q1) <= t; } private static bool NeedsFilter2(Span p, int offset, int step, int t, int it) @@ -1007,14 +1007,14 @@ private static bool NeedsFilter2(Span p, int offset, int step, int t, int int q1 = p[offset + step]; int q2 = p[offset + step2]; int q3 = p[offset + step3]; - if ((4 * WebpLookupTables.Abs0[p0 - q0 + 255]) + WebpLookupTables.Abs0[p1 - q1 + 255] > t) + if ((4 * WebpLookupTables.Abs0(p0 - q0)) + WebpLookupTables.Abs0(p1 - q1) > t) { return false; } - return WebpLookupTables.Abs0[p3 - p2 + 255] <= it && WebpLookupTables.Abs0[p2 - p1 + 255] <= it && - WebpLookupTables.Abs0[p1 - p0 + 255] <= it && WebpLookupTables.Abs0[q3 - q2 + 255] <= it && - WebpLookupTables.Abs0[q2 - q1 + 255] <= it && WebpLookupTables.Abs0[q1 - q0 + 255] <= it; + return WebpLookupTables.Abs0(p3 - p2) <= it && WebpLookupTables.Abs0(p2 - p1) <= it && + WebpLookupTables.Abs0(p1 - p0) <= it && WebpLookupTables.Abs0(q3 - q2) <= it && + WebpLookupTables.Abs0(q2 - q1) <= it && WebpLookupTables.Abs0(q1 - q0) <= it; } [MethodImpl(InliningOptions.ShortMethod)] @@ -1024,7 +1024,7 @@ private static bool Hev(Span p, int offset, int step, int thresh) int p0 = p[offset - step]; int q0 = p[offset]; int q1 = p[offset + step]; - return WebpLookupTables.Abs0[p1 - p0 + 255] > thresh || WebpLookupTables.Abs0[q1 - q0 + 255] > thresh; + return WebpLookupTables.Abs0(p1 - p0) > thresh || WebpLookupTables.Abs0(q1 - q0) > thresh; } [MethodImpl(InliningOptions.ShortMethod)] diff --git a/src/ImageSharp/Formats/Webp/WebpLookupTables.cs b/src/ImageSharp/Formats/Webp/WebpLookupTables.cs index 98cf3029fa..3b5d677293 100644 --- a/src/ImageSharp/Formats/Webp/WebpLookupTables.cs +++ b/src/ImageSharp/Formats/Webp/WebpLookupTables.cs @@ -2,6 +2,7 @@ // Licensed under the Apache License, Version 2.0. using System; +using System.Runtime.CompilerServices; namespace SixLabors.ImageSharp.Formats.Webp { @@ -45,215 +46,17 @@ internal static class WebpLookupTables 8 + (0 * WebpConstants.Bps), 12 + (0 * WebpConstants.Bps), 8 + (4 * WebpConstants.Bps), 12 + (4 * WebpConstants.Bps) // V }; - public static readonly byte[] Abs0 = - { - 0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xf8, 0xf7, 0xf6, 0xf5, 0xf4, 0xf3, 0xf2, 0xf1, 0xf0, 0xef, - 0xee, 0xed, 0xec, 0xeb, 0xea, 0xe9, 0xe8, 0xe7, 0xe6, 0xe5, 0xe4, 0xe3, 0xe2, 0xe1, 0xe0, 0xdf, 0xde, - 0xdd, 0xdc, 0xdb, 0xda, 0xd9, 0xd8, 0xd7, 0xd6, 0xd5, 0xd4, 0xd3, 0xd2, 0xd1, 0xd0, 0xcf, 0xce, 0xcd, - 0xcc, 0xcb, 0xca, 0xc9, 0xc8, 0xc7, 0xc6, 0xc5, 0xc4, 0xc3, 0xc2, 0xc1, 0xc0, 0xbf, 0xbe, 0xbd, 0xbc, - 0xbb, 0xba, 0xb9, 0xb8, 0xb7, 0xb6, 0xb5, 0xb4, 0xb3, 0xb2, 0xb1, 0xb0, 0xaf, 0xae, 0xad, 0xac, 0xab, - 0xaa, 0xa9, 0xa8, 0xa7, 0xa6, 0xa5, 0xa4, 0xa3, 0xa2, 0xa1, 0xa0, 0x9f, 0x9e, 0x9d, 0x9c, 0x9b, 0x9a, - 0x99, 0x98, 0x97, 0x96, 0x95, 0x94, 0x93, 0x92, 0x91, 0x90, 0x8f, 0x8e, 0x8d, 0x8c, 0x8b, 0x8a, 0x89, - 0x88, 0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81, 0x80, 0x7f, 0x7e, 0x7d, 0x7c, 0x7b, 0x7a, 0x79, 0x78, - 0x77, 0x76, 0x75, 0x74, 0x73, 0x72, 0x71, 0x70, 0x6f, 0x6e, 0x6d, 0x6c, 0x6b, 0x6a, 0x69, 0x68, 0x67, - 0x66, 0x65, 0x64, 0x63, 0x62, 0x61, 0x60, 0x5f, 0x5e, 0x5d, 0x5c, 0x5b, 0x5a, 0x59, 0x58, 0x57, 0x56, - 0x55, 0x54, 0x53, 0x52, 0x51, 0x50, 0x4f, 0x4e, 0x4d, 0x4c, 0x4b, 0x4a, 0x49, 0x48, 0x47, 0x46, 0x45, - 0x44, 0x43, 0x42, 0x41, 0x40, 0x3f, 0x3e, 0x3d, 0x3c, 0x3b, 0x3a, 0x39, 0x38, 0x37, 0x36, 0x35, 0x34, - 0x33, 0x32, 0x31, 0x30, 0x2f, 0x2e, 0x2d, 0x2c, 0x2b, 0x2a, 0x29, 0x28, 0x27, 0x26, 0x25, 0x24, 0x23, - 0x22, 0x21, 0x20, 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, - 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, - 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, - 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, - 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, - 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, - 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, - 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, - 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, - 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, - 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, - 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, - 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, - 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, - 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, - 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, - 0xff - }; + [MethodImpl(InliningOptions.ShortMethod)] + public static byte Abs0(int x) => Abs0Table[x + 255]; - public static readonly sbyte[] Sclip1 = - { - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -127, -126, -125, -124, -123, -122, -121, -120, - -119, -118, -117, -116, -115, -114, -113, -112, -111, -110, -109, -108, -107, -106, -105, -104, -103, - -102, -101, -100, -99, -98, -97, -96, -95, -94, -93, -92, -91, -90, -89, -88, -87, -86, -85, -84, -83, - -82, -81, -80, -79, -78, -77, -76, -75, -74, -73, -72, -71, -70, -69, -68, -67, -66, -65, -64, -63, -62, - -61, -60, -59, -58, -57, -56, -55, -54, -53, -52, -51, -50, -49, -48, -47, -46, -45, -44, -43, -42, -41, - -40, -39, -38, -37, -36, -35, -34, -33, -32, -31, -30, -29, -28, -27, -26, -25, -24, -23, -22, -21, -20, - -19, -18, -17, -16, -15, -14, -13, -12, -11, -10, -9, -8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, - 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, - 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, - 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, - 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, - 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 127, 127, - 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, - 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, - 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, - 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, - 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, - 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, - 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, - 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, - 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, - 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, - 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, - 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, - 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, - 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, - 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, - 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, - 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, - 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, - 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, - 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, - 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, - 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, - 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, - 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, - 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, - 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, - 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, - 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, - 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, - 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, - 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, - 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, - 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, - 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, - 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, - 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, - 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, - 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, - 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, - 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, - 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, - 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, - 127, 127, 127, 127, 127, 127, 127, 127, 127 - }; + [MethodImpl(InliningOptions.ShortMethod)] + public static sbyte Sclip1(int x) => Sclip1Table[x + 1020]; - public static readonly sbyte[] Sclip2 = - { - -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, - -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, - -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, - -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, - -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -15, -14, -13, -12, -11, -10, -9, -8, - -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15 - }; + [MethodImpl(InliningOptions.ShortMethod)] + public static sbyte Sclip2(int x) => Sclip2Table[x + 112]; - public static readonly byte[] Clip1 = - { - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, - 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, - 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, - 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, - 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, - 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, - 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, - 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, - 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, - 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, - 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, - 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, - 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, - 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, - 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff - }; + [MethodImpl(InliningOptions.ShortMethod)] + public static byte Clip1(int x) => Clip1Table[x + 255]; // fixed costs for coding levels, deduce from the coding tree. // This is only the part that doesn't depend on the probability state. @@ -1438,6 +1241,216 @@ static WebpLookupTables() InitializeFixedCostsI4(); } + private static readonly byte[] Abs0Table = + { + 0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xf8, 0xf7, 0xf6, 0xf5, 0xf4, 0xf3, 0xf2, 0xf1, 0xf0, 0xef, + 0xee, 0xed, 0xec, 0xeb, 0xea, 0xe9, 0xe8, 0xe7, 0xe6, 0xe5, 0xe4, 0xe3, 0xe2, 0xe1, 0xe0, 0xdf, 0xde, + 0xdd, 0xdc, 0xdb, 0xda, 0xd9, 0xd8, 0xd7, 0xd6, 0xd5, 0xd4, 0xd3, 0xd2, 0xd1, 0xd0, 0xcf, 0xce, 0xcd, + 0xcc, 0xcb, 0xca, 0xc9, 0xc8, 0xc7, 0xc6, 0xc5, 0xc4, 0xc3, 0xc2, 0xc1, 0xc0, 0xbf, 0xbe, 0xbd, 0xbc, + 0xbb, 0xba, 0xb9, 0xb8, 0xb7, 0xb6, 0xb5, 0xb4, 0xb3, 0xb2, 0xb1, 0xb0, 0xaf, 0xae, 0xad, 0xac, 0xab, + 0xaa, 0xa9, 0xa8, 0xa7, 0xa6, 0xa5, 0xa4, 0xa3, 0xa2, 0xa1, 0xa0, 0x9f, 0x9e, 0x9d, 0x9c, 0x9b, 0x9a, + 0x99, 0x98, 0x97, 0x96, 0x95, 0x94, 0x93, 0x92, 0x91, 0x90, 0x8f, 0x8e, 0x8d, 0x8c, 0x8b, 0x8a, 0x89, + 0x88, 0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81, 0x80, 0x7f, 0x7e, 0x7d, 0x7c, 0x7b, 0x7a, 0x79, 0x78, + 0x77, 0x76, 0x75, 0x74, 0x73, 0x72, 0x71, 0x70, 0x6f, 0x6e, 0x6d, 0x6c, 0x6b, 0x6a, 0x69, 0x68, 0x67, + 0x66, 0x65, 0x64, 0x63, 0x62, 0x61, 0x60, 0x5f, 0x5e, 0x5d, 0x5c, 0x5b, 0x5a, 0x59, 0x58, 0x57, 0x56, + 0x55, 0x54, 0x53, 0x52, 0x51, 0x50, 0x4f, 0x4e, 0x4d, 0x4c, 0x4b, 0x4a, 0x49, 0x48, 0x47, 0x46, 0x45, + 0x44, 0x43, 0x42, 0x41, 0x40, 0x3f, 0x3e, 0x3d, 0x3c, 0x3b, 0x3a, 0x39, 0x38, 0x37, 0x36, 0x35, 0x34, + 0x33, 0x32, 0x31, 0x30, 0x2f, 0x2e, 0x2d, 0x2c, 0x2b, 0x2a, 0x29, 0x28, 0x27, 0x26, 0x25, 0x24, 0x23, + 0x22, 0x21, 0x20, 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, + 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, + 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, + 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, + 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, + 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, + 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, + 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, + 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, + 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, + 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, + 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, + 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, + 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, + 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, + 0xff + }; + + private static readonly byte[] Clip1Table = + { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, + 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, + 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, + 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, + 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, + 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, + 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, + 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, + 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, + 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, + 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, + 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, + 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, + 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff + }; + + private static readonly sbyte[] Sclip1Table = + { + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -127, -126, -125, -124, -123, -122, -121, -120, + -119, -118, -117, -116, -115, -114, -113, -112, -111, -110, -109, -108, -107, -106, -105, -104, -103, + -102, -101, -100, -99, -98, -97, -96, -95, -94, -93, -92, -91, -90, -89, -88, -87, -86, -85, -84, -83, + -82, -81, -80, -79, -78, -77, -76, -75, -74, -73, -72, -71, -70, -69, -68, -67, -66, -65, -64, -63, -62, + -61, -60, -59, -58, -57, -56, -55, -54, -53, -52, -51, -50, -49, -48, -47, -46, -45, -44, -43, -42, -41, + -40, -39, -38, -37, -36, -35, -34, -33, -32, -31, -30, -29, -28, -27, -26, -25, -24, -23, -22, -21, -20, + -19, -18, -17, -16, -15, -14, -13, -12, -11, -10, -9, -8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, + 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, + 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, + 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, + 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127 + }; + + private static readonly sbyte[] Sclip2Table = + { + -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, + -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, + -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, + -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, + -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -15, -14, -13, -12, -11, -10, -9, -8, + -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15 + }; + private static void InitializeModesProbabilities() { // Paragraph 11.5 From 853b1173697c0f56084eea21fd7d04f40764fa96 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Mon, 1 Nov 2021 19:46:24 +0100 Subject: [PATCH 20/85] Make histo and best histo array readonly --- src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs index 6a0a3184ed..da815a479a 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs @@ -24,9 +24,9 @@ internal class Vp8LEncoder : IDisposable /// private readonly int[] scratch = new int[256]; - private int[][] histoArgb = { new int[256], new int[256], new int[256], new int[256] }; + private readonly int[][] histoArgb = { new int[256], new int[256], new int[256], new int[256] }; - private int[][] bestHisto = { new int[256], new int[256], new int[256], new int[256] }; + private readonly int[][] bestHisto = { new int[256], new int[256], new int[256], new int[256] }; /// /// The to use for buffer allocations. From 35d2afa0bb4be7e50d26d5ae5435dbcaa6ece4c9 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Mon, 1 Nov 2021 20:18:21 +0100 Subject: [PATCH 21/85] Add sse2 version of select --- .../Formats/Webp/Lossless/LosslessUtils.cs | 60 +++++++++++++++---- .../Formats/Webp/Lossless/PredictorEncoder.cs | 27 +++++---- 2 files changed, 64 insertions(+), 23 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs index b7f94415be..7e21517d20 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs @@ -27,6 +27,10 @@ internal static unsafe class LosslessUtils private const double Log2Reciprocal = 1.44269504088896338700465094007086; +#if SUPPORTS_RUNTIME_INTRINSICS + private static readonly Vector128 Zero = Vector128.Create(0).AsByte(); +#endif + /// /// Returns the exact index where array1 and array2 are different. For an index /// inferior or equal to bestLenMatch, the return value just has to be strictly @@ -551,6 +555,7 @@ public static void PredictorInverseTransform( int mask = tileWidth - 1; int tilesPerRow = SubSampleSize(width, transform.Bits); int predictorModeIdxBase = (y >> transform.Bits) * tilesPerRow; + Span scratch = stackalloc short[8]; while (y < yEnd) { int predictorModeIdx = predictorModeIdxBase; @@ -608,7 +613,7 @@ public static void PredictorInverseTransform( PredictorAdd10(input + x, output + x - width, xEnd - x, output + x); break; case 11: - PredictorAdd11(input + x, output + x - width, xEnd - x, output + x); + PredictorAdd11(input + x, output + x - width, xEnd - x, output + x, scratch); break; case 12: PredictorAdd12(input + x, output + x - width, xEnd - x, output + x); @@ -974,11 +979,11 @@ private static void PredictorAdd10(uint* input, uint* upper, int numberOfPixels, } [MethodImpl(InliningOptions.ShortMethod)] - private static void PredictorAdd11(uint* input, uint* upper, int numberOfPixels, uint* output) + private static void PredictorAdd11(uint* input, uint* upper, int numberOfPixels, uint* output, Span scratch) { for (int x = 0; x < numberOfPixels; x++) { - uint pred = Predictor11(output[x - 1], upper + x); + uint pred = Predictor11(output[x - 1], upper + x, scratch); output[x] = AddPixels(input[x], pred); } } @@ -1031,7 +1036,7 @@ private static void PredictorAdd13(uint* input, uint* upper, int numberOfPixels, public static uint Predictor10(uint left, uint* top) => Average4(left, top[-1], top[0], top[1]); [MethodImpl(InliningOptions.ShortMethod)] - public static uint Predictor11(uint left, uint* top) => Select(top[0], left, top[-1]); + public static uint Predictor11(uint left, uint* top, Span scratch) => Select(top[0], left, top[-1], scratch); [MethodImpl(InliningOptions.ShortMethod)] public static uint Predictor12(uint left, uint* top) => ClampedAddSubtractFull(left, top[0], top[-1]); @@ -1148,11 +1153,11 @@ public static void PredictorSub10(uint* input, uint* upper, int numPixels, uint* } [MethodImpl(InliningOptions.ShortMethod)] - public static void PredictorSub11(uint* input, uint* upper, int numPixels, uint* output) + public static void PredictorSub11(uint* input, uint* upper, int numPixels, uint* output, Span scratch) { for (int x = 0; x < numPixels; x++) { - uint pred = Predictor11(input[x - 1], upper + x); + uint pred = Predictor11(input[x - 1], upper + x, scratch); output[x] = SubPixels(input[x], pred); } } @@ -1240,14 +1245,43 @@ private static uint ClampedAddSubtractHalf(uint c0, uint c1, uint c2) private static Vector128 MkCst16(int hi, int lo) => Vector128.Create((hi << 16) | (lo & 0xffff)); #endif - private static uint Select(uint a, uint b, uint c) + private static uint Select(uint a, uint b, uint c, Span scratch) { - int paMinusPb = - Sub3((int)(a >> 24), (int)(b >> 24), (int)(c >> 24)) + - Sub3((int)((a >> 16) & 0xff), (int)((b >> 16) & 0xff), (int)((c >> 16) & 0xff)) + - Sub3((int)((a >> 8) & 0xff), (int)((b >> 8) & 0xff), (int)((c >> 8) & 0xff)) + - Sub3((int)(a & 0xff), (int)(b & 0xff), (int)(c & 0xff)); - return paMinusPb <= 0 ? a : b; +#if SUPPORTS_RUNTIME_INTRINSICS + if (Sse2.IsSupported) + { + Span output = scratch; + fixed (short* p = output) + { + Vector128 a0 = Sse2.ConvertScalarToVector128UInt32(a).AsByte(); + Vector128 b0 = Sse2.ConvertScalarToVector128UInt32(b).AsByte(); + Vector128 c0 = Sse2.ConvertScalarToVector128UInt32(c).AsByte(); + Vector128 ac0 = Sse2.SubtractSaturate(a0, c0); + Vector128 ca0 = Sse2.SubtractSaturate(c0, a0); + Vector128 bc0 = Sse2.SubtractSaturate(b0, c0); + Vector128 cb0 = Sse2.SubtractSaturate(c0, b0); + Vector128 ac = Sse2.Or(ac0, ca0); + Vector128 bc = Sse2.Or(bc0, cb0); + Vector128 pa = Sse2.UnpackLow(ac, Zero); // |a - c| + Vector128 pb = Sse2.UnpackLow(bc, Zero); // |b - c| + Vector128 diff = Sse2.Subtract(pb.AsUInt16(), pa.AsUInt16()); + Sse2.Store((ushort*)p, diff); + } + + int paMinusPb = output[0] + output[1] + output[2] + output[3]; + + return (paMinusPb <= 0) ? a : b; + } + else +#endif + { + int paMinusPb = + Sub3((int)(a >> 24), (int)(b >> 24), (int)(c >> 24)) + + Sub3((int)((a >> 16) & 0xff), (int)((b >> 16) & 0xff), (int)((c >> 16) & 0xff)) + + Sub3((int)((a >> 8) & 0xff), (int)((b >> 8) & 0xff), (int)((c >> 8) & 0xff)) + + Sub3((int)(a & 0xff), (int)(b & 0xff), (int)(c & 0xff)); + return paMinusPb <= 0 ? a : b; + } } [MethodImpl(InliningOptions.ShortMethod)] diff --git a/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs index 671e9a043e..2c70faa0d8 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs @@ -50,6 +50,7 @@ public static void ResidualImage( int tilesPerRow = LosslessUtils.SubSampleSize(width, bits); int tilesPerCol = LosslessUtils.SubSampleSize(height, bits); int maxQuantization = 1 << LosslessUtils.NearLosslessBits(nearLosslessQuality); + Span scratch = stackalloc short[8]; // TODO: Can we optimize this? int[][] histo = new int[4][]; @@ -84,7 +85,8 @@ public static void ResidualImage( transparentColorMode, usedSubtractGreen, nearLossless, - image); + image, + scratch); image[(tileY * tilesPerRow) + tileX] = (uint)(WebpConstants.ArgbBlack | (pred << 8)); } @@ -192,7 +194,8 @@ private static int GetBestPredictorForTile( WebpTransparentColorMode transparentColorMode, bool usedSubtractGreen, bool nearLossless, - Span modes) + Span modes, + Span scratch) { const int numPredModes = 14; int startX = tileX << bits; @@ -272,7 +275,7 @@ private static int GetBestPredictorForTile( } } - GetResidual(width, height, upperRow, currentRow, maxDiffs, mode, startX, startX + maxX, y, maxQuantization, transparentColorMode, usedSubtractGreen, nearLossless, residuals); + GetResidual(width, height, upperRow, currentRow, maxDiffs, mode, startX, startX + maxX, y, maxQuantization, transparentColorMode, usedSubtractGreen, nearLossless, residuals, scratch); for (int relativeX = 0; relativeX < maxX; ++relativeX) { UpdateHisto(histoArgb, residuals[relativeX]); @@ -333,11 +336,12 @@ private static void GetResidual( WebpTransparentColorMode transparentColorMode, bool usedSubtractGreen, bool nearLossless, - Span output) + Span output, + Span scratch) { if (transparentColorMode == WebpTransparentColorMode.Preserve) { - PredictBatch(mode, xStart, y, xEnd - xStart, currentRowSpan, upperRowSpan, output); + PredictBatch(mode, xStart, y, xEnd - xStart, currentRowSpan, upperRowSpan, output, scratch); } else { @@ -395,7 +399,7 @@ private static void GetResidual( predict = LosslessUtils.Predictor10(currentRow[x - 1], upperRow + x); break; case 11: - predict = LosslessUtils.Predictor11(currentRow[x - 1], upperRow + x); + predict = LosslessUtils.Predictor11(currentRow[x - 1], upperRow + x, scratch); break; case 12: predict = LosslessUtils.Predictor12(currentRow[x - 1], upperRow + x); @@ -583,6 +587,7 @@ private static void CopyImageWithPrediction( Span currentMaxDiffs = MemoryMarshal.Cast(currentRow.Slice(width + 1)); Span lowerMaxDiffs = currentMaxDiffs.Slice(width); + Span scratch = stackalloc short[8]; for (int y = 0; y < height; y++) { Span tmp32 = upperRow; @@ -593,7 +598,7 @@ private static void CopyImageWithPrediction( if (lowEffort) { - PredictBatch(PredLowEffort, 0, y, width, currentRow, upperRow, argb.Slice(y * width)); + PredictBatch(PredLowEffort, 0, y, width, currentRow, upperRow, argb.Slice(y * width), scratch); } else { @@ -634,7 +639,8 @@ private static void CopyImageWithPrediction( transparentColorMode, usedSubtractGreen, nearLossless, - argb.Slice((y * width) + x)); + argb.Slice((y * width) + x), + scratch); x = xEnd; } @@ -649,7 +655,8 @@ private static void PredictBatch( int numPixels, Span currentSpan, Span upperSpan, - Span outputSpan) + Span outputSpan, + Span scratch) { #pragma warning disable SA1503 // Braces should not be omitted fixed (uint* current = currentSpan) @@ -718,7 +725,7 @@ private static void PredictBatch( LosslessUtils.PredictorSub10(current + xStart, upper + xStart, numPixels, output); break; case 11: - LosslessUtils.PredictorSub11(current + xStart, upper + xStart, numPixels, output); + LosslessUtils.PredictorSub11(current + xStart, upper + xStart, numPixels, output, scratch); break; case 12: LosslessUtils.PredictorSub12(current + xStart, upper + xStart, numPixels, output); From de6bd9de7953d693b6e1a04007b2796507f65e0f Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Mon, 1 Nov 2021 21:29:10 +0100 Subject: [PATCH 22/85] Use Vector128.Zero --- src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs index 7e21517d20..22c2333607 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs @@ -27,10 +27,6 @@ internal static unsafe class LosslessUtils private const double Log2Reciprocal = 1.44269504088896338700465094007086; -#if SUPPORTS_RUNTIME_INTRINSICS - private static readonly Vector128 Zero = Vector128.Create(0).AsByte(); -#endif - /// /// Returns the exact index where array1 and array2 are different. For an index /// inferior or equal to bestLenMatch, the return value just has to be strictly @@ -1262,8 +1258,8 @@ private static uint Select(uint a, uint b, uint c, Span scratch) Vector128 cb0 = Sse2.SubtractSaturate(c0, b0); Vector128 ac = Sse2.Or(ac0, ca0); Vector128 bc = Sse2.Or(bc0, cb0); - Vector128 pa = Sse2.UnpackLow(ac, Zero); // |a - c| - Vector128 pb = Sse2.UnpackLow(bc, Zero); // |b - c| + Vector128 pa = Sse2.UnpackLow(ac, Vector128.Zero); // |a - c| + Vector128 pb = Sse2.UnpackLow(bc, Vector128.Zero); // |b - c| Vector128 diff = Sse2.Subtract(pb.AsUInt16(), pa.AsUInt16()); Sse2.Store((ushort*)p, diff); } From 143de220b75abd8bf44f7943650a36cbaa3f7421 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 2 Nov 2021 10:55:49 +0100 Subject: [PATCH 23/85] Add Predictor11 test --- .../Formats/WebP/LosslessUtilsTests.cs | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs b/tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs index be7bc27d3a..bf381ebdaa 100644 --- a/tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs +++ b/tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs @@ -132,6 +132,30 @@ private static void RunTransformColorInverseTest() Assert.Equal(expectedOutput, pixelData); } + private static void RunPredictor11Test() + { + // arrange + uint[] topData = { 4278258949, 4278258949 }; + uint left = 4294839812; + short[] scratch = new short[8]; + uint expectedResult = 4294839812; + + // act + unsafe + { + fixed (uint* top = &topData[1]) + { + uint actual = LosslessUtils.Predictor11(left, top, scratch); + + // assert + Assert.Equal(expectedResult, actual); + } + } + } + + [Fact] + public void Predictor11_Works() => RunPredictor11Test(); + [Fact] public void SubtractGreen_Works() => RunSubtractGreenTest(); @@ -145,6 +169,12 @@ private static void RunTransformColorInverseTest() public void TransformColorInverse_Works() => RunTransformColorInverseTest(); #if SUPPORTS_RUNTIME_INTRINSICS + [Fact] + public void Predictor11_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor11Test, HwIntrinsics.AllowAll); + + [Fact] + public void Predictor11_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor11Test, HwIntrinsics.DisableSSE2); + [Fact] public void SubtractGreen_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunSubtractGreenTest, HwIntrinsics.AllowAll); From fd07436736d721bedfbafc308d902aa1e7765778 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 2 Nov 2021 12:40:04 +0100 Subject: [PATCH 24/85] Replace Guard with DebugGuard in FastSLog2Slow --- src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs index 22c2333607..ebebe79547 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs @@ -780,7 +780,7 @@ public static void ColorCodeToMultipliers(uint colorCode, ref Vp8LMultipliers m) private static float FastSLog2Slow(uint v) { - Guard.MustBeGreaterThanOrEqualTo(v, LogLookupIdxMax, nameof(v)); + DebugGuard.MustBeGreaterThanOrEqualTo(v, LogLookupIdxMax, nameof(v)); if (v < ApproxLogWithCorrectionMax) { int logCnt = 0; From 2bf16bcb58556d6f3cbee5298472db42af60bd02 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 2 Nov 2021 12:41:43 +0100 Subject: [PATCH 25/85] Reverse access to output array to remove bounds checks --- src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs index ebebe79547..b278b12bc9 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs @@ -1262,11 +1262,9 @@ private static uint Select(uint a, uint b, uint c, Span scratch) Vector128 pb = Sse2.UnpackLow(bc, Vector128.Zero); // |b - c| Vector128 diff = Sse2.Subtract(pb.AsUInt16(), pa.AsUInt16()); Sse2.Store((ushort*)p, diff); + int paMinusPb = output[3] + output[2] + output[1] + output[0]; + return (paMinusPb <= 0) ? a : b; } - - int paMinusPb = output[0] + output[1] + output[2] + output[3]; - - return (paMinusPb <= 0) ? a : b; } else #endif From a7ed1884e0f9439c03d913f4d4a5f2b36d38071e Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 2 Nov 2021 14:15:13 +0100 Subject: [PATCH 26/85] Add sse2 version of ClampedAddSubtractHalf --- .../Formats/Webp/Lossless/LosslessUtils.cs | 32 +++++++++++++++---- 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs index b278b12bc9..0dda5a79a6 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs @@ -1219,12 +1219,32 @@ private static uint ClampedAddSubtractFull(uint c0, uint c1, uint c2) private static uint ClampedAddSubtractHalf(uint c0, uint c1, uint c2) { - uint ave = Average2(c0, c1); - int a = AddSubtractComponentHalf((int)(ave >> 24), (int)(c2 >> 24)); - int r = AddSubtractComponentHalf((int)((ave >> 16) & 0xff), (int)((c2 >> 16) & 0xff)); - int g = AddSubtractComponentHalf((int)((ave >> 8) & 0xff), (int)((c2 >> 8) & 0xff)); - int b = AddSubtractComponentHalf((int)(ave & 0xff), (int)(c2 & 0xff)); - return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b; +#if SUPPORTS_RUNTIME_INTRINSICS + if (Sse2.IsSupported) + { + Vector128 c0Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c0).AsByte(), Vector128.Zero); + Vector128 c1Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c1).AsByte(), Vector128.Zero); + Vector128 b0 = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c2).AsByte(), Vector128.Zero); + Vector128 avg = Sse2.Add(c1Vec.AsInt16(), c0Vec.AsInt16()); + Vector128 a0 = Sse2.ShiftRightLogical(avg, 1); + Vector128 a1 = Sse2.Subtract(a0, b0.AsInt16()); + Vector128 bgta = Sse2.CompareGreaterThan(b0.AsInt16(), a0.AsInt16()); + Vector128 a2 = Sse2.Subtract(a1, bgta); + Vector128 a3 = Sse2.ShiftRightArithmetic(a2.AsInt16(), 1); + Vector128 a4 = Sse2.Add(a0.AsInt16(), a3).AsInt16(); + Vector128 a5 = Sse2.PackUnsignedSaturate(a4, a4); + uint output = Sse2.ConvertToUInt32(a5.AsUInt32()); + return output; + } +#endif + { + uint ave = Average2(c0, c1); + int a = AddSubtractComponentHalf((int)(ave >> 24), (int)(c2 >> 24)); + int r = AddSubtractComponentHalf((int)((ave >> 16) & 0xff), (int)((c2 >> 16) & 0xff)); + int g = AddSubtractComponentHalf((int)((ave >> 8) & 0xff), (int)((c2 >> 8) & 0xff)); + int b = AddSubtractComponentHalf((int)(ave & 0xff), (int)(c2 & 0xff)); + return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b; + } } [MethodImpl(InliningOptions.ShortMethod)] From 28053739a9beeed006fd256a0ea8016631660841 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 2 Nov 2021 14:20:33 +0100 Subject: [PATCH 27/85] Add sse2 version of ClampedAddSubtractFull --- .../Formats/Webp/Lossless/LosslessUtils.cs | 42 ++++++++++++------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs index 0dda5a79a6..7740dc0515 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs @@ -1201,20 +1201,34 @@ public static uint AddPixels(uint a, uint b) private static uint ClampedAddSubtractFull(uint c0, uint c1, uint c2) { - int a = AddSubtractComponentFull( - (int)(c0 >> 24), - (int)(c1 >> 24), - (int)(c2 >> 24)); - int r = AddSubtractComponentFull( - (int)((c0 >> 16) & 0xff), - (int)((c1 >> 16) & 0xff), - (int)((c2 >> 16) & 0xff)); - int g = AddSubtractComponentFull( - (int)((c0 >> 8) & 0xff), - (int)((c1 >> 8) & 0xff), - (int)((c2 >> 8) & 0xff)); - int b = AddSubtractComponentFull((int)(c0 & 0xff), (int)(c1 & 0xff), (int)(c2 & 0xff)); - return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b; +#if SUPPORTS_RUNTIME_INTRINSICS + if (Sse2.IsSupported) + { + Vector128 c0Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c0).AsByte(), Vector128.Zero); + Vector128 c1Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c1).AsByte(), Vector128.Zero); + Vector128 c2Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c2).AsByte(), Vector128.Zero); + Vector128 v1 = Sse2.Add(c0Vec, c1Vec); + Vector128 v2 = Sse2.Subtract(v1, c2Vec); + Vector128 b = Sse2.PackUnsignedSaturate(v2.AsInt16(), v2.AsInt16()); + uint output = Sse2.ConvertToUInt32(b.AsUInt32()); + } +#endif + { + int a = AddSubtractComponentFull( + (int)(c0 >> 24), + (int)(c1 >> 24), + (int)(c2 >> 24)); + int r = AddSubtractComponentFull( + (int)((c0 >> 16) & 0xff), + (int)((c1 >> 16) & 0xff), + (int)((c2 >> 16) & 0xff)); + int g = AddSubtractComponentFull( + (int)((c0 >> 8) & 0xff), + (int)((c1 >> 8) & 0xff), + (int)((c2 >> 8) & 0xff)); + int b = AddSubtractComponentFull((int)(c0 & 0xff), (int)(c1 & 0xff), (int)(c2 & 0xff)); + return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b; + } } private static uint ClampedAddSubtractHalf(uint c0, uint c1, uint c2) From f6dbc7dd8ee95115315805dab2b9b38684e505b2 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 2 Nov 2021 14:40:59 +0100 Subject: [PATCH 28/85] Fix issue in ClampedAddSubtractFull --- src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs index 7740dc0515..65b39bd2d7 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs @@ -1207,10 +1207,11 @@ private static uint ClampedAddSubtractFull(uint c0, uint c1, uint c2) Vector128 c0Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c0).AsByte(), Vector128.Zero); Vector128 c1Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c1).AsByte(), Vector128.Zero); Vector128 c2Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c2).AsByte(), Vector128.Zero); - Vector128 v1 = Sse2.Add(c0Vec, c1Vec); - Vector128 v2 = Sse2.Subtract(v1, c2Vec); + Vector128 v1 = Sse2.Add(c0Vec.AsInt16(), c1Vec.AsInt16()); + Vector128 v2 = Sse2.Subtract(v1, c2Vec.AsInt16()); Vector128 b = Sse2.PackUnsignedSaturate(v2.AsInt16(), v2.AsInt16()); uint output = Sse2.ConvertToUInt32(b.AsUInt32()); + return output; } #endif { From 8fe280e9918e14ca2abb7ffd21ae35c969429447 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 2 Nov 2021 16:04:29 +0100 Subject: [PATCH 29/85] Add predictor 12 and 13 tests --- .../Formats/WebP/LosslessUtilsTests.cs | 58 +++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs b/tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs index bf381ebdaa..c70f332ef6 100644 --- a/tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs +++ b/tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs @@ -153,9 +153,55 @@ private static void RunPredictor11Test() } } + private static void RunPredictor12Test() + { + // arrange + uint[] topData = { 4294844413, 4294779388 }; + uint left = 4294844413; + uint expectedResult = 4294779388; + + // act + unsafe + { + fixed (uint* top = &topData[1]) + { + uint actual = LosslessUtils.Predictor12(left, top); + + // assert + Assert.Equal(expectedResult, actual); + } + } + } + + private static void RunPredictor13Test() + { + // arrange + uint[] topData = { 4278193922, 4278193666 }; + uint left = 4278193410; + uint expectedResult = 4278193154; + + // act + unsafe + { + fixed (uint* top = &topData[1]) + { + uint actual = LosslessUtils.Predictor13(left, top); + + // assert + Assert.Equal(expectedResult, actual); + } + } + } + [Fact] public void Predictor11_Works() => RunPredictor11Test(); + [Fact] + public void Predictor12_Works() => RunPredictor12Test(); + + [Fact] + public void Predictor13_Works() => RunPredictor13Test(); + [Fact] public void SubtractGreen_Works() => RunSubtractGreenTest(); @@ -175,6 +221,18 @@ private static void RunPredictor11Test() [Fact] public void Predictor11_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor11Test, HwIntrinsics.DisableSSE2); + [Fact] + public void Predictor12_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor12Test, HwIntrinsics.AllowAll); + + [Fact] + public void Predictor12_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor12Test, HwIntrinsics.DisableSSE2); + + [Fact] + public void Predictor13_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor13Test, HwIntrinsics.AllowAll); + + [Fact] + public void Predictor13_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor13Test, HwIntrinsics.DisableSSE2); + [Fact] public void SubtractGreen_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunSubtractGreenTest, HwIntrinsics.AllowAll); From ffdf99bad2d8f4fb9d52a3938f3c64d750f09957 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 2 Nov 2021 16:29:52 +0100 Subject: [PATCH 30/85] Add aggressive inlining --- src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs | 8 ++++++++ src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs | 1 + 2 files changed, 9 insertions(+) diff --git a/src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs b/src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs index 8596d85558..02bbc38fcf 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs @@ -1,6 +1,8 @@ // Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. +using System.Runtime.CompilerServices; + namespace SixLabors.ImageSharp.Formats.Webp.Lossless { /// @@ -41,6 +43,7 @@ public void Init(int hashBits) /// Inserts a new color into the cache. /// /// The color to insert. + [MethodImpl(InliningOptions.ShortMethod)] public void Insert(uint bgra) { int key = HashPix(bgra, this.HashShift); @@ -52,6 +55,7 @@ public void Insert(uint bgra) /// /// The key to lookup. /// The color for the key. + [MethodImpl(InliningOptions.ShortMethod)] public uint Lookup(int key) => this.Colors[key]; /// @@ -59,6 +63,7 @@ public void Insert(uint bgra) /// /// The color to check. /// The index of the color in the cache or -1 if its not present. + [MethodImpl(InliningOptions.ShortMethod)] public int Contains(uint bgra) { int key = HashPix(bgra, this.HashShift); @@ -70,6 +75,7 @@ public int Contains(uint bgra) /// /// The color. /// The index for the color. + [MethodImpl(InliningOptions.ShortMethod)] public int GetIndex(uint bgra) => HashPix(bgra, this.HashShift); /// @@ -77,8 +83,10 @@ public int Contains(uint bgra) /// /// The key. /// The color to add. + [MethodImpl(InliningOptions.ShortMethod)] public void Set(uint key, uint bgra) => this.Colors[key] = bgra; + [MethodImpl(InliningOptions.ShortMethod)] public static int HashPix(uint argb, int shift) => (int)((argb * HashMul) >> shift); } } diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs index 65b39bd2d7..9baa6c3c33 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs @@ -752,6 +752,7 @@ public static byte TransformColorBlue(sbyte greenToBlue, sbyte redToBlue, uint a /// /// Fast calculation of log2(v) for integer input. /// + [MethodImpl(InliningOptions.ShortMethod)] public static float FastLog2(uint v) => v < LogLookupIdxMax ? WebpLookupTables.Log2Table[v] : FastLog2Slow(v); /// From fc8d8b81d98201955655595fe682a0c5533eb6ea Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 2 Nov 2021 21:56:19 +0100 Subject: [PATCH 31/85] Remove unnecessary cast AsInt16() --- src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs index 9baa6c3c33..8bd3163ccb 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs @@ -1210,7 +1210,7 @@ private static uint ClampedAddSubtractFull(uint c0, uint c1, uint c2) Vector128 c2Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c2).AsByte(), Vector128.Zero); Vector128 v1 = Sse2.Add(c0Vec.AsInt16(), c1Vec.AsInt16()); Vector128 v2 = Sse2.Subtract(v1, c2Vec.AsInt16()); - Vector128 b = Sse2.PackUnsignedSaturate(v2.AsInt16(), v2.AsInt16()); + Vector128 b = Sse2.PackUnsignedSaturate(v2, v2); uint output = Sse2.ConvertToUInt32(b.AsUInt32()); return output; } From 1e4352b8a1a2468d8a34297c1650c3e7b8e19fb7 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Wed, 3 Nov 2021 10:25:02 +0100 Subject: [PATCH 32/85] Remove unnecessary SetEndOfStream, we already have read all bytes from the stream BitReaderBase --- .../Formats/Webp/BitReader/Vp8LBitReader.cs | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/BitReader/Vp8LBitReader.cs b/src/ImageSharp/Formats/Webp/BitReader/Vp8LBitReader.cs index 07423e3127..4df2feba81 100644 --- a/src/ImageSharp/Formats/Webp/BitReader/Vp8LBitReader.cs +++ b/src/ImageSharp/Formats/Webp/BitReader/Vp8LBitReader.cs @@ -138,7 +138,6 @@ public uint ReadValue(int nBits) return (uint)val; } - this.SetEndOfStream(); return 0; } @@ -203,17 +202,6 @@ private void ShiftBytes() ++this.pos; this.bitPos -= 8; } - - if (this.IsEndOfStream()) - { - this.SetEndOfStream(); - } - } - - private void SetEndOfStream() - { - this.Eos = true; - this.bitPos = 0; // To avoid undefined behaviour with shifts. } } } From 47794dfbcb192ec8c610a5e21d03da8b279ef5e1 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Wed, 3 Nov 2021 10:36:29 +0100 Subject: [PATCH 33/85] Change Guard to DebugGuard in ReadValue --- src/ImageSharp/Formats/Webp/BitReader/Vp8BitReader.cs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/BitReader/Vp8BitReader.cs b/src/ImageSharp/Formats/Webp/BitReader/Vp8BitReader.cs index abf44127a9..d6ceca5bf5 100644 --- a/src/ImageSharp/Formats/Webp/BitReader/Vp8BitReader.cs +++ b/src/ImageSharp/Formats/Webp/BitReader/Vp8BitReader.cs @@ -142,10 +142,11 @@ public int GetSigned(int v) [MethodImpl(InliningOptions.ShortMethod)] public bool ReadBool() => this.ReadValue(1) is 1; + [MethodImpl(InliningOptions.ShortMethod)] public uint ReadValue(int nBits) { - Guard.MustBeGreaterThan(nBits, 0, nameof(nBits)); - Guard.MustBeLessThanOrEqualTo(nBits, 32, nameof(nBits)); + DebugGuard.MustBeGreaterThan(nBits, 0, nameof(nBits)); + DebugGuard.MustBeLessThanOrEqualTo(nBits, 32, nameof(nBits)); uint v = 0; while (nBits-- > 0) @@ -156,10 +157,11 @@ public uint ReadValue(int nBits) return v; } + [MethodImpl(InliningOptions.ShortMethod)] public int ReadSignedValue(int nBits) { - Guard.MustBeGreaterThan(nBits, 0, nameof(nBits)); - Guard.MustBeLessThanOrEqualTo(nBits, 32, nameof(nBits)); + DebugGuard.MustBeGreaterThan(nBits, 0, nameof(nBits)); + DebugGuard.MustBeLessThanOrEqualTo(nBits, 32, nameof(nBits)); int value = (int)this.ReadValue(nBits); return this.ReadValue(1) != 0 ? -value : value; From f9212f7adca384b1147af10a38e3ec0d8dcc12d2 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 3 Nov 2021 22:38:52 +1100 Subject: [PATCH 34/85] Update tests/ImageSharp.Tests/Color/ColorTests.CastTo.cs Co-authored-by: Anton Firszov --- tests/ImageSharp.Tests/Color/ColorTests.CastTo.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ImageSharp.Tests/Color/ColorTests.CastTo.cs b/tests/ImageSharp.Tests/Color/ColorTests.CastTo.cs index d3f3cf126e..af35d1f895 100644 --- a/tests/ImageSharp.Tests/Color/ColorTests.CastTo.cs +++ b/tests/ImageSharp.Tests/Color/ColorTests.CastTo.cs @@ -92,7 +92,7 @@ public void Bgr24() [Fact] public void TPixel() { - var source = new RgbaVector(1, .1F, .133F, .864F); + var source = new RgbaVector(float.Epsilon, 2 * float.Epsilon, float.MaxValue, float.MinValue); // Act: var color = Color.FromPixel(source); From 4598b1461801d1893c61e66ae75d34d1249c4bf3 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Wed, 3 Nov 2021 13:00:05 +0100 Subject: [PATCH 35/85] Use ReadOnlySpan for byte and sbyte arrays --- .../Formats/Webp/WebpLookupTables.cs | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/WebpLookupTables.cs b/src/ImageSharp/Formats/Webp/WebpLookupTables.cs index 3b5d677293..bf47b01bca 100644 --- a/src/ImageSharp/Formats/Webp/WebpLookupTables.cs +++ b/src/ImageSharp/Formats/Webp/WebpLookupTables.cs @@ -253,7 +253,8 @@ internal static class WebpLookupTables 0 }; - public static readonly byte[] NewRange = + // This uses C#'s compiler optimization to refer to assembly's static data directly. + public static ReadOnlySpan NewRange => new byte[] { // range = ((range + 1) << kVP8Log2Range[range]) - 1 127, 127, 191, 127, 159, 191, 223, 127, 143, 159, 175, 191, 207, 223, 239, @@ -571,7 +572,8 @@ internal static class WebpLookupTables }; // Paragraph 14.1 - public static readonly byte[] DcTable = + // This uses C#'s compiler optimization to refer to assembly's static data directly. + public static ReadOnlySpan DcTable => new byte[] { 4, 5, 6, 7, 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 17, @@ -1046,7 +1048,8 @@ public static readonly (int Code, int ExtraBits)[] PrefixEncodeCode = (17, 7), (17, 7), (17, 7), (17, 7), (17, 7), (17, 7), (17, 7), (17, 7), }; - public static readonly byte[] PrefixEncodeExtraBitsValue = + // This uses C#'s compiler optimization to refer to assembly's static data directly. + public static ReadOnlySpan PrefixEncodeExtraBitsValue => new byte[] { 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, @@ -1241,7 +1244,8 @@ static WebpLookupTables() InitializeFixedCostsI4(); } - private static readonly byte[] Abs0Table = + // This uses C#'s compiler optimization to refer to assembly's static data directly. + private static ReadOnlySpan Abs0Table => new byte[] { 0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xf8, 0xf7, 0xf6, 0xf5, 0xf4, 0xf3, 0xf2, 0xf1, 0xf0, 0xef, 0xee, 0xed, 0xec, 0xeb, 0xea, 0xe9, 0xe8, 0xe7, 0xe6, 0xe5, 0xe4, 0xe3, 0xe2, 0xe1, 0xe0, 0xdf, 0xde, @@ -1276,7 +1280,8 @@ static WebpLookupTables() 0xff }; - private static readonly byte[] Clip1Table = + // This uses C#'s compiler optimization to refer to assembly's static data directly. + private static ReadOnlySpan Clip1Table => new byte[] { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -1326,7 +1331,8 @@ static WebpLookupTables() 0xff }; - private static readonly sbyte[] Sclip1Table = + // This uses C#'s compiler optimization to refer to assembly's static data directly. + private static ReadOnlySpan Sclip1Table => new sbyte[] { -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, @@ -1437,7 +1443,8 @@ static WebpLookupTables() 127, 127, 127, 127, 127, 127, 127, 127, 127 }; - private static readonly sbyte[] Sclip2Table = + // This uses C#'s compiler optimization to refer to assembly's static data directly. + private static ReadOnlySpan Sclip2Table => new sbyte[] { -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, From 425600459e96cc5d34857fd9e0de45952fa8e6ae Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 3 Nov 2021 23:49:32 +1100 Subject: [PATCH 36/85] Update Color.Equals --- src/ImageSharp/Color/Color.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ImageSharp/Color/Color.cs b/src/ImageSharp/Color/Color.cs index fe66efcfb5..61d6c8e6d5 100644 --- a/src/ImageSharp/Color/Color.cs +++ b/src/ImageSharp/Color/Color.cs @@ -277,7 +277,7 @@ public bool Equals(Color other) return this.data.PackedValue == other.data.PackedValue; } - return this.ToVector4().Equals(other.ToVector4()); + return this.boxedHighPrecisionPixel?.Equals(other.boxedHighPrecisionPixel) == true; } /// From 08785103e350266f626b3519b22e3966b4450caa Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Thu, 4 Nov 2021 12:39:42 +0100 Subject: [PATCH 37/85] Add EntropyPasses default value explicit to 1 --- src/ImageSharp/Formats/Webp/IWebpEncoderOptions.cs | 1 + src/ImageSharp/Formats/Webp/WebpEncoder.cs | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/ImageSharp/Formats/Webp/IWebpEncoderOptions.cs b/src/ImageSharp/Formats/Webp/IWebpEncoderOptions.cs index 7dbf49d45e..000de4f88c 100644 --- a/src/ImageSharp/Formats/Webp/IWebpEncoderOptions.cs +++ b/src/ImageSharp/Formats/Webp/IWebpEncoderOptions.cs @@ -35,6 +35,7 @@ internal interface IWebpEncoderOptions /// /// Gets the number of entropy-analysis passes (in [1..10]). + /// Defaults to 1. /// int EntropyPasses { get; } diff --git a/src/ImageSharp/Formats/Webp/WebpEncoder.cs b/src/ImageSharp/Formats/Webp/WebpEncoder.cs index f85f65b635..bdcbb194b1 100644 --- a/src/ImageSharp/Formats/Webp/WebpEncoder.cs +++ b/src/ImageSharp/Formats/Webp/WebpEncoder.cs @@ -27,7 +27,7 @@ public sealed class WebpEncoder : IImageEncoder, IWebpEncoderOptions public bool UseAlphaCompression { get; set; } /// - public int EntropyPasses { get; set; } + public int EntropyPasses { get; set; } = 1; /// public int SpatialNoiseShaping { get; set; } = 50; From 947dc8d5ecff64414247ede191452cf8c7a77c26 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Thu, 4 Nov 2021 12:40:39 +0100 Subject: [PATCH 38/85] Make sure magick.net and imagesharp use the same configuration --- .../Codecs/EncodeWebp.cs | 45 ++++++++++++++++--- 1 file changed, 39 insertions(+), 6 deletions(-) diff --git a/tests/ImageSharp.Benchmarks/Codecs/EncodeWebp.cs b/tests/ImageSharp.Benchmarks/Codecs/EncodeWebp.cs index 7d3dfe693c..59814f465c 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/EncodeWebp.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/EncodeWebp.cs @@ -4,6 +4,7 @@ using System.IO; using BenchmarkDotNet.Attributes; using ImageMagick; +using ImageMagick.Formats; using SixLabors.ImageSharp.Formats.Webp; using SixLabors.ImageSharp.PixelFormats; using SixLabors.ImageSharp.Tests; @@ -44,8 +45,22 @@ public void Cleanup() public void MagickWebpLossy() { using var memoryStream = new MemoryStream(); - this.webpMagick.Settings.SetDefine(MagickFormat.WebP, "lossless", false); - this.webpMagick.Write(memoryStream, MagickFormat.WebP); + + var defines = new WebPWriteDefines + { + Lossless = false, + Method = 4, + AlphaCompression = WebPAlphaCompression.None, + FilterStrength = 60, + SnsStrength = 50, + Pass = 1, + + // 100 means off. + NearLossless = 100 + }; + + this.webpMagick.Settings.SetDefine(MagickFormat.WebP, "quality", 75); + this.webpMagick.Write(memoryStream, defines); } [Benchmark(Description = "ImageSharp Webp Lossy")] @@ -54,7 +69,12 @@ public void ImageSharpWebpLossy() using var memoryStream = new MemoryStream(); this.webp.Save(memoryStream, new WebpEncoder() { - FileFormat = WebpFileFormatType.Lossy + FileFormat = WebpFileFormatType.Lossy, + Method = WebpEncodingMethod.Level4, + UseAlphaCompression = false, + FilterStrength = 60, + SpatialNoiseShaping = 50, + EntropyPasses = 1 }); } @@ -62,8 +82,18 @@ public void ImageSharpWebpLossy() public void MagickWebpLossless() { using var memoryStream = new MemoryStream(); - this.webpMagick.Settings.SetDefine(MagickFormat.WebP, "lossless", true); - this.webpMagick.Write(memoryStream, MagickFormat.WebP); + var defines = new WebPWriteDefines + { + Lossless = true, + Method = 4, + + // 100 means off. + NearLossless = 100 + }; + + this.webpMagick.Settings.SetDefine(MagickFormat.WebP, "exact", false); + this.webpMagick.Settings.SetDefine(MagickFormat.WebP, "quality", 75); + this.webpMagick.Write(memoryStream, defines); } [Benchmark(Description = "ImageSharp Webp Lossless")] @@ -72,7 +102,10 @@ public void ImageSharpWebpLossless() using var memoryStream = new MemoryStream(); this.webp.Save(memoryStream, new WebpEncoder() { - FileFormat = WebpFileFormatType.Lossless + FileFormat = WebpFileFormatType.Lossless, + Method = WebpEncodingMethod.Level4, + NearLossless = false, + TransparentColorMode = WebpTransparentColorMode.Clear }); } From 55b67ada2f659463f438303e77d0f1b1de4c47bc Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Thu, 4 Nov 2021 21:40:02 +0100 Subject: [PATCH 39/85] Use webpMagick.Quality for the quality parameter --- tests/ImageSharp.Benchmarks/Codecs/EncodeWebp.cs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/ImageSharp.Benchmarks/Codecs/EncodeWebp.cs b/tests/ImageSharp.Benchmarks/Codecs/EncodeWebp.cs index 59814f465c..2229849921 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/EncodeWebp.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/EncodeWebp.cs @@ -59,7 +59,7 @@ public void MagickWebpLossy() NearLossless = 100 }; - this.webpMagick.Settings.SetDefine(MagickFormat.WebP, "quality", 75); + this.webpMagick.Quality = 75; this.webpMagick.Write(memoryStream, defines); } @@ -91,8 +91,7 @@ public void MagickWebpLossless() NearLossless = 100 }; - this.webpMagick.Settings.SetDefine(MagickFormat.WebP, "exact", false); - this.webpMagick.Settings.SetDefine(MagickFormat.WebP, "quality", 75); + this.webpMagick.Quality = 75; this.webpMagick.Write(memoryStream, defines); } @@ -105,6 +104,8 @@ public void ImageSharpWebpLossless() FileFormat = WebpFileFormatType.Lossless, Method = WebpEncodingMethod.Level4, NearLossless = false, + + // This is equal to exact = false in libwebp, which is the default. TransparentColorMode = WebpTransparentColorMode.Clear }); } From d6d952e477b0653b2750210ad4cd2d3fc14bbaec Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Thu, 4 Nov 2021 23:12:01 +0100 Subject: [PATCH 40/85] Remove another unnecessary cast AsInt16() --- src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs index 8bd3163ccb..ee9ea51237 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs @@ -1246,8 +1246,8 @@ private static uint ClampedAddSubtractHalf(uint c0, uint c1, uint c2) Vector128 a1 = Sse2.Subtract(a0, b0.AsInt16()); Vector128 bgta = Sse2.CompareGreaterThan(b0.AsInt16(), a0.AsInt16()); Vector128 a2 = Sse2.Subtract(a1, bgta); - Vector128 a3 = Sse2.ShiftRightArithmetic(a2.AsInt16(), 1); - Vector128 a4 = Sse2.Add(a0.AsInt16(), a3).AsInt16(); + Vector128 a3 = Sse2.ShiftRightArithmetic(a2, 1); + Vector128 a4 = Sse2.Add(a0, a3).AsInt16(); Vector128 a5 = Sse2.PackUnsignedSaturate(a4, a4); uint output = Sse2.ConvertToUInt32(a5.AsUInt32()); return output; From e97c364b373ffcc8bf11295ee9597bff3af7b927 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Fri, 5 Nov 2021 12:40:26 +0100 Subject: [PATCH 41/85] Use AsSpan() parameters to slice --- src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs index 6320983bab..3c81f1a22c 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs @@ -203,10 +203,10 @@ public static void GenerateOptimalTree(HuffmanTree[] tree, uint[] histogram, int // Build the Huffman tree. #if NET5_0_OR_GREATER - Span treeSlice = tree.AsSpan().Slice(0, treeSize); + Span treeSlice = tree.AsSpan(0, treeSize); treeSlice.Sort(HuffmanTree.Compare); #else - HuffmanTree[] treeCopy = tree.AsSpan().Slice(0, treeSize).ToArray(); + HuffmanTree[] treeCopy = tree.AsSpan(0, treeSize).ToArray(); Array.Sort(treeCopy, HuffmanTree.Compare); treeCopy.AsSpan().CopyTo(tree); #endif From 2b6dbbce6fb6561a7fbddb0bd08afe69b9349382 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Fri, 5 Nov 2021 12:46:53 +0100 Subject: [PATCH 42/85] Update benchmark results --- .../Codecs/DecodeWebp.cs | 49 ++++++++--------- .../Codecs/EncodeWebp.cs | 55 +++++++++---------- 2 files changed, 48 insertions(+), 56 deletions(-) diff --git a/tests/ImageSharp.Benchmarks/Codecs/DecodeWebp.cs b/tests/ImageSharp.Benchmarks/Codecs/DecodeWebp.cs index 407a4ef3b2..878929823d 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/DecodeWebp.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/DecodeWebp.cs @@ -76,34 +76,29 @@ public int WebpLossless() return image.Height; } - /* Results 17.06.2021 - * BenchmarkDotNet=v0.12.0, OS=Windows 10.0.18362 + /* Results 04.11.2021 + * BenchmarkDotNet=v0.13.0, OS=Windows 10.0.19043.1320 (21H1/May2021Update) Intel Core i7-6700K CPU 4.00GHz (Skylake), 1 CPU, 8 logical and 4 physical cores - .NET Core SDK=3.1.202 - [Host] : .NET Core 3.1.4 (CoreCLR 4.700.20.20201, CoreFX 4.700.20.22101), X64 RyuJIT - Job-AQFZAV : .NET Framework 4.8 (4.8.4180.0), X64 RyuJIT - Job-YCDAPQ : .NET Core 2.1.18 (CoreCLR 4.6.28801.04, CoreFX 4.6.28802.05), X64 RyuJIT - Job-WMTYOZ : .NET Core 3.1.4 (CoreCLR 4.700.20.20201, CoreFX 4.700.20.22101), X64 RyuJIT - - IterationCount=3 LaunchCount=1 WarmupCount=3 - | Method | Job | Runtime | TestImageLossy | TestImageLossless | Mean | Error | StdDev | Gen 0 | Gen 1 | Gen 2 | Allocated | - |--------------------------- |----------- |-------------- |---------------------- |------------------------- |-----------:|----------:|---------:|----------:|----------:|------:|------------:| - | 'Magick Lossy Webp' | Job-IERNAB | .NET 4.7.2 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 105.8 ms | 6.28 ms | 0.34 ms | - | - | - | 17.65 KB | - | 'ImageSharp Lossy Webp' | Job-IERNAB | .NET 4.7.2 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 1,145.0 ms | 110.82 ms | 6.07 ms | - | - | - | 2779.53 KB | - | 'Magick Lossless Webp' | Job-IERNAB | .NET 4.7.2 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 145.9 ms | 8.55 ms | 0.47 ms | - | - | - | 18.05 KB | - | 'ImageSharp Lossless Webp' | Job-IERNAB | .NET 4.7.2 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 1,694.1 ms | 55.09 ms | 3.02 ms | 4000.0000 | 1000.0000 | - | 30556.87 KB | - | 'Magick Lossy Webp' | Job-IMRAGJ | .NET Core 2.1 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 105.7 ms | 1.89 ms | 0.10 ms | - | - | - | 15.75 KB | - | 'ImageSharp Lossy Webp' | Job-IMRAGJ | .NET Core 2.1 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 741.6 ms | 21.45 ms | 1.18 ms | - | - | - | 2767.85 KB | - | 'Magick Lossless Webp' | Job-IMRAGJ | .NET Core 2.1 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 146.1 ms | 9.52 ms | 0.52 ms | - | - | - | 16.54 KB | - | 'ImageSharp Lossless Webp' | Job-IMRAGJ | .NET Core 2.1 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 522.5 ms | 21.15 ms | 1.16 ms | 4000.0000 | 1000.0000 | - | 22860.02 KB | - | 'Magick Lossy Webp' | Job-NAASQX | .NET Core 3.1 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 105.9 ms | 5.34 ms | 0.29 ms | - | - | - | 15.45 KB | - | 'ImageSharp Lossy Webp' | Job-NAASQX | .NET Core 3.1 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 748.8 ms | 290.47 ms | 15.92 ms | - | - | - | 2767.84 KB | - | 'Magick Lossless Webp' | Job-NAASQX | .NET Core 3.1 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 146.1 ms | 1.14 ms | 0.06 ms | - | - | - | 15.9 KB | - | 'ImageSharp Lossless Webp' | Job-NAASQX | .NET Core 3.1 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 480.7 ms | 25.25 ms | 1.38 ms | 4000.0000 | 1000.0000 | - | 22859.7 KB | - | 'Magick Lossy Webp' | Job-GLNACU | .NET Core 5.0 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 105.7 ms | 4.71 ms | 0.26 ms | - | - | - | 15.48 KB | - | 'ImageSharp Lossy Webp' | Job-GLNACU | .NET Core 5.0 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 645.7 ms | 61.00 ms | 3.34 ms | - | - | - | 2768.13 KB | - | 'Magick Lossless Webp' | Job-GLNACU | .NET Core 5.0 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 146.5 ms | 18.63 ms | 1.02 ms | - | - | - | 15.8 KB | - | 'ImageSharp Lossless Webp' | Job-GLNACU | .NET Core 5.0 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 306.7 ms | 32.31 ms | 1.77 ms | 4000.0000 | 1000.0000 | - | 22860.02 KB | + .NET SDK=6.0.100-rc.2.21505.57 + [Host] : .NET 5.0.11 (5.0.1121.47308), X64 RyuJIT + Job-WQLXJO : .NET 5.0.11 (5.0.1121.47308), X64 RyuJIT + Job-OJJAMD : .NET Core 3.1.20 (CoreCLR 4.700.21.47003, CoreFX 4.700.21.47101), X64 RyuJIT + Job-OMFOAS : .NET Framework 4.8 (4.8.4420.0), X64 RyuJIT + + | Method | Job | Runtime | Arguments | TestImageLossy | TestImageLossless | Mean | Error | StdDev | Gen 0 | Gen 1 | Gen 2 | Allocated | + |--------------------------- |----------- |--------------------- |---------------------- |---------------------- |------------------------- |-----------:|----------:|--------:|---------:|------:|------:|----------:| + | 'Magick Lossy Webp' | Job-HLWZLL | .NET 5.0 | /p:DebugType=portable | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 107.9 ms | 28.91 ms | 1.58 ms | - | - | - | 25 KB | + | 'ImageSharp Lossy Webp' | Job-HLWZLL | .NET 5.0 | /p:DebugType=portable | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 282.3 ms | 25.40 ms | 1.39 ms | 500.0000 | - | - | 2,428 KB | + | 'Magick Lossless Webp' | Job-HLWZLL | .NET 5.0 | /p:DebugType=portable | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 106.3 ms | 11.99 ms | 0.66 ms | - | - | - | 16 KB | + | 'ImageSharp Lossless Webp' | Job-HLWZLL | .NET 5.0 | /p:DebugType=portable | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 280.2 ms | 6.21 ms | 0.34 ms | - | - | - | 2,092 KB | + | 'Magick Lossy Webp' | Job-ALQPDS | .NET Core 3.1 | Default | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 106.2 ms | 9.32 ms | 0.51 ms | - | - | - | 15 KB | + | 'ImageSharp Lossy Webp' | Job-ALQPDS | .NET Core 3.1 | Default | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 295.8 ms | 21.25 ms | 1.16 ms | 500.0000 | - | - | 2,427 KB | + | 'Magick Lossless Webp' | Job-ALQPDS | .NET Core 3.1 | Default | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 106.5 ms | 4.07 ms | 0.22 ms | - | - | - | 15 KB | + | 'ImageSharp Lossless Webp' | Job-ALQPDS | .NET Core 3.1 | Default | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 464.0 ms | 55.70 ms | 3.05 ms | - | - | - | 2,090 KB | + | 'Magick Lossy Webp' | Job-RYVVNN | .NET Framework 4.7.2 | Default | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 108.0 ms | 29.60 ms | 1.62 ms | - | - | - | 32 KB | + | 'ImageSharp Lossy Webp' | Job-RYVVNN | .NET Framework 4.7.2 | Default | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 564.9 ms | 29.69 ms | 1.63 ms | - | - | - | 2,436 KB | + | 'Magick Lossless Webp' | Job-RYVVNN | .NET Framework 4.7.2 | Default | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 106.2 ms | 4.74 ms | 0.26 ms | - | - | - | 18 KB | + | 'ImageSharp Lossless Webp' | Job-RYVVNN | .NET Framework 4.7.2 | Default | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 1,767.5 ms | 106.33 ms | 5.83 ms | - | - | - | 9,729 KB | */ } } diff --git a/tests/ImageSharp.Benchmarks/Codecs/EncodeWebp.cs b/tests/ImageSharp.Benchmarks/Codecs/EncodeWebp.cs index 2229849921..43d8c464ce 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/EncodeWebp.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/EncodeWebp.cs @@ -110,37 +110,34 @@ public void ImageSharpWebpLossless() }); } - /* Results 17.06.2021 + /* Results 04.11.2021 * Summary * - BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.630 (2004/?/20H1) + BenchmarkDotNet=v0.13.0, OS=Windows 10.0.19043.1320 (21H1/May2021Update) Intel Core i7-6700K CPU 4.00GHz (Skylake), 1 CPU, 8 logical and 4 physical cores - .NET Core SDK=5.0.100 - [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT - Job-OUUGWL : .NET Framework 4.8 (4.8.4250.0), X64 RyuJIT - Job-GAIITM : .NET Core 2.1.23 (CoreCLR 4.6.29321.03, CoreFX 4.6.29321.01), X64 RyuJIT - Job-HWOBSO : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT - - | Method | Job | Runtime | TestImage | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | - |--------------------------- |----------- |-------------- |------------- |----------:|-----------:|----------:|------:|--------:|-----------:|----------:|----------:|-------------:| - | 'Magick Webp Lossy' | Job-RYVNHD | .NET 4.7.2 | Png/Bike.png | 23.30 ms | 0.869 ms | 0.048 ms | 0.14 | 0.00 | - | - | - | 68.19 KB | - | 'ImageSharp Webp Lossy' | Job-RYVNHD | .NET 4.7.2 | Png/Bike.png | 68.22 ms | 16.454 ms | 0.902 ms | 0.42 | 0.01 | 6125.0000 | 125.0000 | - | 26359.49 KB | - | 'Magick Webp Lossless' | Job-RYVNHD | .NET 4.7.2 | Png/Bike.png | 161.96 ms | 9.879 ms | 0.541 ms | 1.00 | 0.00 | - | - | - | 520.28 KB | - | 'ImageSharp Webp Lossless' | Job-RYVNHD | .NET 4.7.2 | Png/Bike.png | 370.88 ms | 58.875 ms | 3.227 ms | 2.29 | 0.02 | 34000.0000 | 5000.0000 | 2000.0000 | 163177.15 KB | - | | | | | | | | | | | | | | - | 'Magick Webp Lossy' | Job-GOZXWU | .NET Core 2.1 | Png/Bike.png | 23.35 ms | 0.428 ms | 0.023 ms | 0.14 | 0.00 | - | - | - | 67.76 KB | - | 'ImageSharp Webp Lossy' | Job-GOZXWU | .NET Core 2.1 | Png/Bike.png | 43.95 ms | 2.850 ms | 0.156 ms | 0.27 | 0.00 | 6250.0000 | 250.0000 | 83.3333 | 26284.72 KB | - | 'Magick Webp Lossless' | Job-GOZXWU | .NET Core 2.1 | Png/Bike.png | 161.44 ms | 3.749 ms | 0.206 ms | 1.00 | 0.00 | - | - | - | 519.26 KB | - | 'ImageSharp Webp Lossless' | Job-GOZXWU | .NET Core 2.1 | Png/Bike.png | 335.78 ms | 78.666 ms | 4.312 ms | 2.08 | 0.03 | 34000.0000 | 5000.0000 | 2000.0000 | 162727.56 KB | - | | | | | | | | | | | | | | - | 'Magick Webp Lossy' | Job-VRDVKW | .NET Core 3.1 | Png/Bike.png | 23.48 ms | 4.325 ms | 0.237 ms | 0.15 | 0.00 | - | - | - | 67.66 KB | - | 'ImageSharp Webp Lossy' | Job-VRDVKW | .NET Core 3.1 | Png/Bike.png | 43.29 ms | 16.503 ms | 0.905 ms | 0.27 | 0.01 | 6272.7273 | 272.7273 | 90.9091 | 26284.86 KB | - | 'Magick Webp Lossless' | Job-VRDVKW | .NET Core 3.1 | Png/Bike.png | 161.81 ms | 10.693 ms | 0.586 ms | 1.00 | 0.00 | - | - | - | 523.25 KB | - | 'ImageSharp Webp Lossless' | Job-VRDVKW | .NET Core 3.1 | Png/Bike.png | 323.97 ms | 235.468 ms | 12.907 ms | 2.00 | 0.08 | 34000.0000 | 5000.0000 | 2000.0000 | 162724.84 KB | - | | | | | | | | | | | | | | - | 'Magick Webp Lossy' | Job-ZJRLRB | .NET Core 5.0 | Png/Bike.png | 23.36 ms | 0.448 ms | 0.025 ms | 0.14 | 0.00 | - | - | - | 67.66 KB | - | 'ImageSharp Webp Lossy' | Job-ZJRLRB | .NET Core 5.0 | Png/Bike.png | 40.11 ms | 2.465 ms | 0.135 ms | 0.25 | 0.00 | 6307.6923 | 230.7692 | 76.9231 | 26284.71 KB | - | 'Magick Webp Lossless' | Job-ZJRLRB | .NET Core 5.0 | Png/Bike.png | 161.55 ms | 6.662 ms | 0.365 ms | 1.00 | 0.00 | - | - | - | 518.84 KB | - | 'ImageSharp Webp Lossless' | Job-ZJRLRB | .NET Core 5.0 | Png/Bike.png | 298.73 ms | 17.953 ms | 0.984 ms | 1.85 | 0.01 | 34000.0000 | 5000.0000 | 2000.0000 | 162725.13 KB | + .NET SDK=6.0.100-rc.2.21505.57 + [Host] : .NET 5.0.11 (5.0.1121.47308), X64 RyuJIT + Job-WQLXJO : .NET 5.0.11 (5.0.1121.47308), X64 RyuJIT + Job-OJJAMD : .NET Core 3.1.20 (CoreCLR 4.700.21.47003, CoreFX 4.700.21.47101), X64 RyuJIT + Job-OMFOAS : .NET Framework 4.8 (4.8.4420.0), X64 RyuJIT + + IterationCount=3 LaunchCount=1 WarmupCount=3 + + | Method | Job | Runtime | Arguments | TestImage | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | + |--------------------------- |----------- |--------------------- |---------------------- |------------- |----------:|----------:|---------:|------:|--------:|------------:|----------:|----------:|-----------:| + | 'Magick Webp Lossy' | Job-WQLXJO | .NET 5.0 | /p:DebugType=portable | Png/Bike.png | 23.33 ms | 1.491 ms | 0.082 ms | 0.15 | 0.00 | - | - | - | 67 KB | + | 'ImageSharp Webp Lossy' | Job-WQLXJO | .NET 5.0 | /p:DebugType=portable | Png/Bike.png | 245.80 ms | 24.288 ms | 1.331 ms | 1.53 | 0.01 | 135000.0000 | - | - | 552,713 KB | + | 'Magick Webp Lossless' | Job-WQLXJO | .NET 5.0 | /p:DebugType=portable | Png/Bike.png | 160.36 ms | 11.131 ms | 0.610 ms | 1.00 | 0.00 | - | - | - | 518 KB | + | 'ImageSharp Webp Lossless' | Job-WQLXJO | .NET 5.0 | /p:DebugType=portable | Png/Bike.png | 313.93 ms | 45.605 ms | 2.500 ms | 1.96 | 0.01 | 34000.0000 | 5000.0000 | 2000.0000 | 161,670 KB | + | | | | | | | | | | | | | | | + | 'Magick Webp Lossy' | Job-OJJAMD | .NET Core 3.1 | Default | Png/Bike.png | 23.36 ms | 2.289 ms | 0.125 ms | 0.15 | 0.00 | - | - | - | 67 KB | + | 'ImageSharp Webp Lossy' | Job-OJJAMD | .NET Core 3.1 | Default | Png/Bike.png | 254.64 ms | 19.620 ms | 1.075 ms | 1.59 | 0.00 | 135000.0000 | - | - | 552,713 KB | + | 'Magick Webp Lossless' | Job-OJJAMD | .NET Core 3.1 | Default | Png/Bike.png | 160.30 ms | 9.549 ms | 0.523 ms | 1.00 | 0.00 | - | - | - | 518 KB | + | 'ImageSharp Webp Lossless' | Job-OJJAMD | .NET Core 3.1 | Default | Png/Bike.png | 320.35 ms | 22.924 ms | 1.257 ms | 2.00 | 0.01 | 34000.0000 | 5000.0000 | 2000.0000 | 161,669 KB | + | | | | | | | | | | | | | | | + | 'Magick Webp Lossy' | Job-OMFOAS | .NET Framework 4.7.2 | Default | Png/Bike.png | 23.37 ms | 0.908 ms | 0.050 ms | 0.15 | 0.00 | - | - | - | 68 KB | + | 'ImageSharp Webp Lossy' | Job-OMFOAS | .NET Framework 4.7.2 | Default | Png/Bike.png | 378.67 ms | 25.540 ms | 1.400 ms | 2.36 | 0.01 | 135000.0000 | - | - | 554,351 KB | + | 'Magick Webp Lossless' | Job-OMFOAS | .NET Framework 4.7.2 | Default | Png/Bike.png | 160.13 ms | 5.115 ms | 0.280 ms | 1.00 | 0.00 | - | - | - | 520 KB | + | 'ImageSharp Webp Lossless' | Job-OMFOAS | .NET Framework 4.7.2 | Default | Png/Bike.png | 379.01 ms | 71.192 ms | 3.902 ms | 2.37 | 0.02 | 34000.0000 | 5000.0000 | 2000.0000 | 162,119 KB | */ } } From b9e8f76990206843b485006bac8b9ff2cceb05ed Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sun, 7 Nov 2021 18:07:43 +1100 Subject: [PATCH 43/85] Update FromPixel --- src/ImageSharp/Color/Color.Conversions.cs | 11 +++++++++++ src/ImageSharp/Color/Color.cs | 22 +++++++++++++++++++++- 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/src/ImageSharp/Color/Color.Conversions.cs b/src/ImageSharp/Color/Color.Conversions.cs index 424b7dcdfe..96aa05c961 100644 --- a/src/ImageSharp/Color/Color.Conversions.cs +++ b/src/ImageSharp/Color/Color.Conversions.cs @@ -23,6 +23,17 @@ public Color(Rgba64 pixel) this.boxedHighPrecisionPixel = null; } + /// + /// Initializes a new instance of the struct. + /// + /// The containing the color information. + [MethodImpl(InliningOptions.ShortMethod)] + public Color(Rgb48 pixel) + { + this.data = new Rgba64(pixel.R, pixel.G, pixel.B, ushort.MaxValue); + this.boxedHighPrecisionPixel = null; + } + /// /// Initializes a new instance of the struct. /// diff --git a/src/ImageSharp/Color/Color.cs b/src/ImageSharp/Color/Color.cs index 61d6c8e6d5..c461d034eb 100644 --- a/src/ImageSharp/Color/Color.cs +++ b/src/ImageSharp/Color/Color.cs @@ -107,7 +107,27 @@ private Color(IPixel pixel) [MethodImpl(InliningOptions.ShortMethod)] public static Color FromPixel(TPixel pixel) where TPixel : unmanaged, IPixel - => new(pixel); + { + // Avoid boxing in case we can convert to Rgba64 safely and efficently + if (typeof(TPixel) == typeof(Rgba64)) + { + return new((Rgba64)(object)pixel); + } + else if (typeof(TPixel) == typeof(Rgb48)) + { + return new((Rgb48)(object)pixel); + } + else if (Unsafe.SizeOf() <= Unsafe.SizeOf()) + { + Rgba32 p = default; + pixel.ToRgba32(ref p); + return new(p); + } + else + { + return new(pixel); + } + } /// /// Creates a new instance of the struct From 5b1720eb8deccd3ea37248111a68df73ce632c3a Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Sun, 7 Nov 2021 13:27:08 +0100 Subject: [PATCH 44/85] Add sse41 version of Hadamard transform --- .../Formats/Webp/Lossy/LossyUtils.cs | 151 +++++++++++++++++- 1 file changed, 146 insertions(+), 5 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs index 04ff80b2d9..0993e2a666 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs @@ -4,11 +4,15 @@ using System; using System.Buffers.Binary; using System.Runtime.CompilerServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +#endif // ReSharper disable InconsistentNaming namespace SixLabors.ImageSharp.Formats.Webp.Lossy { - internal static class LossyUtils + internal static unsafe class LossyUtils { [MethodImpl(InliningOptions.ShortMethod)] public static int Vp8Sse16X16(Span a, Span b) => GetSse(a, b, 16, 16); @@ -61,11 +65,12 @@ public static void Copy(Span src, Span dst, int w, int h) public static int Vp8Disto16X16(Span a, Span b, Span w) { int d = 0; + int dataSize = (4 * WebpConstants.Bps) - 16; for (int y = 0; y < 16 * WebpConstants.Bps; y += 4 * WebpConstants.Bps) { for (int x = 0; x < 16; x += 4) { - d += Vp8Disto4X4(a.Slice(x + y), b.Slice(x + y), w); + d += Vp8Disto4X4(a.Slice(x + y, dataSize), b.Slice(x + y, dataSize), w); } } @@ -75,9 +80,19 @@ public static int Vp8Disto16X16(Span a, Span b, Span w) [MethodImpl(InliningOptions.ShortMethod)] public static int Vp8Disto4X4(Span a, Span b, Span w) { - int sum1 = TTransform(a, w); - int sum2 = TTransform(b, w); - return Math.Abs(sum2 - sum1) >> 5; +#if SUPPORTS_RUNTIME_INTRINSICS + if (Sse41.IsSupported) + { + int diffSum = TTransformSse41(a, b, w); + return Math.Abs(diffSum) >> 5; + } + else +#endif + { + int sum1 = TTransform(a, w); + int sum2 = TTransform(b, w); + return Math.Abs(sum2 - sum1) >> 5; + } } public static void DC16(Span dst, Span yuv, int offset) @@ -591,6 +606,132 @@ public static int TTransform(Span input, Span w) return sum; } +#if SUPPORTS_RUNTIME_INTRINSICS + /// + /// Hadamard transform + /// Returns the weighted sum of the absolute value of transformed coefficients. + /// w[] contains a row-major 4 by 4 symmetric matrix. + /// + public static int TTransformSse41(Span inputA, Span inputB, Span w) + { + Span sum = stackalloc int[4]; +#pragma warning disable SA1503 // Braces should not be omitted + fixed (byte* inputAPtr = inputA) + fixed (byte* inputBPtr = inputB) + fixed (ushort* wPtr = w) + fixed (int* outputPtr = sum) + { + // Load and combine inputs. + Vector128 ina0 = Sse2.LoadVector128(inputAPtr); + Vector128 ina1 = Sse2.LoadVector128(inputAPtr + (WebpConstants.Bps * 1)); + Vector128 ina2 = Sse2.LoadVector128(inputAPtr + (WebpConstants.Bps * 2)); + Vector128 ina3 = Sse2.LoadVector128((long*)(inputAPtr + (WebpConstants.Bps * 3))); + Vector128 inb0 = Sse2.LoadVector128(inputBPtr); + Vector128 inb1 = Sse2.LoadVector128(inputBPtr + (WebpConstants.Bps * 1)); + Vector128 inb2 = Sse2.LoadVector128(inputBPtr + (WebpConstants.Bps * 2)); + Vector128 inb3 = Sse2.LoadVector128((long*)(inputBPtr + (WebpConstants.Bps * 3))); + + // Combine inA and inB (we'll do two transforms in parallel). + Vector128 inab0 = Sse2.UnpackLow(ina0.AsInt32(), inb0.AsInt32()); + Vector128 inab1 = Sse2.UnpackLow(ina1.AsInt32(), inb1.AsInt32()); + Vector128 inab2 = Sse2.UnpackLow(ina2.AsInt32(), inb2.AsInt32()); + Vector128 inab3 = Sse2.UnpackLow(ina3.AsInt32(), inb3.AsInt32()); + Vector128 tmp0 = Sse41.ConvertToVector128Int16(inab0.AsByte()); + Vector128 tmp1 = Sse41.ConvertToVector128Int16(inab1.AsByte()); + Vector128 tmp2 = Sse41.ConvertToVector128Int16(inab2.AsByte()); + Vector128 tmp3 = Sse41.ConvertToVector128Int16(inab3.AsByte()); + + // a00 a01 a02 a03 b00 b01 b02 b03 + // a10 a11 a12 a13 b10 b11 b12 b13 + // a20 a21 a22 a23 b20 b21 b22 b23 + // a30 a31 a32 a33 b30 b31 b32 b33 + // Vertical pass first to avoid a transpose (vertical and horizontal passes + // are commutative because w/kWeightY is symmetric) and subsequent transpose. + // Calculate a and b (two 4x4 at once). + Vector128 a0 = Sse2.Add(tmp0, tmp2); + Vector128 a1 = Sse2.Add(tmp1, tmp3); + Vector128 a2 = Sse2.Subtract(tmp1, tmp3); + Vector128 a3 = Sse2.Subtract(tmp0, tmp2); + Vector128 b0 = Sse2.Add(a0, a1); + Vector128 b1 = Sse2.Add(a3, a2); + Vector128 b2 = Sse2.Subtract(a3, a2); + Vector128 b3 = Sse2.Subtract(a0, a1); + + // a00 a01 a02 a03 b00 b01 b02 b03 + // a10 a11 a12 a13 b10 b11 b12 b13 + // a20 a21 a22 a23 b20 b21 b22 b23 + // a30 a31 a32 a33 b30 b31 b32 b33 + // Transpose the two 4x4. + Vector128 transpose00 = Sse2.UnpackLow(b0, b1); + Vector128 transpose01 = Sse2.UnpackLow(b2, b3); + Vector128 transpose02 = Sse2.UnpackHigh(b0, b1); + Vector128 transpose03 = Sse2.UnpackHigh(b2, b3); + + // a00 a10 a01 a11 a02 a12 a03 a13 + // a20 a30 a21 a31 a22 a32 a23 a33 + // b00 b10 b01 b11 b02 b12 b03 b13 + // b20 b30 b21 b31 b22 b32 b23 b33 + Vector128 transpose10 = Sse2.UnpackLow(transpose00.AsInt32(), transpose01.AsInt32()); + Vector128 transpose11 = Sse2.UnpackLow(transpose02.AsInt32(), transpose03.AsInt32()); + Vector128 transpose12 = Sse2.UnpackHigh(transpose00.AsInt32(), transpose01.AsInt32()); + Vector128 transpose13 = Sse2.UnpackHigh(transpose02.AsInt32(), transpose03.AsInt32()); + + // a00 a10 a20 a30 a01 a11 a21 a31 + // b00 b10 b20 b30 b01 b11 b21 b31 + // a02 a12 a22 a32 a03 a13 a23 a33 + // b02 b12 a22 b32 b03 b13 b23 b33 + Vector128 output0 = Sse2.UnpackLow(transpose10.AsInt64(), transpose11.AsInt64()); + Vector128 output1 = Sse2.UnpackHigh(transpose10.AsInt64(), transpose11.AsInt64()); + Vector128 output2 = Sse2.UnpackLow(transpose12.AsInt64(), transpose13.AsInt64()); + Vector128 output3 = Sse2.UnpackHigh(transpose12.AsInt64(), transpose13.AsInt64()); + + // a00 a10 a20 a30 b00 b10 b20 b30 + // a01 a11 a21 a31 b01 b11 b21 b31 + // a02 a12 a22 a32 b02 b12 b22 b32 + // a03 a13 a23 a33 b03 b13 b23 b33 + // Horizontal pass and difference of weighted sums. + Vector128 w0 = Sse2.LoadVector128(wPtr); + Vector128 w8 = Sse2.LoadVector128(wPtr + 8); + + // Calculate a and b (two 4x4 at once). + a0 = Sse2.Add(output0.AsInt16(), output2.AsInt16()); + a1 = Sse2.Add(output1.AsInt16(), output3.AsInt16()); + a2 = Sse2.Subtract(output1.AsInt16(), output3.AsInt16()); + a3 = Sse2.Subtract(output0.AsInt16(), output2.AsInt16()); + b0 = Sse2.Add(a0, a1); + b1 = Sse2.Add(a3, a2); + b2 = Sse2.Subtract(a3, a2); + b3 = Sse2.Subtract(a0, a1); + + // Separate the transforms of inA and inB. + Vector128 ab0 = Sse2.UnpackLow(b0.AsInt64(), b1.AsInt64()); + Vector128 ab2 = Sse2.UnpackLow(b2.AsInt64(), b3.AsInt64()); + Vector128 bb0 = Sse2.UnpackHigh(b0.AsInt64(), b1.AsInt64()); + Vector128 bb2 = Sse2.UnpackHigh(b2.AsInt64(), b3.AsInt64()); + + Vector128 ab0Abs = Ssse3.Abs(ab0.AsInt16()); + Vector128 ab2Abs = Ssse3.Abs(ab2.AsInt16()); + Vector128 b0Abs = Ssse3.Abs(bb0.AsInt16()); + Vector128 bb2Abs = Ssse3.Abs(bb2.AsInt16()); + + // weighted sums. + Vector128 ab0mulw0 = Sse2.MultiplyAddAdjacent(ab0Abs.AsInt16(), w0.AsInt16()); + Vector128 ab2mulw8 = Sse2.MultiplyAddAdjacent(ab2Abs.AsInt16(), w8.AsInt16()); + Vector128 b0mulw0 = Sse2.MultiplyAddAdjacent(b0Abs.AsInt16(), w0.AsInt16()); + Vector128 bb2mulw8 = Sse2.MultiplyAddAdjacent(bb2Abs.AsInt16(), w8.AsInt16()); + Vector128 ab0ab2Sum = Sse2.Add(ab0mulw0, ab2mulw8); + Vector128 b0w0bb2w8Sum = Sse2.Add(b0mulw0, bb2mulw8); + + // difference of weighted sums. + Vector128 result = Sse2.Subtract(ab0ab2Sum.AsInt32(), b0w0bb2w8Sum.AsInt32()); + Sse2.Store(outputPtr, result.AsInt32()); + } + + return sum[3] + sum[2] + sum[1] + sum[0]; +#pragma warning restore SA1503 // Braces should not be omitted + } +#endif + public static void TransformTwo(Span src, Span dst) { TransformOne(src, dst); From d2017933d7042d3757062cfe3134206652ce7b27 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Sun, 7 Nov 2021 13:31:11 +0100 Subject: [PATCH 45/85] Add HadamardTransform sse tests --- .../Formats/WebP/LossyUtilsTests.cs | 58 +++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs diff --git a/tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs b/tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs new file mode 100644 index 0000000000..6a9a078d7c --- /dev/null +++ b/tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs @@ -0,0 +1,58 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using SixLabors.ImageSharp.Formats.Webp.Lossy; +using SixLabors.ImageSharp.Tests.TestUtilities; +using Xunit; + +namespace SixLabors.ImageSharp.Tests.Formats.WebP +{ + [Trait("Format", "Webp")] + public class LossyUtilsTests + { + private static void RunHadamardTransformTest() + { + byte[] a = + { + 27, 27, 28, 29, 29, 28, 27, 27, 27, 28, 28, 29, 29, 28, 28, 27, 129, 129, 129, 129, 129, 129, 129, + 129, 128, 128, 128, 128, 128, 128, 128, 128, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28, 28, 29, 29, 28, + 28, 27, 129, 129, 129, 129, 129, 129, 129, 129, 128, 128, 128, 128, 128, 128, 128, 128, 27, 27, 26, + 26, 26, 26, 27, 27, 27, 28, 28, 29, 29, 28, 28, 27, 129, 129, 129, 129, 129, 129, 129, 129, 128, + 128, 128, 128, 128, 128, 128, 128, 28, 27, 27, 26, 26, 27, 27, 28, 27, 28, 28, 29, 29, 28, 28, 27 + }; + + byte[] b = + { + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 204, 204, 204, 204, 204, 204, 204, + 204, 204, 204, 204, 204, 204, 204, 204, 204, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 204, 204, 204, 204, 204, 204, 204, 204, 204, + 204, 204, 204, 204, 204, 204, 204, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28 + }; + + ushort[] w = { 38, 32, 20, 9, 32, 28, 17, 7, 20, 17, 10, 4, 9, 7, 4, 2 }; + int expected = 2; + + int actual = LossyUtils.Vp8Disto4X4(a, b, w); + Assert.Equal(expected, actual); + } + + [Fact] + public void HadamardTransform_Works() => RunHadamardTransformTest(); + +#if SUPPORTS_RUNTIME_INTRINSICS + [Fact] + public void HadamardTransform_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunHadamardTransformTest, HwIntrinsics.AllowAll); + + [Fact] + public void HadamardTransform_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunHadamardTransformTest, HwIntrinsics.DisableSSE2); + + [Fact] + public void HadamardTransform_WithoutSSE41_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunHadamardTransformTest, HwIntrinsics.DisableSSE41); + + [Fact] + public void HadamardTransform_WithoutSSE2AndSSE41_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunHadamardTransformTest, HwIntrinsics.DisableSSE41 | HwIntrinsics.DisableSSE2); +#endif + + } +} From 3a03fad75eaa8464d1bd84cccd307014f9417497 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Sun, 7 Nov 2021 14:51:51 +0100 Subject: [PATCH 46/85] Add sse41 version of quantize block --- src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs | 176 ++++++++++++++---- 1 file changed, 144 insertions(+), 32 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs index 2ed4381660..02087ceda4 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs @@ -3,13 +3,17 @@ using System; using System.Runtime.CompilerServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +#endif namespace SixLabors.ImageSharp.Formats.Webp.Lossy { /// /// Quantization methods. /// - internal static class QuantEnc + internal static unsafe class QuantEnc { private static readonly byte[] Zigzag = { 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 }; @@ -17,6 +21,18 @@ internal static class QuantEnc private const int MaxLevel = 2047; +#if SUPPORTS_RUNTIME_INTRINSICS + private static readonly Vector128 MaxCoeff2047 = Vector128.Create((short)MaxLevel); + + private static readonly Vector128 CstLo = Vector128.Create(0, 1, 2, 3, 8, 9, 254, 255, 10, 11, 4, 5, 6, 7, 12, 13); + + private static readonly Vector128 Cst7 = Vector128.Create(254, 255, 254, 255, 254, 255, 254, 255, 14, 15, 254, 255, 254, 255, 254, 255); + + private static readonly Vector128 CstHi = Vector128.Create(2, 3, 8, 9, 10, 11, 4, 5, 254, 255, 6, 7, 12, 13, 14, 15); + + private static readonly Vector128 Cst8 = Vector128.Create(254, 255, 254, 255, 254, 255, 0, 1, 254, 255, 254, 255, 254, 255, 254, 255); +#endif + // Diffusion weights. We under-correct a bit (15/16th of the error is actually // diffused) to avoid 'rainbow' chessboard pattern of blocks at q~=0. private const int C1 = 7; // fraction of error sent to the 4x4 block below @@ -486,51 +502,147 @@ public static void RefineUsingDistortion(Vp8EncIterator it, Vp8SegmentInfo[] seg [MethodImpl(InliningOptions.ShortMethod)] public static int Quantize2Blocks(Span input, Span output, Vp8Matrix mtx) { - int nz = QuantizeBlock(input, output, mtx) << 0; - nz |= QuantizeBlock(input.Slice(1 * 16), output.Slice(1 * 16), mtx) << 1; + int nz = QuantizeBlock(input.Slice(0, 16), output.Slice(0, 16), mtx) << 0; + nz |= QuantizeBlock(input.Slice(1 * 16, 16), output.Slice(1 * 16, 16), mtx) << 1; return nz; } public static int QuantizeBlock(Span input, Span output, Vp8Matrix mtx) { - int last = -1; - int n; - for (n = 0; n < 16; ++n) +#if SUPPORTS_RUNTIME_INTRINSICS + if (Sse41.IsSupported) { - int j = Zigzag[n]; - bool sign = input[j] < 0; - uint coeff = (uint)((sign ? -input[j] : input[j]) + mtx.Sharpen[j]); - if (coeff > mtx.ZThresh[j]) +#pragma warning disable SA1503 // Braces should not be omitted + fixed (ushort* mtxIqPtr = mtx.IQ) + fixed (ushort* mtxQPtr = mtx.Q) + fixed (uint* biasQPtr = mtx.Bias) + fixed (short* inputPtr = input) + fixed (short* outputPtr = output) { - uint q = mtx.Q[j]; - uint iQ = mtx.IQ[j]; - uint b = mtx.Bias[j]; - int level = QuantDiv(coeff, iQ, b); - if (level > MaxLevel) + // Load all inputs. + Vector128 input0 = Sse2.LoadVector128(inputPtr); + Vector128 input8 = Sse2.LoadVector128(inputPtr + 8); + Vector128 iq0 = Sse2.LoadVector128(mtxIqPtr); + Vector128 iq8 = Sse2.LoadVector128(mtxIqPtr + 8); + Vector128 q0 = Sse2.LoadVector128(mtxQPtr); + Vector128 q8 = Sse2.LoadVector128(mtxQPtr + 8); + + // coeff = abs(in) + Vector128 coeff0 = Ssse3.Abs(input0); + Vector128 coeff8 = Ssse3.Abs(input8); + + // out = (coeff * iQ + B) >> QFIX + // doing calculations with 32b precision (QFIX=17) + // out = (coeff * iQ) + Vector128 coeffiQ0H = Sse2.MultiplyHigh(coeff0, iq0); + Vector128 coeffiQ0L = Sse2.MultiplyLow(coeff0, iq0); + Vector128 coeffiQ8H = Sse2.MultiplyHigh(coeff8, iq8); + Vector128 coeffiQ8L = Sse2.MultiplyLow(coeff8, iq8); + Vector128 out00 = Sse2.UnpackLow(coeffiQ0L, coeffiQ0H); + Vector128 out04 = Sse2.UnpackHigh(coeffiQ0L, coeffiQ0H); + Vector128 out08 = Sse2.UnpackLow(coeffiQ8L, coeffiQ8H); + Vector128 out12 = Sse2.UnpackHigh(coeffiQ8L, coeffiQ8H); + + // out = (coeff * iQ + B) + Vector128 bias00 = Sse2.LoadVector128(biasQPtr); + Vector128 bias04 = Sse2.LoadVector128(biasQPtr + 4); + Vector128 bias08 = Sse2.LoadVector128(biasQPtr + 8); + Vector128 bias12 = Sse2.LoadVector128(biasQPtr + 12); + out00 = Sse2.Add(out00.AsInt32(), bias00.AsInt32()).AsUInt16(); + out04 = Sse2.Add(out04.AsInt32(), bias04.AsInt32()).AsUInt16(); + out08 = Sse2.Add(out08.AsInt32(), bias08.AsInt32()).AsUInt16(); + out12 = Sse2.Add(out12.AsInt32(), bias12.AsInt32()).AsUInt16(); + + // out = QUANTDIV(coeff, iQ, B, QFIX) + out00 = Sse2.ShiftRightArithmetic(out00.AsInt32(), WebpConstants.QFix).AsUInt16(); + out04 = Sse2.ShiftRightArithmetic(out04.AsInt32(), WebpConstants.QFix).AsUInt16(); + out08 = Sse2.ShiftRightArithmetic(out08.AsInt32(), WebpConstants.QFix).AsUInt16(); + out12 = Sse2.ShiftRightArithmetic(out12.AsInt32(), WebpConstants.QFix).AsUInt16(); + + // pack result as 16b + Vector128 out0 = Sse2.PackSignedSaturate(out00.AsInt32(), out04.AsInt32()); + Vector128 out8 = Sse2.PackSignedSaturate(out08.AsInt32(), out12.AsInt32()); + + // if (coeff > 2047) coeff = 2047 + out0 = Sse2.Min(out0, MaxCoeff2047); + out8 = Sse2.Min(out8, MaxCoeff2047); + + // put sign back + out0 = Ssse3.Sign(out0, input0); + out8 = Ssse3.Sign(out8, input8); + + // in = out * Q + input0 = Sse2.MultiplyLow(out0, q0.AsInt16()); + input8 = Sse2.MultiplyLow(out8, q8.AsInt16()); + + // in = out * Q + Sse2.Store(inputPtr, input0); + Sse2.Store(inputPtr + 8, input8); + + // zigzag the output before storing it. The re-ordering is: + // 0 1 2 3 4 5 6 7 | 8 9 10 11 12 13 14 15 + // -> 0 1 4[8]5 2 3 6 | 9 12 13 10 [7]11 14 15 + // There's only two misplaced entries ([8] and [7]) that are crossing the + // reg's boundaries. + // We use pshufb instead of pshuflo/pshufhi. + Vector128 tmpLo = Ssse3.Shuffle(out0.AsByte(), CstLo); + Vector128 tmp7 = Ssse3.Shuffle(out0.AsByte(), Cst7); // extract #7 + Vector128 tmpHi = Ssse3.Shuffle(out8.AsByte(), CstHi); + Vector128 tmp8 = Ssse3.Shuffle(out8.AsByte(), Cst8); // extract #8 + Vector128 outZ0 = Sse2.Or(tmpLo, tmp8); + Vector128 outZ8 = Sse2.Or(tmpHi, tmp7); + Sse2.Store(outputPtr, outZ0.AsInt16()); + Sse2.Store(outputPtr + 8, outZ8.AsInt16()); + Vector128 packedOutput = Sse2.PackSignedSaturate(outZ0.AsInt16(), outZ8.AsInt16()); + + // Detect if all 'out' values are zeroes or not. + Vector128 cmpeq = Sse2.CompareEqual(packedOutput, Vector128.Zero); + return Sse2.MoveMask(cmpeq) != 0xffff ? 1 : 0; + } +#pragma warning restore SA1503 // Braces should not be omitted + } + else +#endif + { + int last = -1; + int n; + for (n = 0; n < 16; ++n) + { + int j = Zigzag[n]; + bool sign = input[j] < 0; + uint coeff = (uint)((sign ? -input[j] : input[j]) + mtx.Sharpen[j]); + if (coeff > mtx.ZThresh[j]) { - level = MaxLevel; - } + uint q = mtx.Q[j]; + uint iQ = mtx.IQ[j]; + uint b = mtx.Bias[j]; + int level = QuantDiv(coeff, iQ, b); + if (level > MaxLevel) + { + level = MaxLevel; + } - if (sign) - { - level = -level; - } + if (sign) + { + level = -level; + } - input[j] = (short)(level * (int)q); - output[n] = (short)level; - if (level != 0) + input[j] = (short)(level * (int)q); + output[n] = (short)level; + if (level != 0) + { + last = n; + } + } + else { - last = n; + output[n] = 0; + input[j] = 0; } } - else - { - output[n] = 0; - input[j] = 0; - } - } - return last >= 0 ? 1 : 0; + return last >= 0 ? 1 : 0; + } } // Quantize as usual, but also compute and return the quantization error. From 020134ad8c15e58621635d4ca4b5fb4c6acdbe89 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Sun, 7 Nov 2021 14:52:11 +0100 Subject: [PATCH 47/85] Add QuantizeBlock sse tests --- .../Formats/Webp/Lossy/Vp8Matrix.cs | 9 +++ .../Formats/WebP/QuantEncTests.cs | 56 +++++++++++++++++++ 2 files changed, 65 insertions(+) create mode 100644 tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Matrix.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Matrix.cs index 4276b887f0..e525e388b8 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Matrix.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Matrix.cs @@ -34,6 +34,15 @@ public Vp8Matrix() this.Sharpen = new short[16]; } + public Vp8Matrix(ushort[] q, ushort[] iq, uint[] bias, uint[] zThresh, short[] sharpen) + { + this.Q = q; + this.IQ = iq; + this.Bias = bias; + this.ZThresh = zThresh; + this.Sharpen = sharpen; + } + /// /// Gets the quantizer steps. /// diff --git a/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs b/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs new file mode 100644 index 0000000000..280a7902ae --- /dev/null +++ b/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs @@ -0,0 +1,56 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System.Linq; +using SixLabors.ImageSharp.Formats.Webp.Lossy; +using SixLabors.ImageSharp.Tests.TestUtilities; +using Xunit; + +namespace SixLabors.ImageSharp.Tests.Formats.WebP +{ + [Trait("Format", "Webp")] + public class QuantEncTests + { + private static void RunQuantizeBlockTest() + { + // arrange + short[] input = { 378, 777, -851, 888, 259, 148, 0, -111, -185, -185, -74, -37, 148, 74, 111, 74 }; + short[] output = new short[16]; + ushort[] q = { 42, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37 }; + ushort[] iq = { 3120, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542 }; + uint[] bias = + { + 49152, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, + 55296, 55296 + }; + uint[] zthresh = { 26, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21 }; + short[] expectedOutput = { 9, 21, 7, -5, 4, -23, 24, 0, -5, 4, 2, -2, -3, -1, 3, 2 }; + int expectedResult = 1; + var vp8Matrix = new Vp8Matrix(q, iq, bias, zthresh, new short[16]); + + // act + int actualResult = QuantEnc.QuantizeBlock(input, output, vp8Matrix); + + // assert + Assert.True(output.SequenceEqual(expectedOutput)); + Assert.Equal(expectedResult, actualResult); + } + + [Fact] + public void QuantizeBlock_Works() => RunQuantizeBlockTest(); + +#if SUPPORTS_RUNTIME_INTRINSICS + [Fact] + public void QuantizeBlock_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunQuantizeBlockTest, HwIntrinsics.AllowAll); + + [Fact] + public void QuantizeBlock_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunQuantizeBlockTest, HwIntrinsics.DisableSSE2); + + [Fact] + public void QuantizeBlock_WithoutSSSE3_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunQuantizeBlockTest, HwIntrinsics.DisableSSSE3); + + [Fact] + public void QuantizeBlock_WithoutSSE2AndSSSE3_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunQuantizeBlockTest, HwIntrinsics.DisableSSE2 | HwIntrinsics.DisableSSSE3); +#endif + } +} From a628909b8da58e9dbd10bfa3b70e9c8ce66ddc1d Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Sun, 7 Nov 2021 15:02:08 +0100 Subject: [PATCH 48/85] Add coeff = abs(in) + sharpen --- src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs index 02087ceda4..b812909b20 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs @@ -516,6 +516,7 @@ public static int QuantizeBlock(Span input, Span output, Vp8Matrix fixed (ushort* mtxIqPtr = mtx.IQ) fixed (ushort* mtxQPtr = mtx.Q) fixed (uint* biasQPtr = mtx.Bias) + fixed (short* sharpenPtr = mtx.Sharpen) fixed (short* inputPtr = input) fixed (short* outputPtr = output) { @@ -531,6 +532,12 @@ public static int QuantizeBlock(Span input, Span output, Vp8Matrix Vector128 coeff0 = Ssse3.Abs(input0); Vector128 coeff8 = Ssse3.Abs(input8); + // coeff = abs(in) + sharpen + Vector128 sharpen0 = Sse2.LoadVector128(sharpenPtr); + Vector128 sharpen8 = Sse2.LoadVector128(sharpenPtr + 8); + Sse2.Add(coeff0.AsInt16(), sharpen0); + Sse2.Add(coeff8.AsInt16(), sharpen8); + // out = (coeff * iQ + B) >> QFIX // doing calculations with 32b precision (QFIX=17) // out = (coeff * iQ) From af90336173a1ee20a6c894c113e5f799b139bf9f Mon Sep 17 00:00:00 2001 From: Anton Firszov Date: Sun, 7 Nov 2021 15:25:47 +0100 Subject: [PATCH 49/85] stackalloc header buffer in InternalDetectFormat --- src/ImageSharp/Image.Decode.cs | 51 +++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 20 deletions(-) diff --git a/src/ImageSharp/Image.Decode.cs b/src/ImageSharp/Image.Decode.cs index 94da2c9958..ee340bf86e 100644 --- a/src/ImageSharp/Image.Decode.cs +++ b/src/ImageSharp/Image.Decode.cs @@ -58,31 +58,42 @@ private static IImageFormat InternalDetectFormat(Stream stream, Configuration co return null; } - using (IMemoryOwner buffer = config.MemoryAllocator.Allocate(headerSize, AllocationOptions.Clean)) + // Header sizes are so small, that headersBuffer will be always stackalloc-ed in practice, + // and heap allocation will never happen, there is no need for the usual try-finally ArrayPool dance. + // The array case is only a safety mechanism following stackalloc best practices. + Span headersBuffer = headerSize > 512 ? new byte[headerSize] : stackalloc byte[headerSize]; + long startPosition = stream.Position; + + // Read doesn't always guarantee the full returned length so read a byte + // at a time until we get either our count or hit the end of the stream. + int n = 0; + int i; + do { - Span bufferSpan = buffer.GetSpan(); - long startPosition = stream.Position; + i = stream.Read(headersBuffer, n, headerSize - n); + n += i; + } + while (n < headerSize && i > 0); - // Read doesn't always guarantee the full returned length so read a byte - // at a time until we get either our count or hit the end of the stream. - int n = 0; - int i; - do + stream.Position = startPosition; + + // Does the given stream contain enough data to fit in the header for the format + // and does that data match the format specification? + // Individual formats should still check since they are public. + IImageFormat format = null; + foreach (IImageFormatDetector formatDetector in config.ImageFormatsManager.FormatDetectors) + { + if (formatDetector.HeaderSize <= headerSize) { - i = stream.Read(bufferSpan, n, headerSize - n); - n += i; + IImageFormat attemptFormat = formatDetector.DetectFormat(headersBuffer); + if (attemptFormat != null) + { + format = attemptFormat; + } } - while (n < headerSize && i > 0); - - stream.Position = startPosition; - - // Does the given stream contain enough data to fit in the header for the format - // and does that data match the format specification? - // Individual formats should still check since they are public. - return config.ImageFormatsManager.FormatDetectors - .Where(x => x.HeaderSize <= headerSize) - .Select(x => x.DetectFormat(buffer.GetSpan())).LastOrDefault(x => x != null); } + + return format; } /// From 765f5a23138ce905056a2e7f69f4a3c0feaf4842 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Sun, 7 Nov 2021 16:13:28 +0100 Subject: [PATCH 50/85] Add SSE2 version of Mean16x4 --- .../Formats/Webp/Lossy/Vp8EncIterator.cs | 73 ++++++++++++++++--- 1 file changed, 61 insertions(+), 12 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8EncIterator.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8EncIterator.cs index 79fd8d8543..489977cb82 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8EncIterator.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8EncIterator.cs @@ -2,6 +2,10 @@ // Licensed under the Apache License, Version 2.0. using System; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +#endif namespace SixLabors.ImageSharp.Formats.Webp.Lossy { @@ -9,7 +13,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy /// Iterator structure to iterate through macroblocks, pointing to the /// right neighbouring data (samples, predictions, contexts, ...) /// - internal class Vp8EncIterator + internal unsafe class Vp8EncIterator { public const int YOffEnc = 0; @@ -29,6 +33,10 @@ internal class Vp8EncIterator private readonly int mbh; +#if SUPPORTS_RUNTIME_INTRINSICS + private static readonly Vector128 Mean16x4Mask = Vector128.Create(0x00ff).AsByte(); +#endif + /// /// Stride of the prediction plane(=4*mbw + 1). /// @@ -357,12 +365,13 @@ public int FastMbAnalyze(int quality) int q = quality; int kThreshold = 8 + ((17 - 8) * q / 100); int k; - uint[] dc = new uint[16]; + Span dc = stackalloc uint[16]; + Span tmp = stackalloc ushort[16]; uint m; uint m2; for (k = 0; k < 16; k += 4) { - this.Mean16x4(this.YuvIn.AsSpan(YOffEnc + (k * WebpConstants.Bps)), dc.AsSpan(k)); + this.Mean16x4(this.YuvIn.AsSpan(YOffEnc + (k * WebpConstants.Bps)), dc.Slice(k, 4), tmp); } for (m = 0, m2 = 0, k = 0; k < 16; ++k) @@ -823,21 +832,61 @@ public void BytesToNz() this.Nz[this.nzIdx] = nz; } - private void Mean16x4(Span input, Span dc) + private void Mean16x4(Span input, Span dc, Span tmp) { - for (int k = 0; k < 4; k++) +#if SUPPORTS_RUNTIME_INTRINSICS + if (Sse2.IsSupported) { - uint avg = 0; - for (int y = 0; y < 4; y++) +#pragma warning disable SA1503 // Braces should not be omitted + tmp.Clear(); + fixed (byte* inputPtr = input) + fixed (ushort* tmpPtr = tmp) { - for (int x = 0; x < 4; x++) + Vector128 a0 = Sse2.LoadVector128(inputPtr); + Vector128 a1 = Sse2.LoadVector128(inputPtr + WebpConstants.Bps); + Vector128 a2 = Sse2.LoadVector128(inputPtr + (WebpConstants.Bps * 2)); + Vector128 a3 = Sse2.LoadVector128(inputPtr + (WebpConstants.Bps * 3)); + Vector128 b0 = Sse2.ShiftRightLogical(a0.AsInt16(), 8); // hi byte + Vector128 b1 = Sse2.ShiftRightLogical(a1.AsInt16(), 8); + Vector128 b2 = Sse2.ShiftRightLogical(a2.AsInt16(), 8); + Vector128 b3 = Sse2.ShiftRightLogical(a3.AsInt16(), 8); + Vector128 c0 = Sse2.And(a0, Mean16x4Mask); // lo byte + Vector128 c1 = Sse2.And(a1, Mean16x4Mask); + Vector128 c2 = Sse2.And(a2, Mean16x4Mask); + Vector128 c3 = Sse2.And(a3, Mean16x4Mask); + Vector128 d0 = Sse2.Add(b0.AsInt32(), c0.AsInt32()); + Vector128 d1 = Sse2.Add(b1.AsInt32(), c1.AsInt32()); + Vector128 d2 = Sse2.Add(b2.AsInt32(), c2.AsInt32()); + Vector128 d3 = Sse2.Add(b3.AsInt32(), c3.AsInt32()); + Vector128 e0 = Sse2.Add(d0, d1); + Vector128 e1 = Sse2.Add(d2, d3); + Vector128 f0 = Sse2.Add(e0, e1); + Sse2.Store(tmpPtr, f0.AsUInt16()); + } +#pragma warning restore SA1503 // Braces should not be omitted + + dc[0] = (uint)(tmp[1] + tmp[0]); + dc[1] = (uint)(tmp[3] + tmp[2]); + dc[2] = (uint)(tmp[5] + tmp[4]); + dc[3] = (uint)(tmp[7] + tmp[6]); + } + else +#endif + { + for (int k = 0; k < 4; k++) + { + uint avg = 0; + for (int y = 0; y < 4; y++) { - avg += input[x + (y * WebpConstants.Bps)]; + for (int x = 0; x < 4; x++) + { + avg += input[x + (y * WebpConstants.Bps)]; + } } - } - dc[k] = avg; - input = input.Slice(4); // go to next 4x4 block. + dc[k] = avg; + input = input.Slice(4); // go to next 4x4 block. + } } } From 8b8871b3ba75581ee2ff5f3fcb294bd640743136 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Sun, 7 Nov 2021 16:39:42 +0100 Subject: [PATCH 51/85] Make Mean16x4 static and move to LossyUtils --- .../Formats/Webp/Lossy/LossyUtils.cs | 68 +++++++++++++++++- .../Formats/Webp/Lossy/Vp8EncIterator.cs | 72 +------------------ 2 files changed, 70 insertions(+), 70 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs index d5db3dffa5..c3f6e522ac 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs @@ -4,12 +4,20 @@ using System; using System.Buffers.Binary; using System.Runtime.CompilerServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +#endif // ReSharper disable InconsistentNaming namespace SixLabors.ImageSharp.Formats.Webp.Lossy { - internal static class LossyUtils + internal static unsafe class LossyUtils { +#if SUPPORTS_RUNTIME_INTRINSICS + private static readonly Vector128 Mean16x4Mask = Vector128.Create(0x00ff).AsByte(); +#endif + [MethodImpl(InliningOptions.ShortMethod)] public static int Vp8Sse16X16(Span a, Span b) => GetSse(a, b, 16, 16); @@ -801,6 +809,64 @@ public static void HFilter8i(Span u, Span v, int offset, int stride, FilterLoop24(v, offsetPlus4, 1, stride, 8, thresh, ithresh, hevThresh); } + public static void Mean16x4(Span input, Span dc, Span tmp) + { +#if SUPPORTS_RUNTIME_INTRINSICS + if (Sse2.IsSupported) + { +#pragma warning disable SA1503 // Braces should not be omitted + tmp.Clear(); + fixed (byte* inputPtr = input) + fixed (ushort* tmpPtr = tmp) + { + Vector128 a0 = Sse2.LoadVector128(inputPtr); + Vector128 a1 = Sse2.LoadVector128(inputPtr + WebpConstants.Bps); + Vector128 a2 = Sse2.LoadVector128(inputPtr + (WebpConstants.Bps * 2)); + Vector128 a3 = Sse2.LoadVector128(inputPtr + (WebpConstants.Bps * 3)); + Vector128 b0 = Sse2.ShiftRightLogical(a0.AsInt16(), 8); // hi byte + Vector128 b1 = Sse2.ShiftRightLogical(a1.AsInt16(), 8); + Vector128 b2 = Sse2.ShiftRightLogical(a2.AsInt16(), 8); + Vector128 b3 = Sse2.ShiftRightLogical(a3.AsInt16(), 8); + Vector128 c0 = Sse2.And(a0, Mean16x4Mask); // lo byte + Vector128 c1 = Sse2.And(a1, Mean16x4Mask); + Vector128 c2 = Sse2.And(a2, Mean16x4Mask); + Vector128 c3 = Sse2.And(a3, Mean16x4Mask); + Vector128 d0 = Sse2.Add(b0.AsInt32(), c0.AsInt32()); + Vector128 d1 = Sse2.Add(b1.AsInt32(), c1.AsInt32()); + Vector128 d2 = Sse2.Add(b2.AsInt32(), c2.AsInt32()); + Vector128 d3 = Sse2.Add(b3.AsInt32(), c3.AsInt32()); + Vector128 e0 = Sse2.Add(d0, d1); + Vector128 e1 = Sse2.Add(d2, d3); + Vector128 f0 = Sse2.Add(e0, e1); + Sse2.Store(tmpPtr, f0.AsUInt16()); + } +#pragma warning restore SA1503 // Braces should not be omitted + + dc[0] = (uint)(tmp[1] + tmp[0]); + dc[1] = (uint)(tmp[3] + tmp[2]); + dc[2] = (uint)(tmp[5] + tmp[4]); + dc[3] = (uint)(tmp[7] + tmp[6]); + } + else +#endif + { + for (int k = 0; k < 4; k++) + { + uint avg = 0; + for (int y = 0; y < 4; y++) + { + for (int x = 0; x < 4; x++) + { + avg += input[x + (y * WebpConstants.Bps)]; + } + } + + dc[k] = avg; + input = input.Slice(4); // go to next 4x4 block. + } + } + } + [MethodImpl(InliningOptions.ShortMethod)] public static uint LoadUv(byte u, byte v) => (uint)(u | (v << 16)); // We process u and v together stashed into 32bit(16bit each). diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8EncIterator.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8EncIterator.cs index 489977cb82..57e18832ed 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8EncIterator.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8EncIterator.cs @@ -2,10 +2,6 @@ // Licensed under the Apache License, Version 2.0. using System; -#if SUPPORTS_RUNTIME_INTRINSICS -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.X86; -#endif namespace SixLabors.ImageSharp.Formats.Webp.Lossy { @@ -13,7 +9,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy /// Iterator structure to iterate through macroblocks, pointing to the /// right neighbouring data (samples, predictions, contexts, ...) /// - internal unsafe class Vp8EncIterator + internal class Vp8EncIterator { public const int YOffEnc = 0; @@ -33,10 +29,6 @@ internal unsafe class Vp8EncIterator private readonly int mbh; -#if SUPPORTS_RUNTIME_INTRINSICS - private static readonly Vector128 Mean16x4Mask = Vector128.Create(0x00ff).AsByte(); -#endif - /// /// Stride of the prediction plane(=4*mbw + 1). /// @@ -371,10 +363,10 @@ public int FastMbAnalyze(int quality) uint m2; for (k = 0; k < 16; k += 4) { - this.Mean16x4(this.YuvIn.AsSpan(YOffEnc + (k * WebpConstants.Bps)), dc.Slice(k, 4), tmp); + LossyUtils.Mean16x4(this.YuvIn.AsSpan(YOffEnc + (k * WebpConstants.Bps)), dc.Slice(k, 4), tmp); } - for (m = 0, m2 = 0, k = 0; k < 16; ++k) + for (m = 0, m2 = 0, k = 0; k < 16; k++) { m += dc[k]; m2 += dc[k] * dc[k]; @@ -832,64 +824,6 @@ public void BytesToNz() this.Nz[this.nzIdx] = nz; } - private void Mean16x4(Span input, Span dc, Span tmp) - { -#if SUPPORTS_RUNTIME_INTRINSICS - if (Sse2.IsSupported) - { -#pragma warning disable SA1503 // Braces should not be omitted - tmp.Clear(); - fixed (byte* inputPtr = input) - fixed (ushort* tmpPtr = tmp) - { - Vector128 a0 = Sse2.LoadVector128(inputPtr); - Vector128 a1 = Sse2.LoadVector128(inputPtr + WebpConstants.Bps); - Vector128 a2 = Sse2.LoadVector128(inputPtr + (WebpConstants.Bps * 2)); - Vector128 a3 = Sse2.LoadVector128(inputPtr + (WebpConstants.Bps * 3)); - Vector128 b0 = Sse2.ShiftRightLogical(a0.AsInt16(), 8); // hi byte - Vector128 b1 = Sse2.ShiftRightLogical(a1.AsInt16(), 8); - Vector128 b2 = Sse2.ShiftRightLogical(a2.AsInt16(), 8); - Vector128 b3 = Sse2.ShiftRightLogical(a3.AsInt16(), 8); - Vector128 c0 = Sse2.And(a0, Mean16x4Mask); // lo byte - Vector128 c1 = Sse2.And(a1, Mean16x4Mask); - Vector128 c2 = Sse2.And(a2, Mean16x4Mask); - Vector128 c3 = Sse2.And(a3, Mean16x4Mask); - Vector128 d0 = Sse2.Add(b0.AsInt32(), c0.AsInt32()); - Vector128 d1 = Sse2.Add(b1.AsInt32(), c1.AsInt32()); - Vector128 d2 = Sse2.Add(b2.AsInt32(), c2.AsInt32()); - Vector128 d3 = Sse2.Add(b3.AsInt32(), c3.AsInt32()); - Vector128 e0 = Sse2.Add(d0, d1); - Vector128 e1 = Sse2.Add(d2, d3); - Vector128 f0 = Sse2.Add(e0, e1); - Sse2.Store(tmpPtr, f0.AsUInt16()); - } -#pragma warning restore SA1503 // Braces should not be omitted - - dc[0] = (uint)(tmp[1] + tmp[0]); - dc[1] = (uint)(tmp[3] + tmp[2]); - dc[2] = (uint)(tmp[5] + tmp[4]); - dc[3] = (uint)(tmp[7] + tmp[6]); - } - else -#endif - { - for (int k = 0; k < 4; k++) - { - uint avg = 0; - for (int y = 0; y < 4; y++) - { - for (int x = 0; x < 4; x++) - { - avg += input[x + (y * WebpConstants.Bps)]; - } - } - - dc[k] = avg; - input = input.Slice(4); // go to next 4x4 block. - } - } - } - private void ImportBlock(Span src, int srcStride, Span dst, int w, int h, int size) { int dstIdx = 0; From 984971e1d9aca406cfd41b742da96b2d8447fa1b Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Sun, 7 Nov 2021 16:48:10 +0100 Subject: [PATCH 52/85] Move yuv related methods to YuvConversion class --- .../Formats/Webp/Lossy/LossyUtils.cs | 31 ------------------- .../Formats/Webp/Lossy/WebpLossyDecoder.cs | 24 +++++++------- .../Formats/Webp/Lossy/YuvConversion.cs | 31 +++++++++++++++++++ 3 files changed, 43 insertions(+), 43 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs index c3f6e522ac..b2513feb55 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs @@ -867,27 +867,6 @@ public static void Mean16x4(Span input, Span dc, Span tmp) } } - [MethodImpl(InliningOptions.ShortMethod)] - public static uint LoadUv(byte u, byte v) => - (uint)(u | (v << 16)); // We process u and v together stashed into 32bit(16bit each). - - [MethodImpl(InliningOptions.ShortMethod)] - public static void YuvToBgr(int y, int u, int v, Span bgr) - { - bgr[0] = (byte)YuvToB(y, u); - bgr[1] = (byte)YuvToG(y, u, v); - bgr[2] = (byte)YuvToR(y, v); - } - - [MethodImpl(InliningOptions.ShortMethod)] - public static int YuvToB(int y, int u) => Clip8(MultHi(y, 19077) + MultHi(u, 33050) - 17685); - - [MethodImpl(InliningOptions.ShortMethod)] - public static int YuvToG(int y, int u, int v) => Clip8(MultHi(y, 19077) - MultHi(u, 6419) - MultHi(v, 13320) + 8708); - - [MethodImpl(InliningOptions.ShortMethod)] - public static int YuvToR(int y, int v) => Clip8(MultHi(y, 19077) + MultHi(v, 26149) - 14234); - [MethodImpl(InliningOptions.ShortMethod)] public static byte Avg2(byte a, byte b) => (byte)((a + b + 1) >> 1); @@ -1092,9 +1071,6 @@ private static bool Hev(Span p, int offset, int step, int thresh) return WebpLookupTables.Abs0(p1 - p0) > thresh || WebpLookupTables.Abs0(q1 - q0) > thresh; } - [MethodImpl(InliningOptions.ShortMethod)] - private static int MultHi(int v, int coeff) => (v * coeff) >> 8; - [MethodImpl(InliningOptions.ShortMethod)] private static void Store(Span dst, int x, int y, int v) { @@ -1117,13 +1093,6 @@ private static void Store2(Span dst, int y, int dc, int d, int c) [MethodImpl(InliningOptions.ShortMethod)] private static int Mul2(int a) => (a * 35468) >> 16; - [MethodImpl(InliningOptions.ShortMethod)] - private static byte Clip8(int v) - { - int yuvMask = (256 << 6) - 1; - return (byte)((v & ~yuvMask) == 0 ? v >> 6 : v < 0 ? 0 : 255); - } - [MethodImpl(InliningOptions.ShortMethod)] private static void Put8x8uv(byte value, Span dst) { diff --git a/src/ImageSharp/Formats/Webp/Lossy/WebpLossyDecoder.cs b/src/ImageSharp/Formats/Webp/Lossy/WebpLossyDecoder.cs index 4f283f9f53..2f78842c63 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/WebpLossyDecoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/WebpLossyDecoder.cs @@ -747,21 +747,21 @@ private void UpSample(Span topY, Span bottomY, Span topU, Span { int xStep = 3; int lastPixelPair = (len - 1) >> 1; - uint tluv = LossyUtils.LoadUv(topU[0], topV[0]); // top-left sample - uint luv = LossyUtils.LoadUv(curU[0], curV[0]); // left-sample + uint tluv = YuvConversion.LoadUv(topU[0], topV[0]); // top-left sample + uint luv = YuvConversion.LoadUv(curU[0], curV[0]); // left-sample uint uv0 = ((3 * tluv) + luv + 0x00020002u) >> 2; - LossyUtils.YuvToBgr(topY[0], (int)(uv0 & 0xff), (int)(uv0 >> 16), topDst); + YuvConversion.YuvToBgr(topY[0], (int)(uv0 & 0xff), (int)(uv0 >> 16), topDst); if (bottomY != null) { uv0 = ((3 * luv) + tluv + 0x00020002u) >> 2; - LossyUtils.YuvToBgr(bottomY[0], (int)uv0 & 0xff, (int)(uv0 >> 16), bottomDst); + YuvConversion.YuvToBgr(bottomY[0], (int)uv0 & 0xff, (int)(uv0 >> 16), bottomDst); } for (int x = 1; x <= lastPixelPair; x++) { - uint tuv = LossyUtils.LoadUv(topU[x], topV[x]); // top sample - uint uv = LossyUtils.LoadUv(curU[x], curV[x]); // sample + uint tuv = YuvConversion.LoadUv(topU[x], topV[x]); // top sample + uint uv = YuvConversion.LoadUv(curU[x], curV[x]); // sample // Precompute invariant values associated with first and second diagonals. uint avg = tluv + tuv + luv + uv + 0x00080008u; @@ -770,15 +770,15 @@ private void UpSample(Span topY, Span bottomY, Span topU, Span uv0 = (diag12 + tluv) >> 1; uint uv1 = (diag03 + tuv) >> 1; int xMul2 = x * 2; - LossyUtils.YuvToBgr(topY[xMul2 - 1], (int)(uv0 & 0xff), (int)(uv0 >> 16), topDst.Slice((xMul2 - 1) * xStep)); - LossyUtils.YuvToBgr(topY[xMul2 - 0], (int)(uv1 & 0xff), (int)(uv1 >> 16), topDst.Slice((xMul2 - 0) * xStep)); + YuvConversion.YuvToBgr(topY[xMul2 - 1], (int)(uv0 & 0xff), (int)(uv0 >> 16), topDst.Slice((xMul2 - 1) * xStep)); + YuvConversion.YuvToBgr(topY[xMul2 - 0], (int)(uv1 & 0xff), (int)(uv1 >> 16), topDst.Slice((xMul2 - 0) * xStep)); if (bottomY != null) { uv0 = (diag03 + luv) >> 1; uv1 = (diag12 + uv) >> 1; - LossyUtils.YuvToBgr(bottomY[xMul2 - 1], (int)(uv0 & 0xff), (int)(uv0 >> 16), bottomDst.Slice((xMul2 - 1) * xStep)); - LossyUtils.YuvToBgr(bottomY[xMul2 + 0], (int)(uv1 & 0xff), (int)(uv1 >> 16), bottomDst.Slice((xMul2 + 0) * xStep)); + YuvConversion.YuvToBgr(bottomY[xMul2 - 1], (int)(uv0 & 0xff), (int)(uv0 >> 16), bottomDst.Slice((xMul2 - 1) * xStep)); + YuvConversion.YuvToBgr(bottomY[xMul2 + 0], (int)(uv1 & 0xff), (int)(uv1 >> 16), bottomDst.Slice((xMul2 + 0) * xStep)); } tluv = tuv; @@ -788,11 +788,11 @@ private void UpSample(Span topY, Span bottomY, Span topU, Span if ((len & 1) == 0) { uv0 = ((3 * tluv) + luv + 0x00020002u) >> 2; - LossyUtils.YuvToBgr(topY[len - 1], (int)(uv0 & 0xff), (int)(uv0 >> 16), topDst.Slice((len - 1) * xStep)); + YuvConversion.YuvToBgr(topY[len - 1], (int)(uv0 & 0xff), (int)(uv0 >> 16), topDst.Slice((len - 1) * xStep)); if (bottomY != null) { uv0 = ((3 * luv) + tluv + 0x00020002u) >> 2; - LossyUtils.YuvToBgr(bottomY[len - 1], (int)(uv0 & 0xff), (int)(uv0 >> 16), bottomDst.Slice((len - 1) * xStep)); + YuvConversion.YuvToBgr(bottomY[len - 1], (int)(uv0 & 0xff), (int)(uv0 >> 16), bottomDst.Slice((len - 1) * xStep)); } } } diff --git a/src/ImageSharp/Formats/Webp/Lossy/YuvConversion.cs b/src/ImageSharp/Formats/Webp/Lossy/YuvConversion.cs index ed03c2e71d..24143785ab 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/YuvConversion.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/YuvConversion.cs @@ -299,5 +299,36 @@ private static int ClipUv(int uv, int rounding) uv = (uv + rounding + (128 << (YuvFix + 2))) >> (YuvFix + 2); return (uv & ~0xff) == 0 ? uv : uv < 0 ? 0 : 255; } + + [MethodImpl(InliningOptions.ShortMethod)] + public static uint LoadUv(byte u, byte v) => + (uint)(u | (v << 16)); // We process u and v together stashed into 32bit(16bit each). + + [MethodImpl(InliningOptions.ShortMethod)] + public static void YuvToBgr(int y, int u, int v, Span bgr) + { + bgr[0] = (byte)YuvToB(y, u); + bgr[1] = (byte)YuvToG(y, u, v); + bgr[2] = (byte)YuvToR(y, v); + } + + [MethodImpl(InliningOptions.ShortMethod)] + public static int YuvToB(int y, int u) => Clip8(MultHi(y, 19077) + MultHi(u, 33050) - 17685); + + [MethodImpl(InliningOptions.ShortMethod)] + public static int YuvToG(int y, int u, int v) => Clip8(MultHi(y, 19077) - MultHi(u, 6419) - MultHi(v, 13320) + 8708); + + [MethodImpl(InliningOptions.ShortMethod)] + public static int YuvToR(int y, int v) => Clip8(MultHi(y, 19077) + MultHi(v, 26149) - 14234); + + [MethodImpl(InliningOptions.ShortMethod)] + private static int MultHi(int v, int coeff) => (v * coeff) >> 8; + + [MethodImpl(InliningOptions.ShortMethod)] + private static byte Clip8(int v) + { + int yuvMask = (256 << 6) - 1; + return (byte)((v & ~yuvMask) == 0 ? v >> 6 : v < 0 ? 0 : 255); + } } } From 0c96e37ba639d1d44b64840c41f01455a53eb9af Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Sun, 7 Nov 2021 17:39:50 +0100 Subject: [PATCH 53/85] Add Mean16x4 sse tests --- .../Formats/Webp/Lossy/LossyUtils.cs | 2 +- .../Formats/WebP/LossyUtilsTests.cs | 49 +++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) create mode 100644 tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs diff --git a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs index b2513feb55..74448cf528 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs @@ -15,7 +15,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy internal static unsafe class LossyUtils { #if SUPPORTS_RUNTIME_INTRINSICS - private static readonly Vector128 Mean16x4Mask = Vector128.Create(0x00ff).AsByte(); + private static readonly Vector128 Mean16x4Mask = Vector128.Create((short)0x00ff).AsByte(); #endif [MethodImpl(InliningOptions.ShortMethod)] diff --git a/tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs b/tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs new file mode 100644 index 0000000000..5062f845ba --- /dev/null +++ b/tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs @@ -0,0 +1,49 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System.Linq; +using SixLabors.ImageSharp.Formats.Webp.Lossy; +using SixLabors.ImageSharp.Tests.TestUtilities; +using Xunit; + +namespace SixLabors.ImageSharp.Tests.Formats.WebP +{ + [Trait("Format", "Webp")] + public class LossyUtilsTests + { + private static void RunMean16x4Test() + { + // arrange + byte[] input = + { + 154, 145, 102, 115, 127, 129, 126, 125, 126, 120, 133, 152, 157, 153, 119, 94, 104, 116, 111, 113, + 113, 109, 105, 124, 173, 175, 177, 170, 175, 172, 166, 164, 151, 141, 99, 114, 125, 126, 135, 150, + 133, 115, 127, 149, 141, 168, 100, 54, 110, 117, 115, 116, 119, 115, 117, 130, 174, 174, 174, 157, + 146, 171, 166, 158, 117, 140, 96, 111, 119, 119, 136, 171, 188, 134, 121, 126, 136, 119, 59, 77, + 109, 115, 113, 120, 120, 117, 128, 115, 174, 173, 173, 161, 152, 148, 153, 162, 105, 140, 96, 114, + 115, 122, 141, 173, 190, 190, 142, 106, 151, 78, 66, 141, 110, 117, 123, 136, 118, 124, 127, 114, + 173, 175, 166, 155, 155, 159, 159, 158 + }; + uint[] dc = new uint[4]; + ushort[] tmp = new ushort[8]; + uint[] expectedDc = { 1940, 2139, 2252, 1813 }; + + // act + LossyUtils.Mean16x4(input, dc, tmp); + + // assert + Assert.True(dc.SequenceEqual(expectedDc)); + } + + [Fact] + public void Mean16x4_Works() => RunMean16x4Test(); + +#if SUPPORTS_RUNTIME_INTRINSICS + [Fact] + public void Mean16x4_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunMean16x4Test, HwIntrinsics.AllowAll); + + [Fact] + public void Mean16x4_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunMean16x4Test, HwIntrinsics.DisableSSE2); +#endif + } +} From 90bab3939770a028a45e3d824dc6949fa124c492 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 8 Nov 2021 16:56:38 +1100 Subject: [PATCH 54/85] Special case La32 and L16 --- src/ImageSharp/Color/Color.Conversions.cs | 22 ++++++++++++++++++++++ src/ImageSharp/Color/Color.cs | 8 ++++++++ 2 files changed, 30 insertions(+) diff --git a/src/ImageSharp/Color/Color.Conversions.cs b/src/ImageSharp/Color/Color.Conversions.cs index 96aa05c961..bf7869e53d 100644 --- a/src/ImageSharp/Color/Color.Conversions.cs +++ b/src/ImageSharp/Color/Color.Conversions.cs @@ -34,6 +34,28 @@ public Color(Rgb48 pixel) this.boxedHighPrecisionPixel = null; } + /// + /// Initializes a new instance of the struct. + /// + /// The containing the color information. + [MethodImpl(InliningOptions.ShortMethod)] + public Color(La32 pixel) + { + this.data = new Rgba64(pixel.L, pixel.L, pixel.L, pixel.A); + this.boxedHighPrecisionPixel = null; + } + + /// + /// Initializes a new instance of the struct. + /// + /// The containing the color information. + [MethodImpl(InliningOptions.ShortMethod)] + public Color(L16 pixel) + { + this.data = new Rgba64(pixel.PackedValue, pixel.PackedValue, pixel.PackedValue, ushort.MaxValue); + this.boxedHighPrecisionPixel = null; + } + /// /// Initializes a new instance of the struct. /// diff --git a/src/ImageSharp/Color/Color.cs b/src/ImageSharp/Color/Color.cs index c461d034eb..7c21d62ddf 100644 --- a/src/ImageSharp/Color/Color.cs +++ b/src/ImageSharp/Color/Color.cs @@ -117,6 +117,14 @@ public static Color FromPixel(TPixel pixel) { return new((Rgb48)(object)pixel); } + else if (typeof(TPixel) == typeof(La32)) + { + return new((La32)(object)pixel); + } + else if (typeof(TPixel) == typeof(L16)) + { + return new((L16)(object)pixel); + } else if (Unsafe.SizeOf() <= Unsafe.SizeOf()) { Rgba32 p = default; From 8d19c2881da8da3a7a88a569b6f7784bbc1c210c Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Mon, 8 Nov 2021 10:41:52 +0100 Subject: [PATCH 55/85] Add sse2 version of Vp8Sse4X4 --- .../Formats/Webp/Lossy/LossyUtils.cs | 59 ++++++++++++++++++- 1 file changed, 57 insertions(+), 2 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs index d5db3dffa5..82e2214701 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs @@ -4,11 +4,16 @@ using System; using System.Buffers.Binary; using System.Runtime.CompilerServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Numerics; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +#endif // ReSharper disable InconsistentNaming namespace SixLabors.ImageSharp.Formats.Webp.Lossy { - internal static class LossyUtils + internal static unsafe class LossyUtils { [MethodImpl(InliningOptions.ShortMethod)] public static int Vp8Sse16X16(Span a, Span b) => GetSse(a, b, 16, 16); @@ -17,7 +22,57 @@ internal static class LossyUtils public static int Vp8Sse16X8(Span a, Span b) => GetSse(a, b, 16, 8); [MethodImpl(InliningOptions.ShortMethod)] - public static int Vp8Sse4X4(Span a, Span b) => GetSse(a, b, 4, 4); + public static int Vp8Sse4X4(Span a, Span b) + { +#if SUPPORTS_RUNTIME_INTRINSICS + if (Sse2.IsSupported) + { +#pragma warning disable SA1503 // Braces should not be omitted + Span tmp = stackalloc int[4]; + fixed (byte* aPtr = a) + fixed (byte* bPtr = b) + fixed (int* tmpPtr = tmp) + { + // Load values. + Vector128 a0 = Sse2.LoadVector128(aPtr); + Vector128 a1 = Sse2.LoadVector128(aPtr + WebpConstants.Bps); + Vector128 a2 = Sse2.LoadVector128(aPtr + (WebpConstants.Bps * 2)); + Vector128 a3 = Sse2.LoadVector128(aPtr + (WebpConstants.Bps * 3)); + Vector128 b0 = Sse2.LoadVector128(bPtr); + Vector128 b1 = Sse2.LoadVector128(bPtr + WebpConstants.Bps); + Vector128 b2 = Sse2.LoadVector128(bPtr + (WebpConstants.Bps * 2)); + Vector128 b3 = Sse2.LoadVector128(bPtr + (WebpConstants.Bps * 3)); + + // Combine pair of lines. + Vector128 a01 = Sse2.UnpackLow(a0.AsInt32(), a1.AsInt32()); + Vector128 a23 = Sse2.UnpackLow(a2.AsInt32(), a3.AsInt32()); + Vector128 b01 = Sse2.UnpackLow(b0.AsInt32(), b1.AsInt32()); + Vector128 b23 = Sse2.UnpackLow(b2.AsInt32(), b3.AsInt32()); + + // Convert to 16b. + Vector128 a01s = Sse2.UnpackLow(a01.AsByte(), Vector128.Zero); + Vector128 a23s = Sse2.UnpackLow(a23.AsByte(), Vector128.Zero); + Vector128 b01s = Sse2.UnpackLow(b01.AsByte(), Vector128.Zero); + Vector128 b23s = Sse2.UnpackLow(b23.AsByte(), Vector128.Zero); + + // subtract, square and accumulate. + Vector128 d0 = Sse2.SubtractSaturate(a01s, b01s); + Vector128 d1 = Sse2.SubtractSaturate(a23s, b23s); + Vector128 e0 = Sse2.MultiplyAddAdjacent(d0.AsInt16(), d0.AsInt16()); + Vector128 e1 = Sse2.MultiplyAddAdjacent(d1.AsInt16(), d1.AsInt16()); + Vector128 sum = Sse2.Add(e0, e1); + + Sse2.Store(tmpPtr, sum); + return tmp[3] + tmp[2] + tmp[1] + tmp[0]; + } +#pragma warning restore SA1503 // Braces should not be omitted + } + else +#endif + { + return GetSse(a, b, 4, 4); + } + } [MethodImpl(InliningOptions.ShortMethod)] public static int GetSse(Span a, Span b, int w, int h) From 5c6e08b80c39f3cd4e24774ee66b5b011c41aa00 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Mon, 8 Nov 2021 16:02:06 +0100 Subject: [PATCH 56/85] Avoid pinning of vp8 matrix data --- src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs | 169 +++++++++--------- 1 file changed, 85 insertions(+), 84 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs index f935bd3ee0..b300b7b5c2 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs @@ -3,6 +3,7 @@ using System; using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; #if SUPPORTS_RUNTIME_INTRINSICS using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; @@ -537,99 +538,99 @@ public static int QuantizeBlock(Span input, Span output, Vp8Matrix if (Sse41.IsSupported) { #pragma warning disable SA1503 // Braces should not be omitted - fixed (ushort* mtxIqPtr = mtx.IQ) - fixed (ushort* mtxQPtr = mtx.Q) - fixed (uint* biasQPtr = mtx.Bias) - fixed (short* sharpenPtr = mtx.Sharpen) + // Load all inputs. + Vector128 input0 = Unsafe.As>(ref MemoryMarshal.GetReference(input)); + Vector128 input8 = Unsafe.As>(ref MemoryMarshal.GetReference(input.Slice(8, 8))); + Vector128 iq0 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.IQ.AsSpan(0, 8))); + Vector128 iq8 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.IQ.AsSpan(8, 8))); + Vector128 q0 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Q.AsSpan(0, 8))); + Vector128 q8 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Q.AsSpan(8, 8))); + + // coeff = abs(in) + Vector128 coeff0 = Ssse3.Abs(input0); + Vector128 coeff8 = Ssse3.Abs(input8); + + // coeff = abs(in) + sharpen + Vector128 sharpen0 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Sharpen.AsSpan(0, 8))); + Vector128 sharpen8 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Sharpen.AsSpan(8, 8))); + Sse2.Add(coeff0.AsInt16(), sharpen0); + Sse2.Add(coeff8.AsInt16(), sharpen8); + + // out = (coeff * iQ + B) >> QFIX + // doing calculations with 32b precision (QFIX=17) + // out = (coeff * iQ) + Vector128 coeffiQ0H = Sse2.MultiplyHigh(coeff0, iq0); + Vector128 coeffiQ0L = Sse2.MultiplyLow(coeff0, iq0); + Vector128 coeffiQ8H = Sse2.MultiplyHigh(coeff8, iq8); + Vector128 coeffiQ8L = Sse2.MultiplyLow(coeff8, iq8); + Vector128 out00 = Sse2.UnpackLow(coeffiQ0L, coeffiQ0H); + Vector128 out04 = Sse2.UnpackHigh(coeffiQ0L, coeffiQ0H); + Vector128 out08 = Sse2.UnpackLow(coeffiQ8L, coeffiQ8H); + Vector128 out12 = Sse2.UnpackHigh(coeffiQ8L, coeffiQ8H); + + // out = (coeff * iQ + B) + Vector128 bias00 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Bias.AsSpan(0, 4))); + Vector128 bias04 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Bias.AsSpan(4, 4))); + Vector128 bias08 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Bias.AsSpan(8, 4))); + Vector128 bias12 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Bias.AsSpan(12, 4))); + out00 = Sse2.Add(out00.AsInt32(), bias00.AsInt32()).AsUInt16(); + out04 = Sse2.Add(out04.AsInt32(), bias04.AsInt32()).AsUInt16(); + out08 = Sse2.Add(out08.AsInt32(), bias08.AsInt32()).AsUInt16(); + out12 = Sse2.Add(out12.AsInt32(), bias12.AsInt32()).AsUInt16(); + + // out = QUANTDIV(coeff, iQ, B, QFIX) + out00 = Sse2.ShiftRightArithmetic(out00.AsInt32(), WebpConstants.QFix).AsUInt16(); + out04 = Sse2.ShiftRightArithmetic(out04.AsInt32(), WebpConstants.QFix).AsUInt16(); + out08 = Sse2.ShiftRightArithmetic(out08.AsInt32(), WebpConstants.QFix).AsUInt16(); + out12 = Sse2.ShiftRightArithmetic(out12.AsInt32(), WebpConstants.QFix).AsUInt16(); + + // pack result as 16b + Vector128 out0 = Sse2.PackSignedSaturate(out00.AsInt32(), out04.AsInt32()); + Vector128 out8 = Sse2.PackSignedSaturate(out08.AsInt32(), out12.AsInt32()); + + // if (coeff > 2047) coeff = 2047 + out0 = Sse2.Min(out0, MaxCoeff2047); + out8 = Sse2.Min(out8, MaxCoeff2047); + + // put sign back + out0 = Ssse3.Sign(out0, input0); + out8 = Ssse3.Sign(out8, input8); + + // in = out * Q + input0 = Sse2.MultiplyLow(out0, q0.AsInt16()); + input8 = Sse2.MultiplyLow(out8, q8.AsInt16()); + fixed (short* inputPtr = input) - fixed (short* outputPtr = output) { - // Load all inputs. - Vector128 input0 = Sse2.LoadVector128(inputPtr); - Vector128 input8 = Sse2.LoadVector128(inputPtr + 8); - Vector128 iq0 = Sse2.LoadVector128(mtxIqPtr); - Vector128 iq8 = Sse2.LoadVector128(mtxIqPtr + 8); - Vector128 q0 = Sse2.LoadVector128(mtxQPtr); - Vector128 q8 = Sse2.LoadVector128(mtxQPtr + 8); - - // coeff = abs(in) - Vector128 coeff0 = Ssse3.Abs(input0); - Vector128 coeff8 = Ssse3.Abs(input8); - - // coeff = abs(in) + sharpen - Vector128 sharpen0 = Sse2.LoadVector128(sharpenPtr); - Vector128 sharpen8 = Sse2.LoadVector128(sharpenPtr + 8); - Sse2.Add(coeff0.AsInt16(), sharpen0); - Sse2.Add(coeff8.AsInt16(), sharpen8); - - // out = (coeff * iQ + B) >> QFIX - // doing calculations with 32b precision (QFIX=17) - // out = (coeff * iQ) - Vector128 coeffiQ0H = Sse2.MultiplyHigh(coeff0, iq0); - Vector128 coeffiQ0L = Sse2.MultiplyLow(coeff0, iq0); - Vector128 coeffiQ8H = Sse2.MultiplyHigh(coeff8, iq8); - Vector128 coeffiQ8L = Sse2.MultiplyLow(coeff8, iq8); - Vector128 out00 = Sse2.UnpackLow(coeffiQ0L, coeffiQ0H); - Vector128 out04 = Sse2.UnpackHigh(coeffiQ0L, coeffiQ0H); - Vector128 out08 = Sse2.UnpackLow(coeffiQ8L, coeffiQ8H); - Vector128 out12 = Sse2.UnpackHigh(coeffiQ8L, coeffiQ8H); - - // out = (coeff * iQ + B) - Vector128 bias00 = Sse2.LoadVector128(biasQPtr); - Vector128 bias04 = Sse2.LoadVector128(biasQPtr + 4); - Vector128 bias08 = Sse2.LoadVector128(biasQPtr + 8); - Vector128 bias12 = Sse2.LoadVector128(biasQPtr + 12); - out00 = Sse2.Add(out00.AsInt32(), bias00.AsInt32()).AsUInt16(); - out04 = Sse2.Add(out04.AsInt32(), bias04.AsInt32()).AsUInt16(); - out08 = Sse2.Add(out08.AsInt32(), bias08.AsInt32()).AsUInt16(); - out12 = Sse2.Add(out12.AsInt32(), bias12.AsInt32()).AsUInt16(); - - // out = QUANTDIV(coeff, iQ, B, QFIX) - out00 = Sse2.ShiftRightArithmetic(out00.AsInt32(), WebpConstants.QFix).AsUInt16(); - out04 = Sse2.ShiftRightArithmetic(out04.AsInt32(), WebpConstants.QFix).AsUInt16(); - out08 = Sse2.ShiftRightArithmetic(out08.AsInt32(), WebpConstants.QFix).AsUInt16(); - out12 = Sse2.ShiftRightArithmetic(out12.AsInt32(), WebpConstants.QFix).AsUInt16(); - - // pack result as 16b - Vector128 out0 = Sse2.PackSignedSaturate(out00.AsInt32(), out04.AsInt32()); - Vector128 out8 = Sse2.PackSignedSaturate(out08.AsInt32(), out12.AsInt32()); - - // if (coeff > 2047) coeff = 2047 - out0 = Sse2.Min(out0, MaxCoeff2047); - out8 = Sse2.Min(out8, MaxCoeff2047); - - // put sign back - out0 = Ssse3.Sign(out0, input0); - out8 = Ssse3.Sign(out8, input8); - - // in = out * Q - input0 = Sse2.MultiplyLow(out0, q0.AsInt16()); - input8 = Sse2.MultiplyLow(out8, q8.AsInt16()); - // in = out * Q Sse2.Store(inputPtr, input0); Sse2.Store(inputPtr + 8, input8); + } - // zigzag the output before storing it. The re-ordering is: - // 0 1 2 3 4 5 6 7 | 8 9 10 11 12 13 14 15 - // -> 0 1 4[8]5 2 3 6 | 9 12 13 10 [7]11 14 15 - // There's only two misplaced entries ([8] and [7]) that are crossing the - // reg's boundaries. - // We use pshufb instead of pshuflo/pshufhi. - Vector128 tmpLo = Ssse3.Shuffle(out0.AsByte(), CstLo); - Vector128 tmp7 = Ssse3.Shuffle(out0.AsByte(), Cst7); // extract #7 - Vector128 tmpHi = Ssse3.Shuffle(out8.AsByte(), CstHi); - Vector128 tmp8 = Ssse3.Shuffle(out8.AsByte(), Cst8); // extract #8 - Vector128 outZ0 = Sse2.Or(tmpLo, tmp8); - Vector128 outZ8 = Sse2.Or(tmpHi, tmp7); + // zigzag the output before storing it. The re-ordering is: + // 0 1 2 3 4 5 6 7 | 8 9 10 11 12 13 14 15 + // -> 0 1 4[8]5 2 3 6 | 9 12 13 10 [7]11 14 15 + // There's only two misplaced entries ([8] and [7]) that are crossing the + // reg's boundaries. + // We use pshufb instead of pshuflo/pshufhi. + Vector128 tmpLo = Ssse3.Shuffle(out0.AsByte(), CstLo); + Vector128 tmp7 = Ssse3.Shuffle(out0.AsByte(), Cst7); // extract #7 + Vector128 tmpHi = Ssse3.Shuffle(out8.AsByte(), CstHi); + Vector128 tmp8 = Ssse3.Shuffle(out8.AsByte(), Cst8); // extract #8 + Vector128 outZ0 = Sse2.Or(tmpLo, tmp8); + Vector128 outZ8 = Sse2.Or(tmpHi, tmp7); + + fixed (short* outputPtr = output) + { Sse2.Store(outputPtr, outZ0.AsInt16()); Sse2.Store(outputPtr + 8, outZ8.AsInt16()); - Vector128 packedOutput = Sse2.PackSignedSaturate(outZ0.AsInt16(), outZ8.AsInt16()); - - // Detect if all 'out' values are zeroes or not. - Vector128 cmpeq = Sse2.CompareEqual(packedOutput, Vector128.Zero); - return Sse2.MoveMask(cmpeq) != 0xffff ? 1 : 0; } + + Vector128 packedOutput = Sse2.PackSignedSaturate(outZ0.AsInt16(), outZ8.AsInt16()); + + // Detect if all 'out' values are zeroes or not. + Vector128 cmpeq = Sse2.CompareEqual(packedOutput, Vector128.Zero); + return Sse2.MoveMask(cmpeq) != 0xffff ? 1 : 0; #pragma warning restore SA1503 // Braces should not be omitted } else From 0c0812de82648be40a35dc63a9b6c914bdcbbbf7 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Mon, 8 Nov 2021 16:58:40 +0100 Subject: [PATCH 57/85] Avoid pinning input and output data --- src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs index b300b7b5c2..6e25dc003c 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs @@ -600,12 +600,10 @@ public static int QuantizeBlock(Span input, Span output, Vp8Matrix input0 = Sse2.MultiplyLow(out0, q0.AsInt16()); input8 = Sse2.MultiplyLow(out8, q8.AsInt16()); - fixed (short* inputPtr = input) - { - // in = out * Q - Sse2.Store(inputPtr, input0); - Sse2.Store(inputPtr + 8, input8); - } + // in = out * Q + ref short inputRef = ref MemoryMarshal.GetReference(input); + Unsafe.As>(ref inputRef) = input0; + Unsafe.As>(ref Unsafe.Add(ref inputRef, 8)) = input8; // zigzag the output before storing it. The re-ordering is: // 0 1 2 3 4 5 6 7 | 8 9 10 11 12 13 14 15 @@ -620,11 +618,9 @@ public static int QuantizeBlock(Span input, Span output, Vp8Matrix Vector128 outZ0 = Sse2.Or(tmpLo, tmp8); Vector128 outZ8 = Sse2.Or(tmpHi, tmp7); - fixed (short* outputPtr = output) - { - Sse2.Store(outputPtr, outZ0.AsInt16()); - Sse2.Store(outputPtr + 8, outZ8.AsInt16()); - } + ref short outputRef = ref MemoryMarshal.GetReference(output); + Unsafe.As>(ref outputRef) = outZ0.AsInt16(); + Unsafe.As>(ref Unsafe.Add(ref outputRef, 8)) = outZ8.AsInt16(); Vector128 packedOutput = Sse2.PackSignedSaturate(outZ0.AsInt16(), outZ8.AsInt16()); From cffa4b0c366a3d80b7e5c315127ae0a27f1ddb8d Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Mon, 8 Nov 2021 17:00:18 +0100 Subject: [PATCH 58/85] Only test with and without HardwareIntrinsics --- tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs b/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs index 280a7902ae..d0cdfc1ded 100644 --- a/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs +++ b/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs @@ -44,13 +44,7 @@ private static void RunQuantizeBlockTest() public void QuantizeBlock_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunQuantizeBlockTest, HwIntrinsics.AllowAll); [Fact] - public void QuantizeBlock_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunQuantizeBlockTest, HwIntrinsics.DisableSSE2); - - [Fact] - public void QuantizeBlock_WithoutSSSE3_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunQuantizeBlockTest, HwIntrinsics.DisableSSSE3); - - [Fact] - public void QuantizeBlock_WithoutSSE2AndSSSE3_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunQuantizeBlockTest, HwIntrinsics.DisableSSE2 | HwIntrinsics.DisableSSSE3); + public void QuantizeBlock_WithoutHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunQuantizeBlockTest, HwIntrinsics.DisableHWIntrinsic); #endif } } From c9fc5cdb56a21deaf78ae4eb73a6e8270c951841 Mon Sep 17 00:00:00 2001 From: Berkan Diler Date: Mon, 8 Nov 2021 18:33:24 +0100 Subject: [PATCH 59/85] Collapse AsSpan().Slice(..) calls into AsSpan(..) --- src/ImageSharp/Formats/Png/PngDecoderCore.cs | 2 +- src/ImageSharp/Formats/Webp/WebpDecoderCore.cs | 2 +- src/ImageSharp/IO/ChunkedMemoryStream.cs | 4 ++-- .../Processors/Transforms/Resize/ResizeKernelMap.cs | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ImageSharp/Formats/Png/PngDecoderCore.cs b/src/ImageSharp/Formats/Png/PngDecoderCore.cs index 987dc150c2..cf3cd7eb14 100644 --- a/src/ImageSharp/Formats/Png/PngDecoderCore.cs +++ b/src/ImageSharp/Formats/Png/PngDecoderCore.cs @@ -1071,7 +1071,7 @@ private bool TryUncompressTextData(ReadOnlySpan compressedData, Encoding e int bytesRead = inflateStream.CompressedStream.Read(this.buffer, 0, this.buffer.Length); while (bytesRead != 0) { - uncompressedBytes.AddRange(this.buffer.AsSpan().Slice(0, bytesRead).ToArray()); + uncompressedBytes.AddRange(this.buffer.AsSpan(0, bytesRead).ToArray()); bytesRead = inflateStream.CompressedStream.Read(this.buffer, 0, this.buffer.Length); } diff --git a/src/ImageSharp/Formats/Webp/WebpDecoderCore.cs b/src/ImageSharp/Formats/Webp/WebpDecoderCore.cs index 44a55a4c65..09071406c5 100644 --- a/src/ImageSharp/Formats/Webp/WebpDecoderCore.cs +++ b/src/ImageSharp/Formats/Webp/WebpDecoderCore.cs @@ -306,7 +306,7 @@ private WebpImageInfo ReadVp8Header(WebpFeatures features = null) // Check for VP8 magic bytes. this.currentStream.Read(this.buffer, 0, 3); - if (!this.buffer.AsSpan().Slice(0, 3).SequenceEqual(WebpConstants.Vp8HeaderMagicBytes)) + if (!this.buffer.AsSpan(0, 3).SequenceEqual(WebpConstants.Vp8HeaderMagicBytes)) { WebpThrowHelper.ThrowImageFormatException("VP8 magic bytes not found"); } diff --git a/src/ImageSharp/IO/ChunkedMemoryStream.cs b/src/ImageSharp/IO/ChunkedMemoryStream.cs index b9220c56ab..e28baf879d 100644 --- a/src/ImageSharp/IO/ChunkedMemoryStream.cs +++ b/src/ImageSharp/IO/ChunkedMemoryStream.cs @@ -243,7 +243,7 @@ public override int Read(byte[] buffer, int offset, int count) const string bufferMessage = "Offset subtracted from the buffer length is less than count."; Guard.IsFalse(buffer.Length - offset < count, nameof(buffer), bufferMessage); - return this.ReadImpl(buffer.AsSpan().Slice(offset, count)); + return this.ReadImpl(buffer.AsSpan(offset, count)); } #if SUPPORTS_SPAN_STREAM @@ -359,7 +359,7 @@ public override void Write(byte[] buffer, int offset, int count) const string bufferMessage = "Offset subtracted from the buffer length is less than count."; Guard.IsFalse(buffer.Length - offset < count, nameof(buffer), bufferMessage); - this.WriteImpl(buffer.AsSpan().Slice(offset, count)); + this.WriteImpl(buffer.AsSpan(offset, count)); } #if SUPPORTS_SPAN_STREAM diff --git a/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernelMap.cs b/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernelMap.cs index a58c20f687..9cc4680602 100644 --- a/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernelMap.cs +++ b/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernelMap.cs @@ -216,7 +216,7 @@ private ResizeKernel BuildKernel(in TResampler sampler, int destRowI ResizeKernel kernel = this.CreateKernel(dataRowIndex, left, right); - Span kernelValues = this.tempValues.AsSpan().Slice(0, kernel.Length); + Span kernelValues = this.tempValues.AsSpan(0, kernel.Length); double sum = 0; for (int j = left; j <= right; j++) From 670e2eeafc14b7c16757f1b909eb552a9e61b1ca Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Tue, 9 Nov 2021 11:43:19 +1100 Subject: [PATCH 60/85] Update ColorTests.CastTo.cs --- .../ImageSharp.Tests/Color/ColorTests.CastTo.cs | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/tests/ImageSharp.Tests/Color/ColorTests.CastTo.cs b/tests/ImageSharp.Tests/Color/ColorTests.CastTo.cs index af35d1f895..3003265ca6 100644 --- a/tests/ImageSharp.Tests/Color/ColorTests.CastTo.cs +++ b/tests/ImageSharp.Tests/Color/ColorTests.CastTo.cs @@ -90,16 +90,25 @@ public void Bgr24() } [Fact] - public void TPixel() + public void GenericPixel() { - var source = new RgbaVector(float.Epsilon, 2 * float.Epsilon, float.MaxValue, float.MinValue); + AssertGenericPixel(new RgbaVector(float.Epsilon, 2 * float.Epsilon, float.MaxValue, float.MinValue)); + AssertGenericPixel(new Rgba64(1, 2, ushort.MaxValue, ushort.MaxValue - 1)); + AssertGenericPixel(new Rgb48(1, 2, ushort.MaxValue - 1)); + AssertGenericPixel(new La32(1, ushort.MaxValue - 1)); + AssertGenericPixel(new L16(ushort.MaxValue - 1)); + AssertGenericPixel(new Rgba32(1, 2, 255, 254)); + } + private static void AssertGenericPixel(TPixel source) + where TPixel : unmanaged, IPixel + { // Act: var color = Color.FromPixel(source); // Assert: - RgbaVector data = color.ToPixel(); - Assert.Equal(source, data); + TPixel actual = color.ToPixel(); + Assert.Equal(source, actual); } } } From cb513a905c52e843440f14c70e40fe9192737e91 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 9 Nov 2021 11:05:18 +0100 Subject: [PATCH 61/85] Use fixed sized arrays in Vp8Matrix --- src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs | 20 ++++---- .../Formats/Webp/Lossy/Vp8Encoder.cs | 8 +--- .../Formats/Webp/Lossy/Vp8Matrix.cs | 47 +++++-------------- .../Formats/Webp/Lossy/Vp8SegmentInfo.cs | 12 ++--- .../Formats/WebP/QuantEncTests.cs | 17 ++++--- 5 files changed, 41 insertions(+), 63 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs index 6e25dc003c..4c3a2ff5e3 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs @@ -541,18 +541,18 @@ public static int QuantizeBlock(Span input, Span output, Vp8Matrix // Load all inputs. Vector128 input0 = Unsafe.As>(ref MemoryMarshal.GetReference(input)); Vector128 input8 = Unsafe.As>(ref MemoryMarshal.GetReference(input.Slice(8, 8))); - Vector128 iq0 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.IQ.AsSpan(0, 8))); - Vector128 iq8 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.IQ.AsSpan(8, 8))); - Vector128 q0 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Q.AsSpan(0, 8))); - Vector128 q8 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Q.AsSpan(8, 8))); + Vector128 iq0 = Unsafe.As>(ref mtx.IQ[0]); + Vector128 iq8 = Unsafe.As>(ref mtx.IQ[8]); + Vector128 q0 = Unsafe.As>(ref mtx.Q[0]); + Vector128 q8 = Unsafe.As>(ref mtx.Q[8]); // coeff = abs(in) Vector128 coeff0 = Ssse3.Abs(input0); Vector128 coeff8 = Ssse3.Abs(input8); // coeff = abs(in) + sharpen - Vector128 sharpen0 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Sharpen.AsSpan(0, 8))); - Vector128 sharpen8 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Sharpen.AsSpan(8, 8))); + Vector128 sharpen0 = Unsafe.As>(ref mtx.Sharpen[0]); + Vector128 sharpen8 = Unsafe.As>(ref mtx.Sharpen[8]); Sse2.Add(coeff0.AsInt16(), sharpen0); Sse2.Add(coeff8.AsInt16(), sharpen8); @@ -569,10 +569,10 @@ public static int QuantizeBlock(Span input, Span output, Vp8Matrix Vector128 out12 = Sse2.UnpackHigh(coeffiQ8L, coeffiQ8H); // out = (coeff * iQ + B) - Vector128 bias00 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Bias.AsSpan(0, 4))); - Vector128 bias04 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Bias.AsSpan(4, 4))); - Vector128 bias08 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Bias.AsSpan(8, 4))); - Vector128 bias12 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Bias.AsSpan(12, 4))); + Vector128 bias00 = Unsafe.As>(ref mtx.Bias[0]); + Vector128 bias04 = Unsafe.As>(ref mtx.Bias[4]); + Vector128 bias08 = Unsafe.As>(ref mtx.Bias[8]); + Vector128 bias12 = Unsafe.As>(ref mtx.Bias[12]); out00 = Sse2.Add(out00.AsInt32(), bias00.AsInt32()).AsUInt16(); out04 = Sse2.Add(out04.AsInt32(), bias04.AsInt32()).AsUInt16(); out08 = Sse2.Add(out08.AsInt32(), bias08.AsInt32()).AsUInt16(); diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs index 728574682f..8a4115d216 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs @@ -502,7 +502,7 @@ private void SetLoopParams(float q) this.ResetStats(); } - private void AdjustFilterStrength() + private unsafe void AdjustFilterStrength() { if (this.filterStrength > 0) { @@ -806,7 +806,7 @@ private void ResetStats() proba.NbSkip = 0; } - private void SetupMatrices(Vp8SegmentInfo[] dqm) + private unsafe void SetupMatrices(Vp8SegmentInfo[] dqm) { int tlambdaScale = this.method >= WebpEncodingMethod.Default ? this.spatialNoiseShaping : 0; for (int i = 0; i < dqm.Length; i++) @@ -814,10 +814,6 @@ private void SetupMatrices(Vp8SegmentInfo[] dqm) Vp8SegmentInfo m = dqm[i]; int q = m.Quant; - m.Y1 = new Vp8Matrix(); - m.Y2 = new Vp8Matrix(); - m.Uv = new Vp8Matrix(); - m.Y1.Q[0] = WebpLookupTables.DcTable[Numerics.Clamp(q + this.DqY1Dc, 0, 127)]; m.Y1.Q[1] = WebpLookupTables.AcTable[Numerics.Clamp(q, 0, 127)]; diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Matrix.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Matrix.cs index e525e388b8..66c91e44ad 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Matrix.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Matrix.cs @@ -3,7 +3,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy { - internal class Vp8Matrix + internal unsafe struct Vp8Matrix { private static readonly int[][] BiasMatrices = { @@ -23,50 +23,29 @@ internal class Vp8Matrix private const int SharpenBits = 11; /// - /// Initializes a new instance of the class. + /// The quantizer steps. /// - public Vp8Matrix() - { - this.Q = new ushort[16]; - this.IQ = new ushort[16]; - this.Bias = new uint[16]; - this.ZThresh = new uint[16]; - this.Sharpen = new short[16]; - } - - public Vp8Matrix(ushort[] q, ushort[] iq, uint[] bias, uint[] zThresh, short[] sharpen) - { - this.Q = q; - this.IQ = iq; - this.Bias = bias; - this.ZThresh = zThresh; - this.Sharpen = sharpen; - } - - /// - /// Gets the quantizer steps. - /// - public ushort[] Q { get; } + public fixed ushort Q[16]; /// - /// Gets the reciprocals, fixed point. + /// The reciprocals, fixed point. /// - public ushort[] IQ { get; } + public fixed ushort IQ[16]; /// - /// Gets the rounding bias. + /// The rounding bias. /// - public uint[] Bias { get; } + public fixed uint Bias[16]; /// - /// Gets the value below which a coefficient is zeroed. + /// The value below which a coefficient is zeroed. /// - public uint[] ZThresh { get; } + public fixed uint ZThresh[16]; /// - /// Gets the frequency boosters for slight sharpening. + /// The frequency boosters for slight sharpening. /// - public short[] Sharpen { get; } + public fixed short Sharpen[16]; /// /// Returns the average quantizer. @@ -81,7 +60,7 @@ public int Expand(int type) int isAcCoeff = i > 0 ? 1 : 0; int bias = BiasMatrices[type][isAcCoeff]; this.IQ[i] = (ushort)((1 << WebpConstants.QFix) / this.Q[i]); - this.Bias[i] = (uint)this.BIAS(bias); + this.Bias[i] = (uint)BIAS(bias); // zthresh is the exact value such that QUANTDIV(coeff, iQ, B) is: // * zero if coeff <= zthresh @@ -115,6 +94,6 @@ public int Expand(int type) return (sum + 8) >> 4; } - private int BIAS(int b) => b << (WebpConstants.QFix - 8); + private static int BIAS(int b) => b << (WebpConstants.QFix - 8); } } diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8SegmentInfo.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8SegmentInfo.cs index cf2a5c1775..71983055c0 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8SegmentInfo.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8SegmentInfo.cs @@ -8,19 +8,19 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy internal class Vp8SegmentInfo { /// - /// Gets or sets the quantization matrix y1. + /// Gets the quantization matrix y1. /// - public Vp8Matrix Y1 { get; set; } + public Vp8Matrix Y1; /// - /// Gets or sets the quantization matrix y2. + /// Gets the quantization matrix y2. /// - public Vp8Matrix Y2 { get; set; } + public Vp8Matrix Y2; /// - /// Gets or sets the quantization matrix uv. + /// Gets the quantization matrix uv. /// - public Vp8Matrix Uv { get; set; } + public Vp8Matrix Uv; /// /// Gets or sets the quant-susceptibility, range [-127,127]. Zero is neutral. Lower values indicate a lower risk of blurriness. diff --git a/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs b/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs index d0cdfc1ded..7465c42cef 100644 --- a/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs +++ b/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs @@ -11,22 +11,25 @@ namespace SixLabors.ImageSharp.Tests.Formats.WebP [Trait("Format", "Webp")] public class QuantEncTests { - private static void RunQuantizeBlockTest() + private static unsafe void RunQuantizeBlockTest() { // arrange short[] input = { 378, 777, -851, 888, 259, 148, 0, -111, -185, -185, -74, -37, 148, 74, 111, 74 }; short[] output = new short[16]; ushort[] q = { 42, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37 }; ushort[] iq = { 3120, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542 }; - uint[] bias = - { - 49152, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, - 55296, 55296 - }; + uint[] bias = { 49152, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296 }; uint[] zthresh = { 26, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21 }; short[] expectedOutput = { 9, 21, 7, -5, 4, -23, 24, 0, -5, 4, 2, -2, -3, -1, 3, 2 }; int expectedResult = 1; - var vp8Matrix = new Vp8Matrix(q, iq, bias, zthresh, new short[16]); + Vp8Matrix vp8Matrix = default; + for (int i = 0; i < 16; i++) + { + vp8Matrix.Q[i] = q[i]; + vp8Matrix.IQ[i] = iq[i]; + vp8Matrix.Bias[i] = bias[i]; + vp8Matrix.ZThresh[i] = zthresh[i]; + } // act int actualResult = QuantEnc.QuantizeBlock(input, output, vp8Matrix); From 3c9c1bb23eb63863fcac38ac4478f097d73e1e0f Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 9 Nov 2021 11:21:18 +0100 Subject: [PATCH 62/85] Avoid pinning --- .../Formats/Webp/Lossy/LossyUtils.cs | 48 +++++++++---------- 1 file changed, 22 insertions(+), 26 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs index 74448cf528..6de2989bda 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs @@ -4,6 +4,7 @@ using System; using System.Buffers.Binary; using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; #if SUPPORTS_RUNTIME_INTRINSICS using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; @@ -814,33 +815,28 @@ public static void Mean16x4(Span input, Span dc, Span tmp) #if SUPPORTS_RUNTIME_INTRINSICS if (Sse2.IsSupported) { -#pragma warning disable SA1503 // Braces should not be omitted tmp.Clear(); - fixed (byte* inputPtr = input) - fixed (ushort* tmpPtr = tmp) - { - Vector128 a0 = Sse2.LoadVector128(inputPtr); - Vector128 a1 = Sse2.LoadVector128(inputPtr + WebpConstants.Bps); - Vector128 a2 = Sse2.LoadVector128(inputPtr + (WebpConstants.Bps * 2)); - Vector128 a3 = Sse2.LoadVector128(inputPtr + (WebpConstants.Bps * 3)); - Vector128 b0 = Sse2.ShiftRightLogical(a0.AsInt16(), 8); // hi byte - Vector128 b1 = Sse2.ShiftRightLogical(a1.AsInt16(), 8); - Vector128 b2 = Sse2.ShiftRightLogical(a2.AsInt16(), 8); - Vector128 b3 = Sse2.ShiftRightLogical(a3.AsInt16(), 8); - Vector128 c0 = Sse2.And(a0, Mean16x4Mask); // lo byte - Vector128 c1 = Sse2.And(a1, Mean16x4Mask); - Vector128 c2 = Sse2.And(a2, Mean16x4Mask); - Vector128 c3 = Sse2.And(a3, Mean16x4Mask); - Vector128 d0 = Sse2.Add(b0.AsInt32(), c0.AsInt32()); - Vector128 d1 = Sse2.Add(b1.AsInt32(), c1.AsInt32()); - Vector128 d2 = Sse2.Add(b2.AsInt32(), c2.AsInt32()); - Vector128 d3 = Sse2.Add(b3.AsInt32(), c3.AsInt32()); - Vector128 e0 = Sse2.Add(d0, d1); - Vector128 e1 = Sse2.Add(d2, d3); - Vector128 f0 = Sse2.Add(e0, e1); - Sse2.Store(tmpPtr, f0.AsUInt16()); - } -#pragma warning restore SA1503 // Braces should not be omitted + Vector128 a0 = Unsafe.As>(ref MemoryMarshal.GetReference(input)); + Vector128 a1 = Unsafe.As>(ref MemoryMarshal.GetReference(input.Slice(WebpConstants.Bps, 16))); + Vector128 a2 = Unsafe.As>(ref MemoryMarshal.GetReference(input.Slice(WebpConstants.Bps * 2, 16))); + Vector128 a3 = Unsafe.As>(ref MemoryMarshal.GetReference(input.Slice(WebpConstants.Bps * 3, 16))); + Vector128 b0 = Sse2.ShiftRightLogical(a0.AsInt16(), 8); // hi byte + Vector128 b1 = Sse2.ShiftRightLogical(a1.AsInt16(), 8); + Vector128 b2 = Sse2.ShiftRightLogical(a2.AsInt16(), 8); + Vector128 b3 = Sse2.ShiftRightLogical(a3.AsInt16(), 8); + Vector128 c0 = Sse2.And(a0, Mean16x4Mask); // lo byte + Vector128 c1 = Sse2.And(a1, Mean16x4Mask); + Vector128 c2 = Sse2.And(a2, Mean16x4Mask); + Vector128 c3 = Sse2.And(a3, Mean16x4Mask); + Vector128 d0 = Sse2.Add(b0.AsInt32(), c0.AsInt32()); + Vector128 d1 = Sse2.Add(b1.AsInt32(), c1.AsInt32()); + Vector128 d2 = Sse2.Add(b2.AsInt32(), c2.AsInt32()); + Vector128 d3 = Sse2.Add(b3.AsInt32(), c3.AsInt32()); + Vector128 e0 = Sse2.Add(d0, d1); + Vector128 e1 = Sse2.Add(d2, d3); + Vector128 f0 = Sse2.Add(e0, e1); + ref ushort outputRef = ref MemoryMarshal.GetReference(tmp); + Unsafe.As>(ref outputRef) = f0.AsUInt16(); dc[0] = (uint)(tmp[1] + tmp[0]); dc[1] = (uint)(tmp[3] + tmp[2]); From 6e135cbd79f391f56ee69df0da2b8be505631491 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 9 Nov 2021 12:38:41 +0100 Subject: [PATCH 63/85] Avoid pinning --- .../Formats/Webp/Lossy/LossyUtils.cs | 219 +++++++++--------- 1 file changed, 107 insertions(+), 112 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs index b8f232a43b..ee224e0b0b 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs @@ -4,6 +4,7 @@ using System; using System.Buffers.Binary; using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; #if SUPPORTS_RUNTIME_INTRINSICS using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; @@ -614,120 +615,114 @@ public static int TTransformSse41(Span inputA, Span inputB, Span sum = scratch.Slice(0, 4); sum.Clear(); -#pragma warning disable SA1503 // Braces should not be omitted - fixed (byte* inputAPtr = inputA) - fixed (byte* inputBPtr = inputB) - fixed (ushort* wPtr = w) - fixed (int* outputPtr = sum) - { - // Load and combine inputs. - Vector128 ina0 = Sse2.LoadVector128(inputAPtr); - Vector128 ina1 = Sse2.LoadVector128(inputAPtr + (WebpConstants.Bps * 1)); - Vector128 ina2 = Sse2.LoadVector128(inputAPtr + (WebpConstants.Bps * 2)); - Vector128 ina3 = Sse2.LoadVector128((long*)(inputAPtr + (WebpConstants.Bps * 3))); - Vector128 inb0 = Sse2.LoadVector128(inputBPtr); - Vector128 inb1 = Sse2.LoadVector128(inputBPtr + (WebpConstants.Bps * 1)); - Vector128 inb2 = Sse2.LoadVector128(inputBPtr + (WebpConstants.Bps * 2)); - Vector128 inb3 = Sse2.LoadVector128((long*)(inputBPtr + (WebpConstants.Bps * 3))); - - // Combine inA and inB (we'll do two transforms in parallel). - Vector128 inab0 = Sse2.UnpackLow(ina0.AsInt32(), inb0.AsInt32()); - Vector128 inab1 = Sse2.UnpackLow(ina1.AsInt32(), inb1.AsInt32()); - Vector128 inab2 = Sse2.UnpackLow(ina2.AsInt32(), inb2.AsInt32()); - Vector128 inab3 = Sse2.UnpackLow(ina3.AsInt32(), inb3.AsInt32()); - Vector128 tmp0 = Sse41.ConvertToVector128Int16(inab0.AsByte()); - Vector128 tmp1 = Sse41.ConvertToVector128Int16(inab1.AsByte()); - Vector128 tmp2 = Sse41.ConvertToVector128Int16(inab2.AsByte()); - Vector128 tmp3 = Sse41.ConvertToVector128Int16(inab3.AsByte()); - - // a00 a01 a02 a03 b00 b01 b02 b03 - // a10 a11 a12 a13 b10 b11 b12 b13 - // a20 a21 a22 a23 b20 b21 b22 b23 - // a30 a31 a32 a33 b30 b31 b32 b33 - // Vertical pass first to avoid a transpose (vertical and horizontal passes - // are commutative because w/kWeightY is symmetric) and subsequent transpose. - // Calculate a and b (two 4x4 at once). - Vector128 a0 = Sse2.Add(tmp0, tmp2); - Vector128 a1 = Sse2.Add(tmp1, tmp3); - Vector128 a2 = Sse2.Subtract(tmp1, tmp3); - Vector128 a3 = Sse2.Subtract(tmp0, tmp2); - Vector128 b0 = Sse2.Add(a0, a1); - Vector128 b1 = Sse2.Add(a3, a2); - Vector128 b2 = Sse2.Subtract(a3, a2); - Vector128 b3 = Sse2.Subtract(a0, a1); - - // a00 a01 a02 a03 b00 b01 b02 b03 - // a10 a11 a12 a13 b10 b11 b12 b13 - // a20 a21 a22 a23 b20 b21 b22 b23 - // a30 a31 a32 a33 b30 b31 b32 b33 - // Transpose the two 4x4. - Vector128 transpose00 = Sse2.UnpackLow(b0, b1); - Vector128 transpose01 = Sse2.UnpackLow(b2, b3); - Vector128 transpose02 = Sse2.UnpackHigh(b0, b1); - Vector128 transpose03 = Sse2.UnpackHigh(b2, b3); - - // a00 a10 a01 a11 a02 a12 a03 a13 - // a20 a30 a21 a31 a22 a32 a23 a33 - // b00 b10 b01 b11 b02 b12 b03 b13 - // b20 b30 b21 b31 b22 b32 b23 b33 - Vector128 transpose10 = Sse2.UnpackLow(transpose00.AsInt32(), transpose01.AsInt32()); - Vector128 transpose11 = Sse2.UnpackLow(transpose02.AsInt32(), transpose03.AsInt32()); - Vector128 transpose12 = Sse2.UnpackHigh(transpose00.AsInt32(), transpose01.AsInt32()); - Vector128 transpose13 = Sse2.UnpackHigh(transpose02.AsInt32(), transpose03.AsInt32()); - - // a00 a10 a20 a30 a01 a11 a21 a31 - // b00 b10 b20 b30 b01 b11 b21 b31 - // a02 a12 a22 a32 a03 a13 a23 a33 - // b02 b12 a22 b32 b03 b13 b23 b33 - Vector128 output0 = Sse2.UnpackLow(transpose10.AsInt64(), transpose11.AsInt64()); - Vector128 output1 = Sse2.UnpackHigh(transpose10.AsInt64(), transpose11.AsInt64()); - Vector128 output2 = Sse2.UnpackLow(transpose12.AsInt64(), transpose13.AsInt64()); - Vector128 output3 = Sse2.UnpackHigh(transpose12.AsInt64(), transpose13.AsInt64()); - - // a00 a10 a20 a30 b00 b10 b20 b30 - // a01 a11 a21 a31 b01 b11 b21 b31 - // a02 a12 a22 a32 b02 b12 b22 b32 - // a03 a13 a23 a33 b03 b13 b23 b33 - // Horizontal pass and difference of weighted sums. - Vector128 w0 = Sse2.LoadVector128(wPtr); - Vector128 w8 = Sse2.LoadVector128(wPtr + 8); - - // Calculate a and b (two 4x4 at once). - a0 = Sse2.Add(output0.AsInt16(), output2.AsInt16()); - a1 = Sse2.Add(output1.AsInt16(), output3.AsInt16()); - a2 = Sse2.Subtract(output1.AsInt16(), output3.AsInt16()); - a3 = Sse2.Subtract(output0.AsInt16(), output2.AsInt16()); - b0 = Sse2.Add(a0, a1); - b1 = Sse2.Add(a3, a2); - b2 = Sse2.Subtract(a3, a2); - b3 = Sse2.Subtract(a0, a1); - - // Separate the transforms of inA and inB. - Vector128 ab0 = Sse2.UnpackLow(b0.AsInt64(), b1.AsInt64()); - Vector128 ab2 = Sse2.UnpackLow(b2.AsInt64(), b3.AsInt64()); - Vector128 bb0 = Sse2.UnpackHigh(b0.AsInt64(), b1.AsInt64()); - Vector128 bb2 = Sse2.UnpackHigh(b2.AsInt64(), b3.AsInt64()); - - Vector128 ab0Abs = Ssse3.Abs(ab0.AsInt16()); - Vector128 ab2Abs = Ssse3.Abs(ab2.AsInt16()); - Vector128 b0Abs = Ssse3.Abs(bb0.AsInt16()); - Vector128 bb2Abs = Ssse3.Abs(bb2.AsInt16()); - - // weighted sums. - Vector128 ab0mulw0 = Sse2.MultiplyAddAdjacent(ab0Abs.AsInt16(), w0.AsInt16()); - Vector128 ab2mulw8 = Sse2.MultiplyAddAdjacent(ab2Abs.AsInt16(), w8.AsInt16()); - Vector128 b0mulw0 = Sse2.MultiplyAddAdjacent(b0Abs.AsInt16(), w0.AsInt16()); - Vector128 bb2mulw8 = Sse2.MultiplyAddAdjacent(bb2Abs.AsInt16(), w8.AsInt16()); - Vector128 ab0ab2Sum = Sse2.Add(ab0mulw0, ab2mulw8); - Vector128 b0w0bb2w8Sum = Sse2.Add(b0mulw0, bb2mulw8); - - // difference of weighted sums. - Vector128 result = Sse2.Subtract(ab0ab2Sum.AsInt32(), b0w0bb2w8Sum.AsInt32()); - Sse2.Store(outputPtr, result.AsInt32()); - } + // Load and combine inputs. + Vector128 ina0 = Unsafe.As>(ref MemoryMarshal.GetReference(inputA)); + Vector128 ina1 = Unsafe.As>(ref MemoryMarshal.GetReference(inputA.Slice(WebpConstants.Bps, 16))); + Vector128 ina2 = Unsafe.As>(ref MemoryMarshal.GetReference(inputA.Slice(WebpConstants.Bps * 2, 16))); + Vector128 ina3 = Unsafe.As>(ref MemoryMarshal.GetReference(inputA.Slice(WebpConstants.Bps * 3, 16))).AsInt64(); + Vector128 inb0 = Unsafe.As>(ref MemoryMarshal.GetReference(inputB)); + Vector128 inb1 = Unsafe.As>(ref MemoryMarshal.GetReference(inputB.Slice(WebpConstants.Bps, 16))); + Vector128 inb2 = Unsafe.As>(ref MemoryMarshal.GetReference(inputB.Slice(WebpConstants.Bps * 2, 16))); + Vector128 inb3 = Unsafe.As>(ref MemoryMarshal.GetReference(inputB.Slice(WebpConstants.Bps * 3, 16))).AsInt64(); + + // Combine inA and inB (we'll do two transforms in parallel). + Vector128 inab0 = Sse2.UnpackLow(ina0.AsInt32(), inb0.AsInt32()); + Vector128 inab1 = Sse2.UnpackLow(ina1.AsInt32(), inb1.AsInt32()); + Vector128 inab2 = Sse2.UnpackLow(ina2.AsInt32(), inb2.AsInt32()); + Vector128 inab3 = Sse2.UnpackLow(ina3.AsInt32(), inb3.AsInt32()); + Vector128 tmp0 = Sse41.ConvertToVector128Int16(inab0.AsByte()); + Vector128 tmp1 = Sse41.ConvertToVector128Int16(inab1.AsByte()); + Vector128 tmp2 = Sse41.ConvertToVector128Int16(inab2.AsByte()); + Vector128 tmp3 = Sse41.ConvertToVector128Int16(inab3.AsByte()); + + // a00 a01 a02 a03 b00 b01 b02 b03 + // a10 a11 a12 a13 b10 b11 b12 b13 + // a20 a21 a22 a23 b20 b21 b22 b23 + // a30 a31 a32 a33 b30 b31 b32 b33 + // Vertical pass first to avoid a transpose (vertical and horizontal passes + // are commutative because w/kWeightY is symmetric) and subsequent transpose. + // Calculate a and b (two 4x4 at once). + Vector128 a0 = Sse2.Add(tmp0, tmp2); + Vector128 a1 = Sse2.Add(tmp1, tmp3); + Vector128 a2 = Sse2.Subtract(tmp1, tmp3); + Vector128 a3 = Sse2.Subtract(tmp0, tmp2); + Vector128 b0 = Sse2.Add(a0, a1); + Vector128 b1 = Sse2.Add(a3, a2); + Vector128 b2 = Sse2.Subtract(a3, a2); + Vector128 b3 = Sse2.Subtract(a0, a1); + + // a00 a01 a02 a03 b00 b01 b02 b03 + // a10 a11 a12 a13 b10 b11 b12 b13 + // a20 a21 a22 a23 b20 b21 b22 b23 + // a30 a31 a32 a33 b30 b31 b32 b33 + // Transpose the two 4x4. + Vector128 transpose00 = Sse2.UnpackLow(b0, b1); + Vector128 transpose01 = Sse2.UnpackLow(b2, b3); + Vector128 transpose02 = Sse2.UnpackHigh(b0, b1); + Vector128 transpose03 = Sse2.UnpackHigh(b2, b3); + + // a00 a10 a01 a11 a02 a12 a03 a13 + // a20 a30 a21 a31 a22 a32 a23 a33 + // b00 b10 b01 b11 b02 b12 b03 b13 + // b20 b30 b21 b31 b22 b32 b23 b33 + Vector128 transpose10 = Sse2.UnpackLow(transpose00.AsInt32(), transpose01.AsInt32()); + Vector128 transpose11 = Sse2.UnpackLow(transpose02.AsInt32(), transpose03.AsInt32()); + Vector128 transpose12 = Sse2.UnpackHigh(transpose00.AsInt32(), transpose01.AsInt32()); + Vector128 transpose13 = Sse2.UnpackHigh(transpose02.AsInt32(), transpose03.AsInt32()); + + // a00 a10 a20 a30 a01 a11 a21 a31 + // b00 b10 b20 b30 b01 b11 b21 b31 + // a02 a12 a22 a32 a03 a13 a23 a33 + // b02 b12 a22 b32 b03 b13 b23 b33 + Vector128 output0 = Sse2.UnpackLow(transpose10.AsInt64(), transpose11.AsInt64()); + Vector128 output1 = Sse2.UnpackHigh(transpose10.AsInt64(), transpose11.AsInt64()); + Vector128 output2 = Sse2.UnpackLow(transpose12.AsInt64(), transpose13.AsInt64()); + Vector128 output3 = Sse2.UnpackHigh(transpose12.AsInt64(), transpose13.AsInt64()); + + // a00 a10 a20 a30 b00 b10 b20 b30 + // a01 a11 a21 a31 b01 b11 b21 b31 + // a02 a12 a22 a32 b02 b12 b22 b32 + // a03 a13 a23 a33 b03 b13 b23 b33 + // Horizontal pass and difference of weighted sums. + Vector128 w0 = Unsafe.As>(ref MemoryMarshal.GetReference(w)); + Vector128 w8 = Unsafe.As>(ref MemoryMarshal.GetReference(w.Slice(8, 8))); + + // Calculate a and b (two 4x4 at once). + a0 = Sse2.Add(output0.AsInt16(), output2.AsInt16()); + a1 = Sse2.Add(output1.AsInt16(), output3.AsInt16()); + a2 = Sse2.Subtract(output1.AsInt16(), output3.AsInt16()); + a3 = Sse2.Subtract(output0.AsInt16(), output2.AsInt16()); + b0 = Sse2.Add(a0, a1); + b1 = Sse2.Add(a3, a2); + b2 = Sse2.Subtract(a3, a2); + b3 = Sse2.Subtract(a0, a1); + + // Separate the transforms of inA and inB. + Vector128 ab0 = Sse2.UnpackLow(b0.AsInt64(), b1.AsInt64()); + Vector128 ab2 = Sse2.UnpackLow(b2.AsInt64(), b3.AsInt64()); + Vector128 bb0 = Sse2.UnpackHigh(b0.AsInt64(), b1.AsInt64()); + Vector128 bb2 = Sse2.UnpackHigh(b2.AsInt64(), b3.AsInt64()); + + Vector128 ab0Abs = Ssse3.Abs(ab0.AsInt16()); + Vector128 ab2Abs = Ssse3.Abs(ab2.AsInt16()); + Vector128 b0Abs = Ssse3.Abs(bb0.AsInt16()); + Vector128 bb2Abs = Ssse3.Abs(bb2.AsInt16()); + + // weighted sums. + Vector128 ab0mulw0 = Sse2.MultiplyAddAdjacent(ab0Abs.AsInt16(), w0.AsInt16()); + Vector128 ab2mulw8 = Sse2.MultiplyAddAdjacent(ab2Abs.AsInt16(), w8.AsInt16()); + Vector128 b0mulw0 = Sse2.MultiplyAddAdjacent(b0Abs.AsInt16(), w0.AsInt16()); + Vector128 bb2mulw8 = Sse2.MultiplyAddAdjacent(bb2Abs.AsInt16(), w8.AsInt16()); + Vector128 ab0ab2Sum = Sse2.Add(ab0mulw0, ab2mulw8); + Vector128 b0w0bb2w8Sum = Sse2.Add(b0mulw0, bb2mulw8); + + // difference of weighted sums. + Vector128 result = Sse2.Subtract(ab0ab2Sum.AsInt32(), b0w0bb2w8Sum.AsInt32()); + + ref int outputRef = ref MemoryMarshal.GetReference(sum); + Unsafe.As>(ref outputRef) = result.AsInt32(); return sum[3] + sum[2] + sum[1] + sum[0]; -#pragma warning restore SA1503 // Braces should not be omitted } #endif From d6d1868343831184d94482895e5f4d3837e643cf Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 9 Nov 2021 12:40:27 +0100 Subject: [PATCH 64/85] Test Hadamard transform only with and without HardwareIntrinsics --- tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs b/tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs index 349a0c8fca..f8b488fde5 100644 --- a/tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs +++ b/tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs @@ -45,13 +45,7 @@ private static void RunHadamardTransformTest() public void HadamardTransform_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunHadamardTransformTest, HwIntrinsics.AllowAll); [Fact] - public void HadamardTransform_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunHadamardTransformTest, HwIntrinsics.DisableSSE2); - - [Fact] - public void HadamardTransform_WithoutSSE41_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunHadamardTransformTest, HwIntrinsics.DisableSSE41); - - [Fact] - public void HadamardTransform_WithoutSSE2AndSSE41_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunHadamardTransformTest, HwIntrinsics.DisableSSE41 | HwIntrinsics.DisableSSE2); + public void HadamardTransform_WithoutHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunHadamardTransformTest, HwIntrinsics.DisableHWIntrinsic); #endif } From 99a3510e279a38a8c7c733d1c29f63fb3772d49d Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 9 Nov 2021 12:53:54 +0100 Subject: [PATCH 65/85] Avoid pinning --- .../Formats/Webp/Lossy/LossyUtils.cs | 72 +++++++++---------- 1 file changed, 34 insertions(+), 38 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs index 82e2214701..aa35f9673c 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs @@ -4,6 +4,7 @@ using System; using System.Buffers.Binary; using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; #if SUPPORTS_RUNTIME_INTRINSICS using System.Numerics; using System.Runtime.Intrinsics; @@ -27,45 +28,40 @@ public static int Vp8Sse4X4(Span a, Span b) #if SUPPORTS_RUNTIME_INTRINSICS if (Sse2.IsSupported) { -#pragma warning disable SA1503 // Braces should not be omitted Span tmp = stackalloc int[4]; - fixed (byte* aPtr = a) - fixed (byte* bPtr = b) - fixed (int* tmpPtr = tmp) - { - // Load values. - Vector128 a0 = Sse2.LoadVector128(aPtr); - Vector128 a1 = Sse2.LoadVector128(aPtr + WebpConstants.Bps); - Vector128 a2 = Sse2.LoadVector128(aPtr + (WebpConstants.Bps * 2)); - Vector128 a3 = Sse2.LoadVector128(aPtr + (WebpConstants.Bps * 3)); - Vector128 b0 = Sse2.LoadVector128(bPtr); - Vector128 b1 = Sse2.LoadVector128(bPtr + WebpConstants.Bps); - Vector128 b2 = Sse2.LoadVector128(bPtr + (WebpConstants.Bps * 2)); - Vector128 b3 = Sse2.LoadVector128(bPtr + (WebpConstants.Bps * 3)); - - // Combine pair of lines. - Vector128 a01 = Sse2.UnpackLow(a0.AsInt32(), a1.AsInt32()); - Vector128 a23 = Sse2.UnpackLow(a2.AsInt32(), a3.AsInt32()); - Vector128 b01 = Sse2.UnpackLow(b0.AsInt32(), b1.AsInt32()); - Vector128 b23 = Sse2.UnpackLow(b2.AsInt32(), b3.AsInt32()); - - // Convert to 16b. - Vector128 a01s = Sse2.UnpackLow(a01.AsByte(), Vector128.Zero); - Vector128 a23s = Sse2.UnpackLow(a23.AsByte(), Vector128.Zero); - Vector128 b01s = Sse2.UnpackLow(b01.AsByte(), Vector128.Zero); - Vector128 b23s = Sse2.UnpackLow(b23.AsByte(), Vector128.Zero); - - // subtract, square and accumulate. - Vector128 d0 = Sse2.SubtractSaturate(a01s, b01s); - Vector128 d1 = Sse2.SubtractSaturate(a23s, b23s); - Vector128 e0 = Sse2.MultiplyAddAdjacent(d0.AsInt16(), d0.AsInt16()); - Vector128 e1 = Sse2.MultiplyAddAdjacent(d1.AsInt16(), d1.AsInt16()); - Vector128 sum = Sse2.Add(e0, e1); - - Sse2.Store(tmpPtr, sum); - return tmp[3] + tmp[2] + tmp[1] + tmp[0]; - } -#pragma warning restore SA1503 // Braces should not be omitted + + // Load values. + Vector128 a0 = Unsafe.As>(ref MemoryMarshal.GetReference(a)); + Vector128 a1 = Unsafe.As>(ref MemoryMarshal.GetReference(a.Slice(WebpConstants.Bps, 8))); + Vector128 a2 = Unsafe.As>(ref MemoryMarshal.GetReference(a.Slice(WebpConstants.Bps * 2, 8))); + Vector128 a3 = Unsafe.As>(ref MemoryMarshal.GetReference(a.Slice(WebpConstants.Bps * 3, 8))); + Vector128 b0 = Unsafe.As>(ref MemoryMarshal.GetReference(b)); + Vector128 b1 = Unsafe.As>(ref MemoryMarshal.GetReference(b.Slice(WebpConstants.Bps, 8))); + Vector128 b2 = Unsafe.As>(ref MemoryMarshal.GetReference(b.Slice(WebpConstants.Bps * 2, 8))); + Vector128 b3 = Unsafe.As>(ref MemoryMarshal.GetReference(b.Slice(WebpConstants.Bps * 3, 8))); + + // Combine pair of lines. + Vector128 a01 = Sse2.UnpackLow(a0.AsInt32(), a1.AsInt32()); + Vector128 a23 = Sse2.UnpackLow(a2.AsInt32(), a3.AsInt32()); + Vector128 b01 = Sse2.UnpackLow(b0.AsInt32(), b1.AsInt32()); + Vector128 b23 = Sse2.UnpackLow(b2.AsInt32(), b3.AsInt32()); + + // Convert to 16b. + Vector128 a01s = Sse2.UnpackLow(a01.AsByte(), Vector128.Zero); + Vector128 a23s = Sse2.UnpackLow(a23.AsByte(), Vector128.Zero); + Vector128 b01s = Sse2.UnpackLow(b01.AsByte(), Vector128.Zero); + Vector128 b23s = Sse2.UnpackLow(b23.AsByte(), Vector128.Zero); + + // subtract, square and accumulate. + Vector128 d0 = Sse2.SubtractSaturate(a01s, b01s); + Vector128 d1 = Sse2.SubtractSaturate(a23s, b23s); + Vector128 e0 = Sse2.MultiplyAddAdjacent(d0.AsInt16(), d0.AsInt16()); + Vector128 e1 = Sse2.MultiplyAddAdjacent(d1.AsInt16(), d1.AsInt16()); + Vector128 sum = Sse2.Add(e0, e1); + + ref int outputRef = ref MemoryMarshal.GetReference(tmp); + Unsafe.As>(ref outputRef) = sum; + return tmp[3] + tmp[2] + tmp[1] + tmp[0]; } else #endif From 42c2cf7a799af7c5a6b504ec6233fc6a7308c030 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 9 Nov 2021 13:40:40 +0100 Subject: [PATCH 66/85] Disable SA1401 in file: Fields should be private --- src/ImageSharp/Formats/Webp/Lossy/Vp8SegmentInfo.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8SegmentInfo.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8SegmentInfo.cs index 71983055c0..2ce383d9e1 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8SegmentInfo.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8SegmentInfo.cs @@ -10,6 +10,7 @@ internal class Vp8SegmentInfo /// /// Gets the quantization matrix y1. /// +#pragma warning disable SA1401 // Fields should be private public Vp8Matrix Y1; /// @@ -21,6 +22,7 @@ internal class Vp8SegmentInfo /// Gets the quantization matrix uv. /// public Vp8Matrix Uv; +#pragma warning restore SA1401 // Fields should be private /// /// Gets or sets the quant-susceptibility, range [-127,127]. Zero is neutral. Lower values indicate a lower risk of blurriness. From 8160a0eeb6a7bb5e8dc65ca1827a754d5a0e1e81 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 9 Nov 2021 13:40:54 +0100 Subject: [PATCH 67/85] Pass Vp8Matrix as ref --- src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs | 34 +++++++++---------- .../Formats/WebP/QuantEncTests.cs | 2 +- 2 files changed, 17 insertions(+), 19 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs index 4c3a2ff5e3..97ef27d259 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs @@ -315,14 +315,14 @@ public static int ReconstructIntra16(Vp8EncIterator it, Vp8SegmentInfo dqm, Vp8M } Vp8Encoding.FTransformWht(tmp, dcTmp, scratch); - nz |= QuantizeBlock(dcTmp, rd.YDcLevels, dqm.Y2) << 24; + nz |= QuantizeBlock(dcTmp, rd.YDcLevels, ref dqm.Y2) << 24; for (n = 0; n < 16; n += 2) { // Zero-out the first coeff, so that: a) nz is correct below, and // b) finding 'last' non-zero coeffs in SetResidualCoeffs() is simplified. tmp[n * 16] = tmp[(n + 1) * 16] = 0; - nz |= Quantize2Blocks(tmp.Slice(n * 16, 32), rd.YAcLevels.AsSpan(n * 16, 32), dqm.Y1) << n; + nz |= Quantize2Blocks(tmp.Slice(n * 16, 32), rd.YAcLevels.AsSpan(n * 16, 32), ref dqm.Y1) << n; } // Transform back. @@ -343,7 +343,7 @@ public static int ReconstructIntra4(Vp8EncIterator it, Vp8SegmentInfo dqm, Span< tmp.Clear(); scratch.Clear(); Vp8Encoding.FTransform(src, reference, tmp, scratch); - int nz = QuantizeBlock(tmp, levels, dqm.Y1); + int nz = QuantizeBlock(tmp, levels, ref dqm.Y1); Vp8Encoding.ITransform(reference, tmp, yuvOut, false, scratch); return nz; @@ -370,11 +370,11 @@ public static int ReconstructUv(Vp8EncIterator it, Vp8SegmentInfo dqm, Vp8ModeSc scratch); } - CorrectDcValues(it, dqm.Uv, tmp, rd); + CorrectDcValues(it, ref dqm.Uv, tmp, rd); for (n = 0; n < 8; n += 2) { - nz |= Quantize2Blocks(tmp.Slice(n * 16, 32), rd.UvLevels.AsSpan(n * 16, 32), dqm.Uv) << n; + nz |= Quantize2Blocks(tmp.Slice(n * 16, 32), rd.UvLevels.AsSpan(n * 16, 32), ref dqm.Uv) << n; } for (n = 0; n < 8; n += 2) @@ -525,19 +525,18 @@ public static void RefineUsingDistortion(Vp8EncIterator it, Vp8SegmentInfo[] seg } [MethodImpl(InliningOptions.ShortMethod)] - public static int Quantize2Blocks(Span input, Span output, Vp8Matrix mtx) + public static int Quantize2Blocks(Span input, Span output, ref Vp8Matrix mtx) { - int nz = QuantizeBlock(input.Slice(0, 16), output.Slice(0, 16), mtx) << 0; - nz |= QuantizeBlock(input.Slice(1 * 16, 16), output.Slice(1 * 16, 16), mtx) << 1; + int nz = QuantizeBlock(input.Slice(0, 16), output.Slice(0, 16), ref mtx) << 0; + nz |= QuantizeBlock(input.Slice(1 * 16, 16), output.Slice(1 * 16, 16), ref mtx) << 1; return nz; } - public static int QuantizeBlock(Span input, Span output, Vp8Matrix mtx) + public static int QuantizeBlock(Span input, Span output, ref Vp8Matrix mtx) { #if SUPPORTS_RUNTIME_INTRINSICS if (Sse41.IsSupported) { -#pragma warning disable SA1503 // Braces should not be omitted // Load all inputs. Vector128 input0 = Unsafe.As>(ref MemoryMarshal.GetReference(input)); Vector128 input8 = Unsafe.As>(ref MemoryMarshal.GetReference(input.Slice(8, 8))); @@ -624,10 +623,9 @@ public static int QuantizeBlock(Span input, Span output, Vp8Matrix Vector128 packedOutput = Sse2.PackSignedSaturate(outZ0.AsInt16(), outZ8.AsInt16()); - // Detect if all 'out' values are zeroes or not. + // Detect if all 'out' values are zeros or not. Vector128 cmpeq = Sse2.CompareEqual(packedOutput, Vector128.Zero); return Sse2.MoveMask(cmpeq) != 0xffff ? 1 : 0; -#pragma warning restore SA1503 // Braces should not be omitted } else #endif @@ -675,7 +673,7 @@ public static int QuantizeBlock(Span input, Span output, Vp8Matrix // Quantize as usual, but also compute and return the quantization error. // Error is already divided by DSHIFT. - public static int QuantizeSingle(Span v, Vp8Matrix mtx) + public static int QuantizeSingle(Span v, ref Vp8Matrix mtx) { int v0 = v[0]; bool sign = v0 < 0; @@ -696,7 +694,7 @@ public static int QuantizeSingle(Span v, Vp8Matrix mtx) return (sign ? -v0 : v0) >> DSCALE; } - public static void CorrectDcValues(Vp8EncIterator it, Vp8Matrix mtx, Span tmp, Vp8ModeScore rd) + public static void CorrectDcValues(Vp8EncIterator it, ref Vp8Matrix mtx, Span tmp, Vp8ModeScore rd) { #pragma warning disable SA1005 // Single line comments should begin with single space // | top[0] | top[1] @@ -713,13 +711,13 @@ public static void CorrectDcValues(Vp8EncIterator it, Vp8Matrix mtx, Span Span left = it.LeftDerr.AsSpan(ch, 2); Span c = tmp.Slice(ch * 4 * 16, 4 * 16); c[0] += (short)(((C1 * top[0]) + (C2 * left[0])) >> (DSHIFT - DSCALE)); - int err0 = QuantizeSingle(c, mtx); + int err0 = QuantizeSingle(c, ref mtx); c[1 * 16] += (short)(((C1 * top[1]) + (C2 * err0)) >> (DSHIFT - DSCALE)); - int err1 = QuantizeSingle(c.Slice(1 * 16), mtx); + int err1 = QuantizeSingle(c.Slice(1 * 16), ref mtx); c[2 * 16] += (short)(((C1 * err0) + (C2 * left[1])) >> (DSHIFT - DSCALE)); - int err2 = QuantizeSingle(c.Slice(2 * 16), mtx); + int err2 = QuantizeSingle(c.Slice(2 * 16), ref mtx); c[3 * 16] += (short)(((C1 * err1) + (C2 * err2)) >> (DSHIFT - DSCALE)); - int err3 = QuantizeSingle(c.Slice(3 * 16), mtx); + int err3 = QuantizeSingle(c.Slice(3 * 16), ref mtx); rd.Derr[ch, 0] = err1; rd.Derr[ch, 1] = err2; diff --git a/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs b/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs index 7465c42cef..55738199b7 100644 --- a/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs +++ b/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs @@ -32,7 +32,7 @@ private static unsafe void RunQuantizeBlockTest() } // act - int actualResult = QuantEnc.QuantizeBlock(input, output, vp8Matrix); + int actualResult = QuantEnc.QuantizeBlock(input, output, ref vp8Matrix); // assert Assert.True(output.SequenceEqual(expectedOutput)); From 1418e53bfbb719c36d57f4ac46317ca990d2fba2 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 9 Nov 2021 14:58:31 +0100 Subject: [PATCH 68/85] Remove not need Clear of tmp buffer --- src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs index 408f6f066f..7c262a30ee 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs @@ -947,7 +947,6 @@ public static void Mean16x4(Span input, Span dc, Span tmp) #if SUPPORTS_RUNTIME_INTRINSICS if (Sse2.IsSupported) { - tmp.Clear(); Vector128 a0 = Unsafe.As>(ref MemoryMarshal.GetReference(input)); Vector128 a1 = Unsafe.As>(ref MemoryMarshal.GetReference(input.Slice(WebpConstants.Bps, 16))); Vector128 a2 = Unsafe.As>(ref MemoryMarshal.GetReference(input.Slice(WebpConstants.Bps * 2, 16))); From 3cfa040b2099a5c91c8b1e15e5f2fd4c440a6f77 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 9 Nov 2021 15:38:20 +0100 Subject: [PATCH 69/85] Use Ssse3.HorizontalAdd --- src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs | 14 +++++++------- .../Formats/Webp/Lossy/Vp8EncIterator.cs | 2 +- .../Formats/WebP/LossyUtilsTests.cs | 5 ++--- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs index 7c262a30ee..5b27af821d 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs @@ -942,7 +942,7 @@ public static void HFilter8i(Span u, Span v, int offset, int stride, FilterLoop24(v, offsetPlus4, 1, stride, 8, thresh, ithresh, hevThresh); } - public static void Mean16x4(Span input, Span dc, Span tmp) + public static void Mean16x4(Span input, Span dc) { #if SUPPORTS_RUNTIME_INTRINSICS if (Sse2.IsSupported) @@ -966,13 +966,13 @@ public static void Mean16x4(Span input, Span dc, Span tmp) Vector128 e0 = Sse2.Add(d0, d1); Vector128 e1 = Sse2.Add(d2, d3); Vector128 f0 = Sse2.Add(e0, e1); - ref ushort outputRef = ref MemoryMarshal.GetReference(tmp); - Unsafe.As>(ref outputRef) = f0.AsUInt16(); + Vector128 hadd = Ssse3.HorizontalAdd(f0.AsInt16(), f0.AsInt16()); + Vector64 lower = hadd.GetLower(); - dc[0] = (uint)(tmp[1] + tmp[0]); - dc[1] = (uint)(tmp[3] + tmp[2]); - dc[2] = (uint)(tmp[5] + tmp[4]); - dc[3] = (uint)(tmp[7] + tmp[6]); + dc[0] = (uint)lower.GetElement(0); + dc[1] = (uint)lower.GetElement(1); + dc[2] = (uint)lower.GetElement(2); + dc[3] = (uint)lower.GetElement(3); } else #endif diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8EncIterator.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8EncIterator.cs index 57e18832ed..6279aef656 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8EncIterator.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8EncIterator.cs @@ -363,7 +363,7 @@ public int FastMbAnalyze(int quality) uint m2; for (k = 0; k < 16; k += 4) { - LossyUtils.Mean16x4(this.YuvIn.AsSpan(YOffEnc + (k * WebpConstants.Bps)), dc.Slice(k, 4), tmp); + LossyUtils.Mean16x4(this.YuvIn.AsSpan(YOffEnc + (k * WebpConstants.Bps)), dc.Slice(k, 4)); } for (m = 0, m2 = 0, k = 0; k < 16; k++) diff --git a/tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs b/tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs index 16b8e11660..09727293ce 100644 --- a/tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs +++ b/tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs @@ -25,11 +25,10 @@ private static void RunMean16x4Test() 173, 175, 166, 155, 155, 159, 159, 158 }; uint[] dc = new uint[4]; - ushort[] tmp = new ushort[8]; uint[] expectedDc = { 1940, 2139, 2252, 1813 }; // act - LossyUtils.Mean16x4(input, dc, tmp); + LossyUtils.Mean16x4(input, dc); // assert Assert.True(dc.SequenceEqual(expectedDc)); @@ -73,7 +72,7 @@ private static void RunHadamardTransformTest() public void Mean16x4_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunMean16x4Test, HwIntrinsics.AllowAll); [Fact] - public void Mean16x4_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunMean16x4Test, HwIntrinsics.DisableSSE2); + public void Mean16x4_WithoutHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunMean16x4Test, HwIntrinsics.DisableHWIntrinsic); [Fact] public void HadamardTransform_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunHadamardTransformTest, HwIntrinsics.AllowAll); From 84732bf14722ef50e01f1fd21c6c86e61a77eae2 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 9 Nov 2021 15:39:16 +0100 Subject: [PATCH 70/85] Reverse access to bgr --- src/ImageSharp/Formats/Webp/Lossy/YuvConversion.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/YuvConversion.cs b/src/ImageSharp/Formats/Webp/Lossy/YuvConversion.cs index 24143785ab..a9cf876c80 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/YuvConversion.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/YuvConversion.cs @@ -307,9 +307,9 @@ public static uint LoadUv(byte u, byte v) => [MethodImpl(InliningOptions.ShortMethod)] public static void YuvToBgr(int y, int u, int v, Span bgr) { - bgr[0] = (byte)YuvToB(y, u); - bgr[1] = (byte)YuvToG(y, u, v); bgr[2] = (byte)YuvToR(y, v); + bgr[1] = (byte)YuvToG(y, u, v); + bgr[0] = (byte)YuvToB(y, u); } [MethodImpl(InliningOptions.ShortMethod)] From 50013d70f28c2d67e1a7e96e61174460e67fbc7f Mon Sep 17 00:00:00 2001 From: Brian Popow <38701097+brianpopow@users.noreply.github.com> Date: Tue, 9 Nov 2021 15:51:02 +0100 Subject: [PATCH 71/85] Update src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs Reverse access to dc Co-authored-by: James Jackson-South --- src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs index 5b27af821d..e6a4e61701 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs @@ -969,10 +969,10 @@ public static void Mean16x4(Span input, Span dc) Vector128 hadd = Ssse3.HorizontalAdd(f0.AsInt16(), f0.AsInt16()); Vector64 lower = hadd.GetLower(); - dc[0] = (uint)lower.GetElement(0); - dc[1] = (uint)lower.GetElement(1); - dc[2] = (uint)lower.GetElement(2); dc[3] = (uint)lower.GetElement(3); + dc[2] = (uint)lower.GetElement(2); + dc[1] = (uint)lower.GetElement(1); + dc[0] = (uint)lower.GetElement(0); } else #endif From f0cb89e811be0fefc6a5a4d2f76797e7a2d8822c Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 9 Nov 2021 16:36:42 +0100 Subject: [PATCH 72/85] Change IsSupported check from SSE2 to Ssse3 --- src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs index e6a4e61701..4ef9c56947 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs @@ -945,7 +945,7 @@ public static void HFilter8i(Span u, Span v, int offset, int stride, public static void Mean16x4(Span input, Span dc) { #if SUPPORTS_RUNTIME_INTRINSICS - if (Sse2.IsSupported) + if (Ssse3.IsSupported) { Vector128 a0 = Unsafe.As>(ref MemoryMarshal.GetReference(input)); Vector128 a1 = Unsafe.As>(ref MemoryMarshal.GetReference(input.Slice(WebpConstants.Bps, 16))); From 1452ba00836cca274719844100259606750d56b7 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 9 Nov 2021 16:40:55 +0100 Subject: [PATCH 73/85] Remove not needed GetLower --- src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs index 4ef9c56947..ac3b1d3806 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs @@ -967,12 +967,11 @@ public static void Mean16x4(Span input, Span dc) Vector128 e1 = Sse2.Add(d2, d3); Vector128 f0 = Sse2.Add(e0, e1); Vector128 hadd = Ssse3.HorizontalAdd(f0.AsInt16(), f0.AsInt16()); - Vector64 lower = hadd.GetLower(); - dc[3] = (uint)lower.GetElement(3); - dc[2] = (uint)lower.GetElement(2); - dc[1] = (uint)lower.GetElement(1); - dc[0] = (uint)lower.GetElement(0); + dc[3] = (uint)hadd.GetElement(3); + dc[2] = (uint)hadd.GetElement(2); + dc[1] = (uint)hadd.GetElement(1); + dc[0] = (uint)hadd.GetElement(0); } else #endif From de3140bbc29f4914425564538c849731b531dbeb Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 9 Nov 2021 16:58:48 +0100 Subject: [PATCH 74/85] Use Numerics.ReduceSum(sum) --- src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs index c1af2a4534..5b7d4d8981 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs @@ -27,8 +27,6 @@ public static int Vp8Sse4X4(Span a, Span b) #if SUPPORTS_RUNTIME_INTRINSICS if (Sse2.IsSupported) { - Span tmp = stackalloc int[4]; - // Load values. Vector128 a0 = Unsafe.As>(ref MemoryMarshal.GetReference(a)); Vector128 a1 = Unsafe.As>(ref MemoryMarshal.GetReference(a.Slice(WebpConstants.Bps, 8))); @@ -58,9 +56,7 @@ public static int Vp8Sse4X4(Span a, Span b) Vector128 e1 = Sse2.MultiplyAddAdjacent(d1.AsInt16(), d1.AsInt16()); Vector128 sum = Sse2.Add(e0, e1); - ref int outputRef = ref MemoryMarshal.GetReference(tmp); - Unsafe.As>(ref outputRef) = sum; - return tmp[3] + tmp[2] + tmp[1] + tmp[0]; + return Numerics.ReduceSum(sum); } else #endif @@ -658,9 +654,6 @@ public static int TTransform(Span input, Span w, Span scratch /// public static int TTransformSse41(Span inputA, Span inputB, Span w, Span scratch) { - Span sum = scratch.Slice(0, 4); - sum.Clear(); - // Load and combine inputs. Vector128 ina0 = Unsafe.As>(ref MemoryMarshal.GetReference(inputA)); Vector128 ina1 = Unsafe.As>(ref MemoryMarshal.GetReference(inputA.Slice(WebpConstants.Bps, 16))); @@ -765,9 +758,7 @@ public static int TTransformSse41(Span inputA, Span inputB, Span result = Sse2.Subtract(ab0ab2Sum.AsInt32(), b0w0bb2w8Sum.AsInt32()); - ref int outputRef = ref MemoryMarshal.GetReference(sum); - Unsafe.As>(ref outputRef) = result.AsInt32(); - return sum[3] + sum[2] + sum[1] + sum[0]; + return Numerics.ReduceSum(result); } #endif From 80a826f506ae94372b488c099969abd95dc6d16e Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 9 Nov 2021 17:28:30 +0100 Subject: [PATCH 75/85] Remove not needed clear --- src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs index 5b7d4d8981..febca037b5 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs @@ -13,7 +13,7 @@ // ReSharper disable InconsistentNaming namespace SixLabors.ImageSharp.Formats.Webp.Lossy { - internal static unsafe class LossyUtils + internal static class LossyUtils { [MethodImpl(InliningOptions.ShortMethod)] public static int Vp8Sse16X16(Span a, Span b) => GetSse(a, b, 16, 16); @@ -771,7 +771,6 @@ public static void TransformTwo(Span src, Span dst, Span scrat public static void TransformOne(Span src, Span dst, Span scratch) { Span tmp = scratch.Slice(0, 16); - tmp.Clear(); int tmpOffset = 0; for (int srcOffset = 0; srcOffset < 4; srcOffset++) { From 5abd7740e81d8d54bd24db235c3f90e1e5d02803 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 9 Nov 2021 18:05:14 +0100 Subject: [PATCH 76/85] Add Vp8Sse4X4 sse tests --- .../Formats/WebP/LossyUtilsTests.cs | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs b/tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs index f8b488fde5..15b312835d 100644 --- a/tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs +++ b/tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs @@ -10,6 +10,35 @@ namespace SixLabors.ImageSharp.Tests.Formats.WebP [Trait("Format", "Webp")] public class LossyUtilsTests { + private static void RunVp8Sse4X4Test() + { + byte[] a = + { + 27, 27, 28, 29, 29, 28, 27, 27, 27, 28, 28, 29, 29, 28, 28, 27, 129, 129, 129, 129, 129, 129, 129, + 129, 128, 128, 128, 128, 128, 128, 128, 128, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28, 28, 29, 29, 28, + 28, 27, 129, 129, 129, 129, 129, 129, 129, 129, 128, 128, 128, 128, 128, 128, 128, 128, 27, 27, 26, + 26, 26, 26, 27, 27, 27, 28, 28, 29, 29, 28, 28, 27, 129, 129, 129, 129, 129, 129, 129, 129, 128, + 128, 128, 128, 128, 128, 128, 128, 28, 27, 27, 26, 26, 27, 27, 28, 27, 28, 28, 29, 29, 28, 28, 27, + 129, 129, 129, 129, 129, 129, 129, 129, 128, 128, 128, 128, 128, 128, 128, 128 + }; + + byte[] b = + { + 26, 26, 26, 26, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 204, 204, 204, 204, 204, 204, 204, + 204, 204, 204, 204, 204, 204, 204, 204, 204, 26, 26, 26, 26, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 26, 26, 26, + 26, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 204, 204, 204, 204, 204, 204, 204, 204, 204, + 204, 204, 204, 204, 204, 204, 204, 26, 26, 26, 26, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204 + }; + + int expected = 27; + + int actual = LossyUtils.Vp8Sse4X4(a, b); + + Assert.Equal(expected, actual); + } + private static void RunHadamardTransformTest() { byte[] a = @@ -37,10 +66,19 @@ private static void RunHadamardTransformTest() Assert.Equal(expected, actual); } + [Fact] + public void Vp8Sse4X4_Works() => RunVp8Sse4X4Test(); + [Fact] public void HadamardTransform_Works() => RunHadamardTransformTest(); #if SUPPORTS_RUNTIME_INTRINSICS + [Fact] + public void Vp8Sse4X4_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunVp8Sse4X4Test, HwIntrinsics.AllowAll); + + [Fact] + public void Vp8Sse4X4_WithoutHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunVp8Sse4X4Test, HwIntrinsics.DisableHWIntrinsic); + [Fact] public void HadamardTransform_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunHadamardTransformTest, HwIntrinsics.AllowAll); From 5ead84416dfc37e7fa41a36a9d58e15ac85d4232 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 9 Nov 2021 18:56:30 +0100 Subject: [PATCH 77/85] Use Array.Clear to reset the arrays --- .../Formats/Webp/Lossy/Vp8ModeScore.cs | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8ModeScore.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8ModeScore.cs index 1c92a9d2d9..69841b557e 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8ModeScore.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8ModeScore.cs @@ -97,18 +97,11 @@ public Vp8ModeScore() public void Clear() { - this.YDcLevels.AsSpan().Clear(); - this.YAcLevels.AsSpan().Clear(); - this.UvLevels.AsSpan().Clear(); - this.ModesI4.AsSpan().Clear(); - - for (int i = 0; i < 2; i++) - { - for (int j = 0; j < 3; j++) - { - this.Derr[i, j] = 0; - } - } + Array.Clear(this.YDcLevels, 0, this.YDcLevels.Length); + Array.Clear(this.YAcLevels, 0, this.YAcLevels.Length); + Array.Clear(this.UvLevels, 0, this.UvLevels.Length); + Array.Clear(this.ModesI4, 0, this.ModesI4.Length); + Array.Clear(this.Derr, 0, this.Derr.Length); } public void InitScore() From 7d8225b59a633b08b51e74bbb960d4d52b420a84 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 9 Nov 2021 19:38:12 +0100 Subject: [PATCH 78/85] Use UnpackLow to set the dc values --- src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs index ac3b1d3806..3064ccc030 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs @@ -13,7 +13,7 @@ // ReSharper disable InconsistentNaming namespace SixLabors.ImageSharp.Formats.Webp.Lossy { - internal static unsafe class LossyUtils + internal static class LossyUtils { #if SUPPORTS_RUNTIME_INTRINSICS private static readonly Vector128 Mean16x4Mask = Vector128.Create((short)0x00ff).AsByte(); @@ -967,11 +967,10 @@ public static void Mean16x4(Span input, Span dc) Vector128 e1 = Sse2.Add(d2, d3); Vector128 f0 = Sse2.Add(e0, e1); Vector128 hadd = Ssse3.HorizontalAdd(f0.AsInt16(), f0.AsInt16()); + Vector128 wide = Sse2.UnpackLow(hadd, Vector128.Zero).AsUInt32(); - dc[3] = (uint)hadd.GetElement(3); - dc[2] = (uint)hadd.GetElement(2); - dc[1] = (uint)hadd.GetElement(1); - dc[0] = (uint)hadd.GetElement(0); + ref uint outputRef = ref MemoryMarshal.GetReference(dc); + Unsafe.As>(ref outputRef) = wide; } else #endif From 7312b1a8389c1824409205a5bbfd4ad14224d9c3 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 9 Nov 2021 19:43:49 +0100 Subject: [PATCH 79/85] Dont use slice --- .../Formats/Webp/Lossy/LossyUtils.cs | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs index febca037b5..19a71c3e56 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs @@ -28,14 +28,16 @@ public static int Vp8Sse4X4(Span a, Span b) if (Sse2.IsSupported) { // Load values. - Vector128 a0 = Unsafe.As>(ref MemoryMarshal.GetReference(a)); - Vector128 a1 = Unsafe.As>(ref MemoryMarshal.GetReference(a.Slice(WebpConstants.Bps, 8))); - Vector128 a2 = Unsafe.As>(ref MemoryMarshal.GetReference(a.Slice(WebpConstants.Bps * 2, 8))); - Vector128 a3 = Unsafe.As>(ref MemoryMarshal.GetReference(a.Slice(WebpConstants.Bps * 3, 8))); - Vector128 b0 = Unsafe.As>(ref MemoryMarshal.GetReference(b)); - Vector128 b1 = Unsafe.As>(ref MemoryMarshal.GetReference(b.Slice(WebpConstants.Bps, 8))); - Vector128 b2 = Unsafe.As>(ref MemoryMarshal.GetReference(b.Slice(WebpConstants.Bps * 2, 8))); - Vector128 b3 = Unsafe.As>(ref MemoryMarshal.GetReference(b.Slice(WebpConstants.Bps * 3, 8))); + ref byte aRef = ref MemoryMarshal.GetReference(a); + Vector128 a0 = Unsafe.As>(ref aRef); + Vector128 a1 = Unsafe.As>(ref Unsafe.Add(ref aRef, WebpConstants.Bps)); + Vector128 a2 = Unsafe.As>(ref Unsafe.Add(ref aRef, WebpConstants.Bps * 2)); + Vector128 a3 = Unsafe.As>(ref Unsafe.Add(ref aRef, WebpConstants.Bps * 3)); + ref byte bRef = ref MemoryMarshal.GetReference(b); + Vector128 b0 = Unsafe.As>(ref bRef); + Vector128 b1 = Unsafe.As>(ref Unsafe.Add(ref bRef, WebpConstants.Bps)); + Vector128 b2 = Unsafe.As>(ref Unsafe.Add(ref bRef, WebpConstants.Bps * 2)); + Vector128 b3 = Unsafe.As>(ref Unsafe.Add(ref bRef, WebpConstants.Bps * 3)); // Combine pair of lines. Vector128 a01 = Sse2.UnpackLow(a0.AsInt32(), a1.AsInt32()); From 3dd7c8ea41709173759b02eff4c51268eb2c9f33 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 9 Nov 2021 20:55:23 +0100 Subject: [PATCH 80/85] Remove unnecessary Clear() and scratch buffer --- src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs | 4 ++-- src/ImageSharp/Formats/Webp/Lossy/Vp8Histogram.cs | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs index 19a71c3e56..cb839559fa 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs @@ -127,7 +127,7 @@ public static int Vp8Disto4X4(Span a, Span b, Span w, Span> 5; } else @@ -654,7 +654,7 @@ public static int TTransform(Span input, Span w, Span scratch /// Returns the weighted sum of the absolute value of transformed coefficients. /// w[] contains a row-major 4 by 4 symmetric matrix. /// - public static int TTransformSse41(Span inputA, Span inputB, Span w, Span scratch) + public static int TTransformSse41(Span inputA, Span inputB, Span w) { // Load and combine inputs. Vector128 ina0 = Unsafe.As>(ref MemoryMarshal.GetReference(inputA)); diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Histogram.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Histogram.cs index 7192fa2d05..6e724e4758 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Histogram.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Histogram.cs @@ -49,7 +49,6 @@ public void CollectHistogram(Span reference, Span pred, int startBlo this.distribution.AsSpan().Clear(); for (j = startBlock; j < endBlock; j++) { - this.output.AsSpan().Clear(); this.Vp8FTransform(reference.Slice(WebpLookupTables.Vp8DspScan[j]), pred.Slice(WebpLookupTables.Vp8DspScan[j]), this.output); // Convert coefficients to bin. From 5630b25733e98b004b6a0bfe8996cbac47b6c304 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 9 Nov 2021 21:58:52 +0100 Subject: [PATCH 81/85] Remove more unnecessary Clear's --- src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs | 4 ---- src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs | 3 --- 2 files changed, 7 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs index 97ef27d259..d0baa260cc 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs @@ -340,8 +340,6 @@ public static int ReconstructIntra4(Vp8EncIterator it, Vp8SegmentInfo dqm, Span< Span reference = it.YuvP.AsSpan(Vp8Encoding.Vp8I4ModeOffsets[mode]); Span tmp = it.Scratch2.AsSpan(0, 16); Span scratch = it.Scratch3.AsSpan(0, 16); - tmp.Clear(); - scratch.Clear(); Vp8Encoding.FTransform(src, reference, tmp, scratch); int nz = QuantizeBlock(tmp, levels, ref dqm.Y1); Vp8Encoding.ITransform(reference, tmp, yuvOut, false, scratch); @@ -357,8 +355,6 @@ public static int ReconstructUv(Vp8EncIterator it, Vp8SegmentInfo dqm, Vp8ModeSc int n; Span tmp = it.Scratch2.AsSpan(0, 8 * 16); Span scratch = it.Scratch3.AsSpan(0, 16); - tmp.Clear(); - scratch.Clear(); for (n = 0; n < 8; n += 2) { diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs index 0567a0f27d..af7e8eaa36 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs @@ -81,7 +81,6 @@ public static void ITransformOne(Span reference, Span input, Span tmp = scratch.Slice(0, 16); - tmp.Clear(); for (i = 0; i < 4; i++) { // vertical pass. @@ -124,7 +123,6 @@ public static void FTransform(Span src, Span reference, Span { int i; Span tmp = scratch.Slice(0, 16); - tmp.Clear(); int srcIdx = 0; int refIdx = 0; @@ -163,7 +161,6 @@ public static void FTransform(Span src, Span reference, Span public static void FTransformWht(Span input, Span output, Span scratch) { Span tmp = scratch.Slice(0, 16); - tmp.Clear(); int i; int inputIdx = 0; From 7e20c5daaadefdd3c1073088bc74f1adf0d3436b Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Wed, 10 Nov 2021 12:10:46 +0100 Subject: [PATCH 82/85] Rename Vp8Sse methods --- src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs | 13 ++++++++----- .../Formats/WebP/LossyUtilsTests.cs | 2 +- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs index d019b5cd54..a10ec6eabb 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs @@ -19,14 +19,17 @@ internal static class LossyUtils private static readonly Vector128 Mean16x4Mask = Vector128.Create((short)0x00ff).AsByte(); #endif + // Note: method name in libwebp reference implementation is called VP8SSE16x16. [MethodImpl(InliningOptions.ShortMethod)] - public static int Vp8Sse16X16(Span a, Span b) => GetSse(a, b, 16, 16); + public static int Vp8_Sse16X16(Span a, Span b) => Vp8_SseNxN(a, b, 16, 16); + // Note: method name in libwebp reference implementation is called VP8SSE16x8. [MethodImpl(InliningOptions.ShortMethod)] - public static int Vp8Sse16X8(Span a, Span b) => GetSse(a, b, 16, 8); + public static int Vp8_Sse16X8(Span a, Span b) => Vp8_SseNxN(a, b, 16, 8); + // Note: method name in libwebp reference implementation is called VP8SSE4x4. [MethodImpl(InliningOptions.ShortMethod)] - public static int Vp8Sse4X4(Span a, Span b) + public static int Vp8_Sse4X4(Span a, Span b) { #if SUPPORTS_RUNTIME_INTRINSICS if (Sse2.IsSupported) @@ -67,12 +70,12 @@ public static int Vp8Sse4X4(Span a, Span b) else #endif { - return GetSse(a, b, 4, 4); + return Vp8_SseNxN(a, b, 4, 4); } } [MethodImpl(InliningOptions.ShortMethod)] - public static int GetSse(Span a, Span b, int w, int h) + public static int Vp8_SseNxN(Span a, Span b, int w, int h) { int count = 0; int aOffset = 0; diff --git a/tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs b/tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs index 9d7545c321..d176a5933d 100644 --- a/tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs +++ b/tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs @@ -35,7 +35,7 @@ private static void RunVp8Sse4X4Test() int expected = 27; - int actual = LossyUtils.Vp8Sse4X4(a, b); + int actual = LossyUtils.Vp8_Sse4X4(a, b); Assert.Equal(expected, actual); } From 1997d595e7d496c031e861b8f094a3ba05f94fd0 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Wed, 10 Nov 2021 12:14:08 +0100 Subject: [PATCH 83/85] Fix build error due to renaming --- src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs index d0baa260cc..38ed80590d 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs @@ -66,7 +66,7 @@ public static void PickBestIntra16(Vp8EncIterator it, ref Vp8ModeScore rd, Vp8Se rdCur.Nz = (uint)ReconstructIntra16(it, dqm, rdCur, tmpDst, mode); // Measure RD-score. - rdCur.D = LossyUtils.Vp8Sse16X16(src, tmpDst); + rdCur.D = LossyUtils.Vp8_Sse16X16(src, tmpDst); rdCur.SD = tlambda != 0 ? Mult8B(tlambda, LossyUtils.Vp8Disto16X16(src, tmpDst, WeightY, scratch)) : 0; rdCur.H = WebpConstants.Vp8FixedCostsI16[mode]; rdCur.R = it.GetCostLuma16(rdCur, proba, res); @@ -160,7 +160,7 @@ public static bool PickBestIntra4(Vp8EncIterator it, ref Vp8ModeScore rd, Vp8Seg rdTmp.Nz = (uint)ReconstructIntra4(it, dqm, tmpLevels, src, tmpDst, mode); // Compute RD-score. - rdTmp.D = LossyUtils.Vp8Sse4X4(src, tmpDst); + rdTmp.D = LossyUtils.Vp8_Sse4X4(src, tmpDst); rdTmp.SD = tlambda != 0 ? Mult8B(tlambda, LossyUtils.Vp8Disto4X4(src, tmpDst, WeightY, scratch)) : 0; rdTmp.H = modeCosts[mode]; @@ -251,7 +251,7 @@ public static void PickBestUv(Vp8EncIterator it, ref Vp8ModeScore rd, Vp8Segment rdUv.Nz = (uint)ReconstructUv(it, dqm, rdUv, tmpDst, mode); // Compute RD-score - rdUv.D = LossyUtils.Vp8Sse16X8(src, tmpDst); + rdUv.D = LossyUtils.Vp8_Sse16X8(src, tmpDst); rdUv.SD = 0; // not calling TDisto here: it tends to flatten areas. rdUv.H = WebpConstants.Vp8FixedCostsUv[mode]; rdUv.R = it.GetCostUv(rdUv, proba, res); @@ -407,7 +407,7 @@ public static void RefineUsingDistortion(Vp8EncIterator it, Vp8SegmentInfo[] seg for (mode = 0; mode < WebpConstants.NumPredModes; ++mode) { Span reference = it.YuvP.AsSpan(Vp8Encoding.Vp8I16ModeOffsets[mode]); - long score = (LossyUtils.Vp8Sse16X16(src, reference) * WebpConstants.RdDistoMult) + (WebpConstants.Vp8FixedCostsI16[mode] * lambdaDi16); + long score = (LossyUtils.Vp8_Sse16X16(src, reference) * WebpConstants.RdDistoMult) + (WebpConstants.Vp8FixedCostsI16[mode] * lambdaDi16); if (mode > 0 && WebpConstants.Vp8FixedCostsI16[mode] > bitLimit) { @@ -454,7 +454,7 @@ public static void RefineUsingDistortion(Vp8EncIterator it, Vp8SegmentInfo[] seg for (mode = 0; mode < WebpConstants.NumBModes; ++mode) { Span reference = it.YuvP.AsSpan(Vp8Encoding.Vp8I4ModeOffsets[mode]); - long score = (LossyUtils.Vp8Sse4X4(src, reference) * WebpConstants.RdDistoMult) + (modeCosts[mode] * lambdaDi4); + long score = (LossyUtils.Vp8_Sse4X4(src, reference) * WebpConstants.RdDistoMult) + (modeCosts[mode] * lambdaDi4); if (score < bestI4Score) { bestI4Mode = mode; @@ -503,7 +503,7 @@ public static void RefineUsingDistortion(Vp8EncIterator it, Vp8SegmentInfo[] seg for (mode = 0; mode < WebpConstants.NumPredModes; ++mode) { Span reference = it.YuvP.AsSpan(Vp8Encoding.Vp8UvModeOffsets[mode]); - long score = (LossyUtils.Vp8Sse16X8(src, reference) * WebpConstants.RdDistoMult) + (WebpConstants.Vp8FixedCostsUv[mode] * lambdaDuv); + long score = (LossyUtils.Vp8_Sse16X8(src, reference) * WebpConstants.RdDistoMult) + (WebpConstants.Vp8FixedCostsUv[mode] * lambdaDuv); if (score < bestUvScore) { bestMode = mode; From 55040a094b97a2941a6de5452b93d407a1af7f89 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Thu, 11 Nov 2021 12:52:07 +1100 Subject: [PATCH 84/85] Update codcov and config --- ImageSharp.sln | 5 +++-- codecov.yml | 11 +++++++++++ shared-infrastructure | 2 +- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/ImageSharp.sln b/ImageSharp.sln index c188d93150..f16f98ac59 100644 --- a/ImageSharp.sln +++ b/ImageSharp.sln @@ -1,7 +1,7 @@  Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio Version 16 -VisualStudioVersion = 16.0.28902.138 +# Visual Studio Version 17 +VisualStudioVersion = 17.0.31903.59 MinimumVisualStudioVersion = 10.0.40219.1 Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "_root", "_root", "{C317F1B1-D75E-4C6D-83EB-80367343E0D7}" ProjectSection(SolutionItems) = preProject @@ -13,6 +13,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "_root", "_root", "{C317F1B1 ci-build.ps1 = ci-build.ps1 ci-pack.ps1 = ci-pack.ps1 ci-test.ps1 = ci-test.ps1 + codecov.yml = codecov.yml Directory.Build.props = Directory.Build.props Directory.Build.targets = Directory.Build.targets LICENSE = LICENSE diff --git a/codecov.yml b/codecov.yml index 833fc0a51a..310eefb8c2 100644 --- a/codecov.yml +++ b/codecov.yml @@ -9,3 +9,14 @@ codecov: # Avoid Report Expired # https://docs.codecov.io/docs/codecov-yaml#section-expired-reports max_report_age: off + +coverage: + # Use integer precision + # https://docs.codecov.com/docs/codecovyml-reference#coverageprecision + precision: 0 + + # Explicitly control coverage status checks + # https://docs.codecov.com/docs/commit-status#disabling-a-status + status: + project: on + patch: off diff --git a/shared-infrastructure b/shared-infrastructure index a042aba176..ac1f5ee0ca 160000 --- a/shared-infrastructure +++ b/shared-infrastructure @@ -1 +1 @@ -Subproject commit a042aba176cdb840d800c6ed4cfe41a54fb7b1e3 +Subproject commit ac1f5ee0ca70c070ecdda8771198a052623ac247 From 3ac8b2b713f97d2840e022dd4eebfc5ba2738cf9 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Thu, 11 Nov 2021 13:22:12 +1100 Subject: [PATCH 85/85] Use shared config --- shared-infrastructure | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shared-infrastructure b/shared-infrastructure index ac1f5ee0ca..33cb12ca77 160000 --- a/shared-infrastructure +++ b/shared-infrastructure @@ -1 +1 @@ -Subproject commit ac1f5ee0ca70c070ecdda8771198a052623ac247 +Subproject commit 33cb12ca77f919b44de56f344d2627cc2a108c3a