diff --git a/ImageSharp.sln b/ImageSharp.sln
index c188d93150..f16f98ac59 100644
--- a/ImageSharp.sln
+++ b/ImageSharp.sln
@@ -1,7 +1,7 @@
Microsoft Visual Studio Solution File, Format Version 12.00
-# Visual Studio Version 16
-VisualStudioVersion = 16.0.28902.138
+# Visual Studio Version 17
+VisualStudioVersion = 17.0.31903.59
MinimumVisualStudioVersion = 10.0.40219.1
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "_root", "_root", "{C317F1B1-D75E-4C6D-83EB-80367343E0D7}"
ProjectSection(SolutionItems) = preProject
@@ -13,6 +13,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "_root", "_root", "{C317F1B1
ci-build.ps1 = ci-build.ps1
ci-pack.ps1 = ci-pack.ps1
ci-test.ps1 = ci-test.ps1
+ codecov.yml = codecov.yml
Directory.Build.props = Directory.Build.props
Directory.Build.targets = Directory.Build.targets
LICENSE = LICENSE
diff --git a/codecov.yml b/codecov.yml
index 833fc0a51a..310eefb8c2 100644
--- a/codecov.yml
+++ b/codecov.yml
@@ -9,3 +9,14 @@ codecov:
# Avoid Report Expired
# https://docs.codecov.io/docs/codecov-yaml#section-expired-reports
max_report_age: off
+
+coverage:
+ # Use integer precision
+ # https://docs.codecov.com/docs/codecovyml-reference#coverageprecision
+ precision: 0
+
+ # Explicitly control coverage status checks
+ # https://docs.codecov.com/docs/commit-status#disabling-a-status
+ status:
+ project: on
+ patch: off
diff --git a/shared-infrastructure b/shared-infrastructure
index a042aba176..33cb12ca77 160000
--- a/shared-infrastructure
+++ b/shared-infrastructure
@@ -1 +1 @@
-Subproject commit a042aba176cdb840d800c6ed4cfe41a54fb7b1e3
+Subproject commit 33cb12ca77f919b44de56f344d2627cc2a108c3a
diff --git a/src/ImageSharp/Color/Color.Conversions.cs b/src/ImageSharp/Color/Color.Conversions.cs
index 0455fd26a4..bf7869e53d 100644
--- a/src/ImageSharp/Color/Color.Conversions.cs
+++ b/src/ImageSharp/Color/Color.Conversions.cs
@@ -17,56 +17,118 @@ public readonly partial struct Color
///
/// The containing the color information.
[MethodImpl(InliningOptions.ShortMethod)]
- public Color(Rgba64 pixel) => this.data = pixel;
+ public Color(Rgba64 pixel)
+ {
+ this.data = pixel;
+ this.boxedHighPrecisionPixel = null;
+ }
+
+ ///
+ /// Initializes a new instance of the struct.
+ ///
+ /// The containing the color information.
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public Color(Rgb48 pixel)
+ {
+ this.data = new Rgba64(pixel.R, pixel.G, pixel.B, ushort.MaxValue);
+ this.boxedHighPrecisionPixel = null;
+ }
+
+ ///
+ /// Initializes a new instance of the struct.
+ ///
+ /// The containing the color information.
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public Color(La32 pixel)
+ {
+ this.data = new Rgba64(pixel.L, pixel.L, pixel.L, pixel.A);
+ this.boxedHighPrecisionPixel = null;
+ }
+
+ ///
+ /// Initializes a new instance of the struct.
+ ///
+ /// The containing the color information.
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public Color(L16 pixel)
+ {
+ this.data = new Rgba64(pixel.PackedValue, pixel.PackedValue, pixel.PackedValue, ushort.MaxValue);
+ this.boxedHighPrecisionPixel = null;
+ }
///
/// Initializes a new instance of the struct.
///
/// The containing the color information.
[MethodImpl(InliningOptions.ShortMethod)]
- public Color(Rgba32 pixel) => this.data = new Rgba64(pixel);
+ public Color(Rgba32 pixel)
+ {
+ this.data = new Rgba64(pixel);
+ this.boxedHighPrecisionPixel = null;
+ }
///
/// Initializes a new instance of the struct.
///
/// The containing the color information.
[MethodImpl(InliningOptions.ShortMethod)]
- public Color(Argb32 pixel) => this.data = new Rgba64(pixel);
+ public Color(Argb32 pixel)
+ {
+ this.data = new Rgba64(pixel);
+ this.boxedHighPrecisionPixel = null;
+ }
///
/// Initializes a new instance of the struct.
///
/// The containing the color information.
[MethodImpl(InliningOptions.ShortMethod)]
- public Color(Bgra32 pixel) => this.data = new Rgba64(pixel);
+ public Color(Bgra32 pixel)
+ {
+ this.data = new Rgba64(pixel);
+ this.boxedHighPrecisionPixel = null;
+ }
///
/// Initializes a new instance of the struct.
///
/// The containing the color information.
[MethodImpl(InliningOptions.ShortMethod)]
- public Color(Rgb24 pixel) => this.data = new Rgba64(pixel);
+ public Color(Rgb24 pixel)
+ {
+ this.data = new Rgba64(pixel);
+ this.boxedHighPrecisionPixel = null;
+ }
///
/// Initializes a new instance of the struct.
///
/// The containing the color information.
[MethodImpl(InliningOptions.ShortMethod)]
- public Color(Bgr24 pixel) => this.data = new Rgba64(pixel);
+ public Color(Bgr24 pixel)
+ {
+ this.data = new Rgba64(pixel);
+ this.boxedHighPrecisionPixel = null;
+ }
///
/// Initializes a new instance of the struct.
///
/// The containing the color information.
[MethodImpl(InliningOptions.ShortMethod)]
- public Color(Vector4 vector) => this.data = new Rgba64(vector);
+ public Color(Vector4 vector)
+ {
+ vector = Numerics.Clamp(vector, Vector4.Zero, Vector4.One);
+ this.boxedHighPrecisionPixel = new RgbaVector(vector.X, vector.Y, vector.Z, vector.W);
+ this.data = default;
+ }
///
/// Converts a to .
///
/// The .
/// The .
- public static explicit operator Vector4(Color color) => color.data.ToVector4();
+ public static explicit operator Vector4(Color color) => color.ToVector4();
///
/// Converts an to .
@@ -74,24 +136,82 @@ public readonly partial struct Color
/// The .
/// The .
[MethodImpl(InliningOptions.ShortMethod)]
- public static explicit operator Color(Vector4 source) => new Color(source);
+ public static explicit operator Color(Vector4 source) => new(source);
[MethodImpl(InliningOptions.ShortMethod)]
- internal Rgba32 ToRgba32() => this.data.ToRgba32();
+ internal Rgba32 ToRgba32()
+ {
+ if (this.boxedHighPrecisionPixel is null)
+ {
+ return this.data.ToRgba32();
+ }
+
+ Rgba32 value = default;
+ this.boxedHighPrecisionPixel.ToRgba32(ref value);
+ return value;
+ }
[MethodImpl(InliningOptions.ShortMethod)]
- internal Bgra32 ToBgra32() => this.data.ToBgra32();
+ internal Bgra32 ToBgra32()
+ {
+ if (this.boxedHighPrecisionPixel is null)
+ {
+ return this.data.ToBgra32();
+ }
+
+ Bgra32 value = default;
+ value.FromScaledVector4(this.boxedHighPrecisionPixel.ToScaledVector4());
+ return value;
+ }
[MethodImpl(InliningOptions.ShortMethod)]
- internal Argb32 ToArgb32() => this.data.ToArgb32();
+ internal Argb32 ToArgb32()
+ {
+ if (this.boxedHighPrecisionPixel is null)
+ {
+ return this.data.ToArgb32();
+ }
+
+ Argb32 value = default;
+ value.FromScaledVector4(this.boxedHighPrecisionPixel.ToScaledVector4());
+ return value;
+ }
[MethodImpl(InliningOptions.ShortMethod)]
- internal Rgb24 ToRgb24() => this.data.ToRgb24();
+ internal Rgb24 ToRgb24()
+ {
+ if (this.boxedHighPrecisionPixel is null)
+ {
+ return this.data.ToRgb24();
+ }
+
+ Rgb24 value = default;
+ value.FromScaledVector4(this.boxedHighPrecisionPixel.ToScaledVector4());
+ return value;
+ }
[MethodImpl(InliningOptions.ShortMethod)]
- internal Bgr24 ToBgr24() => this.data.ToBgr24();
+ internal Bgr24 ToBgr24()
+ {
+ if (this.boxedHighPrecisionPixel is null)
+ {
+ return this.data.ToBgr24();
+ }
+
+ Bgr24 value = default;
+ value.FromScaledVector4(this.boxedHighPrecisionPixel.ToScaledVector4());
+ return value;
+ }
[MethodImpl(InliningOptions.ShortMethod)]
- internal Vector4 ToVector4() => this.data.ToVector4();
+ internal Vector4 ToVector4()
+ {
+ if (this.boxedHighPrecisionPixel is null)
+ {
+ return this.data.ToScaledVector4();
+ }
+
+ return this.boxedHighPrecisionPixel.ToScaledVector4();
+ }
}
}
diff --git a/src/ImageSharp/Color/Color.cs b/src/ImageSharp/Color/Color.cs
index d5eedc160b..7c21d62ddf 100644
--- a/src/ImageSharp/Color/Color.cs
+++ b/src/ImageSharp/Color/Color.cs
@@ -4,7 +4,6 @@
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
-using System.Runtime.InteropServices;
using SixLabors.ImageSharp.PixelFormats;
namespace SixLabors.ImageSharp
@@ -21,6 +20,7 @@ namespace SixLabors.ImageSharp
public readonly partial struct Color : IEquatable
{
private readonly Rgba64 data;
+ private readonly IPixel boxedHighPrecisionPixel;
[MethodImpl(InliningOptions.ShortMethod)]
private Color(byte r, byte g, byte b, byte a)
@@ -30,6 +30,8 @@ private Color(byte r, byte g, byte b, byte a)
ColorNumerics.UpscaleFrom8BitTo16Bit(g),
ColorNumerics.UpscaleFrom8BitTo16Bit(b),
ColorNumerics.UpscaleFrom8BitTo16Bit(a));
+
+ this.boxedHighPrecisionPixel = null;
}
[MethodImpl(InliningOptions.ShortMethod)]
@@ -40,6 +42,15 @@ private Color(byte r, byte g, byte b)
ColorNumerics.UpscaleFrom8BitTo16Bit(g),
ColorNumerics.UpscaleFrom8BitTo16Bit(b),
ushort.MaxValue);
+
+ this.boxedHighPrecisionPixel = null;
+ }
+
+ [MethodImpl(InliningOptions.ShortMethod)]
+ private Color(IPixel pixel)
+ {
+ this.boxedHighPrecisionPixel = pixel;
+ this.data = default;
}
///
@@ -52,13 +63,10 @@ private Color(byte r, byte g, byte b)
/// otherwise, false.
///
[MethodImpl(InliningOptions.ShortMethod)]
- public static bool operator ==(Color left, Color right)
- {
- return left.Equals(right);
- }
+ public static bool operator ==(Color left, Color right) => left.Equals(right);
///
- /// Checks whether two structures are equal.
+ /// Checks whether two structures are not equal.
///
/// The left hand operand.
/// The right hand operand.
@@ -67,10 +75,7 @@ private Color(byte r, byte g, byte b)
/// otherwise, false.
///
[MethodImpl(InliningOptions.ShortMethod)]
- public static bool operator !=(Color left, Color right)
- {
- return !left.Equals(right);
- }
+ public static bool operator !=(Color left, Color right) => !left.Equals(right);
///
/// Creates a from RGBA bytes.
@@ -81,7 +86,7 @@ private Color(byte r, byte g, byte b)
/// The alpha component (0-255).
/// The .
[MethodImpl(InliningOptions.ShortMethod)]
- public static Color FromRgba(byte r, byte g, byte b, byte a) => new Color(r, g, b, a);
+ public static Color FromRgba(byte r, byte g, byte b, byte a) => new(r, g, b, a);
///
/// Creates a from RGB bytes.
@@ -91,7 +96,46 @@ private Color(byte r, byte g, byte b)
/// The blue component (0-255).
/// The .
[MethodImpl(InliningOptions.ShortMethod)]
- public static Color FromRgb(byte r, byte g, byte b) => new Color(r, g, b);
+ public static Color FromRgb(byte r, byte g, byte b) => new(r, g, b);
+
+ ///
+ /// Creates a from the given .
+ ///
+ /// The pixel to convert from.
+ /// The pixel format.
+ /// The .
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public static Color FromPixel(TPixel pixel)
+ where TPixel : unmanaged, IPixel
+ {
+ // Avoid boxing in case we can convert to Rgba64 safely and efficiently
+ if (typeof(TPixel) == typeof(Rgba64))
+ {
+ return new((Rgba64)(object)pixel);
+ }
+ else if (typeof(TPixel) == typeof(Rgb48))
+ {
+ return new((Rgb48)(object)pixel);
+ }
+ else if (typeof(TPixel) == typeof(La32))
+ {
+ return new((La32)(object)pixel);
+ }
+ else if (typeof(TPixel) == typeof(L16))
+ {
+ return new((L16)(object)pixel);
+ }
+ else if (Unsafe.SizeOf() <= Unsafe.SizeOf())
+ {
+ Rgba32 p = default;
+ pixel.ToRgba32(ref p);
+ return new(p);
+ }
+ else
+ {
+ return new(pixel);
+ }
+ }
///
/// Creates a new instance of the struct
@@ -213,7 +257,7 @@ public Color WithAlpha(float alpha)
public override string ToString() => this.ToHex();
///
- /// Converts the color instance to a specified type.
+ /// Converts the color instance to a specified type.
///
/// The pixel type to convert to.
/// The pixel value.
@@ -221,13 +265,26 @@ public Color WithAlpha(float alpha)
public TPixel ToPixel()
where TPixel : unmanaged, IPixel
{
- TPixel pixel = default;
+ if (this.boxedHighPrecisionPixel is TPixel pixel)
+ {
+ return pixel;
+ }
+
+ pixel = default;
+ if (this.boxedHighPrecisionPixel is not null)
+ {
+ // The color holds a boxed pixel of a different format, so this.data is
+ // default here; convert through the scaled vector instead of reading it.
+ pixel.FromScaledVector4(this.boxedHighPrecisionPixel.ToScaledVector4());
+ return pixel;
+ }
+
pixel.FromRgba64(this.data);
return pixel;
}
///
- /// Bulk converts a span of to a span of a specified type.
+ /// Bulk converts a span of to a span of a specified type.
///
/// The pixel type to convert to.
/// The configuration.
@@ -240,28 +289,38 @@ public static void ToPixel(
Span destination)
where TPixel : unmanaged, IPixel
{
- ReadOnlySpan rgba64Span = MemoryMarshal.Cast(source);
- PixelOperations.Instance.FromRgba64(configuration, rgba64Span, destination);
+ Guard.DestinationShouldNotBeTooShort(source, destination, nameof(destination));
+ for (int i = 0; i < source.Length; i++)
+ {
+ destination[i] = source[i].ToPixel();
+ }
}
///
[MethodImpl(InliningOptions.ShortMethod)]
public bool Equals(Color other)
{
- return this.data.PackedValue == other.data.PackedValue;
+ if (this.boxedHighPrecisionPixel is null && other.boxedHighPrecisionPixel is null)
+ {
+ return this.data.PackedValue == other.data.PackedValue;
+ }
+
+ return this.boxedHighPrecisionPixel?.Equals(other.boxedHighPrecisionPixel) == true;
}
///
- public override bool Equals(object obj)
- {
- return obj is Color other && this.Equals(other);
- }
+ public override bool Equals(object obj) => obj is Color other && this.Equals(other);
///
[MethodImpl(InliningOptions.ShortMethod)]
public override int GetHashCode()
{
- return this.data.PackedValue.GetHashCode();
+ if (this.boxedHighPrecisionPixel is null)
+ {
+ return this.data.PackedValue.GetHashCode();
+ }
+
+ return this.boxedHighPrecisionPixel.GetHashCode();
}
}
}
diff --git a/src/ImageSharp/Formats/Png/PngDecoderCore.cs b/src/ImageSharp/Formats/Png/PngDecoderCore.cs
index 987dc150c2..cf3cd7eb14 100644
--- a/src/ImageSharp/Formats/Png/PngDecoderCore.cs
+++ b/src/ImageSharp/Formats/Png/PngDecoderCore.cs
@@ -1071,7 +1071,7 @@ private bool TryUncompressTextData(ReadOnlySpan compressedData, Encoding e
int bytesRead = inflateStream.CompressedStream.Read(this.buffer, 0, this.buffer.Length);
while (bytesRead != 0)
{
- uncompressedBytes.AddRange(this.buffer.AsSpan().Slice(0, bytesRead).ToArray());
+ uncompressedBytes.AddRange(this.buffer.AsSpan(0, bytesRead).ToArray());
bytesRead = inflateStream.CompressedStream.Read(this.buffer, 0, this.buffer.Length);
}
diff --git a/src/ImageSharp/Formats/Webp/BitReader/Vp8BitReader.cs b/src/ImageSharp/Formats/Webp/BitReader/Vp8BitReader.cs
index abf44127a9..d6ceca5bf5 100644
--- a/src/ImageSharp/Formats/Webp/BitReader/Vp8BitReader.cs
+++ b/src/ImageSharp/Formats/Webp/BitReader/Vp8BitReader.cs
@@ -142,10 +142,11 @@ public int GetSigned(int v)
[MethodImpl(InliningOptions.ShortMethod)]
public bool ReadBool() => this.ReadValue(1) is 1;
+ [MethodImpl(InliningOptions.ShortMethod)]
public uint ReadValue(int nBits)
{
- Guard.MustBeGreaterThan(nBits, 0, nameof(nBits));
- Guard.MustBeLessThanOrEqualTo(nBits, 32, nameof(nBits));
+ DebugGuard.MustBeGreaterThan(nBits, 0, nameof(nBits));
+ DebugGuard.MustBeLessThanOrEqualTo(nBits, 32, nameof(nBits));
uint v = 0;
while (nBits-- > 0)
@@ -156,10 +157,11 @@ public uint ReadValue(int nBits)
return v;
}
+ [MethodImpl(InliningOptions.ShortMethod)]
public int ReadSignedValue(int nBits)
{
- Guard.MustBeGreaterThan(nBits, 0, nameof(nBits));
- Guard.MustBeLessThanOrEqualTo(nBits, 32, nameof(nBits));
+ DebugGuard.MustBeGreaterThan(nBits, 0, nameof(nBits));
+ DebugGuard.MustBeLessThanOrEqualTo(nBits, 32, nameof(nBits));
int value = (int)this.ReadValue(nBits);
return this.ReadValue(1) != 0 ? -value : value;
diff --git a/src/ImageSharp/Formats/Webp/BitReader/Vp8LBitReader.cs b/src/ImageSharp/Formats/Webp/BitReader/Vp8LBitReader.cs
index 601336fa4b..4df2feba81 100644
--- a/src/ImageSharp/Formats/Webp/BitReader/Vp8LBitReader.cs
+++ b/src/ImageSharp/Formats/Webp/BitReader/Vp8LBitReader.cs
@@ -28,7 +28,7 @@ internal class Vp8LBitReader : BitReaderBase
///
private const int Wbits = 32;
- private readonly uint[] bitMask =
+ private static readonly uint[] BitMask =
{
0,
0x000001, 0x000003, 0x000007, 0x00000f,
@@ -125,19 +125,19 @@ public Vp8LBitReader(Stream inputStream, uint imageDataSize, MemoryAllocator mem
///
/// The number of bits to read (should not exceed 16).
/// A ushort value.
+ [MethodImpl(InliningOptions.ShortMethod)]
public uint ReadValue(int nBits)
{
- Guard.MustBeGreaterThan(nBits, 0, nameof(nBits));
+ DebugGuard.MustBeGreaterThan(nBits, 0, nameof(nBits));
if (!this.Eos && nBits <= Vp8LMaxNumBitRead)
{
- ulong val = this.PrefetchBits() & this.bitMask[nBits];
+ ulong val = this.PrefetchBits() & BitMask[nBits];
this.bitPos += nBits;
this.ShiftBytes();
return (uint)val;
}
- this.SetEndOfStream();
return 0;
}
@@ -169,6 +169,7 @@ public bool ReadBit()
///
/// Advances the read buffer by 4 bytes to make room for reading next 32 bits.
///
+ [MethodImpl(InliningOptions.ShortMethod)]
public void FillBitWindow()
{
if (this.bitPos >= Wbits)
@@ -181,7 +182,8 @@ public void FillBitWindow()
/// Returns true if there was an attempt at reading bit past the end of the buffer.
///
/// True, if end of buffer was reached.
- public bool IsEndOfStream() => this.Eos || ((this.pos == this.len) && (this.bitPos > Lbits));
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public bool IsEndOfStream() => this.Eos || (this.pos == this.len && this.bitPos > Lbits);
[MethodImpl(InliningOptions.ShortMethod)]
private void DoFillBitWindow() => this.ShiftBytes();
@@ -189,6 +191,7 @@ public void FillBitWindow()
///
/// If not at EOS, reload up to Vp8LLbits byte-by-byte.
///
+ [MethodImpl(InliningOptions.ShortMethod)]
private void ShiftBytes()
{
System.Span dataSpan = this.Data.Memory.Span;
@@ -199,17 +202,6 @@ private void ShiftBytes()
++this.pos;
this.bitPos -= 8;
}
-
- if (this.IsEndOfStream())
- {
- this.SetEndOfStream();
- }
- }
-
- private void SetEndOfStream()
- {
- this.Eos = true;
- this.bitPos = 0; // To avoid undefined behaviour with shifts.
}
}
}
diff --git a/src/ImageSharp/Formats/Webp/BitWriter/BitWriterBase.cs b/src/ImageSharp/Formats/Webp/BitWriter/BitWriterBase.cs
index 41623f2878..9208881360 100644
--- a/src/ImageSharp/Formats/Webp/BitWriter/BitWriterBase.cs
+++ b/src/ImageSharp/Formats/Webp/BitWriter/BitWriterBase.cs
@@ -10,11 +10,22 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter
{
internal abstract class BitWriterBase
{
+ private const uint MaxDimension = 16777215;
+
+ private const ulong MaxCanvasPixels = 4294967295ul;
+
+ protected const uint ExtendedFileChunkSize = WebpConstants.ChunkHeaderSize + WebpConstants.Vp8XChunkSize;
+
///
/// Buffer to write to.
///
private byte[] buffer;
+ ///
+ /// A scratch buffer to reduce allocations.
+ ///
+ private readonly byte[] scratchBuffer = new byte[4];
+
///
/// Initializes a new instance of the class.
///
@@ -52,15 +63,6 @@ internal abstract class BitWriterBase
///
public abstract void Finish();
- ///
- /// Writes the encoded image to the stream.
- ///
- /// The stream to write to.
- /// The exif profile.
- /// The width of the image.
- /// The height of the image.
- public abstract void WriteEncodedImageToStream(Stream stream, ExifProfile exifProfile, uint width, uint height);
-
protected void ResizeBuffer(int maxBytes, int sizeRequired)
{
int newSize = (3 * maxBytes) >> 1;
@@ -81,13 +83,25 @@ protected void ResizeBuffer(int maxBytes, int sizeRequired)
/// The block length.
protected void WriteRiffHeader(Stream stream, uint riffSize)
{
- Span buf = stackalloc byte[4];
stream.Write(WebpConstants.RiffFourCc);
- BinaryPrimitives.WriteUInt32LittleEndian(buf, riffSize);
- stream.Write(buf);
+ BinaryPrimitives.WriteUInt32LittleEndian(this.scratchBuffer, riffSize);
+ stream.Write(this.scratchBuffer.AsSpan(0, 4));
stream.Write(WebpConstants.WebpHeader);
}
+ ///
+ /// Calculates the exif chunk size.
+ ///
+ /// The exif profile bytes.
+ /// The exif chunk size in bytes.
+ protected uint ExifChunkSize(byte[] exifBytes)
+ {
+ uint exifSize = (uint)exifBytes.Length;
+ uint exifChunkSize = WebpConstants.ChunkHeaderSize + exifSize + (exifSize & 1);
+
+ return exifChunkSize;
+ }
+
///
/// Writes the Exif profile to the stream.
///
@@ -97,12 +111,19 @@ protected void WriteExifProfile(Stream stream, byte[] exifBytes)
{
DebugGuard.NotNull(exifBytes, nameof(exifBytes));
- Span buf = stackalloc byte[4];
+ uint size = (uint)exifBytes.Length;
+ Span buf = this.scratchBuffer.AsSpan(0, 4);
BinaryPrimitives.WriteUInt32BigEndian(buf, (uint)WebpChunkType.Exif);
stream.Write(buf);
- BinaryPrimitives.WriteUInt32LittleEndian(buf, (uint)exifBytes.Length);
+ BinaryPrimitives.WriteUInt32LittleEndian(buf, size);
stream.Write(buf);
stream.Write(exifBytes);
+
+ // Add padding byte if needed.
+ if ((size & 1) == 1)
+ {
+ stream.WriteByte(0);
+ }
}
///
@@ -112,16 +133,16 @@ protected void WriteExifProfile(Stream stream, byte[] exifBytes)
/// A exif profile or null, if it does not exist.
/// The width of the image.
/// The height of the image.
- protected void WriteVp8XHeader(Stream stream, ExifProfile exifProfile, uint width, uint height)
+ /// Flag indicating whether an alpha channel is present.
+ protected void WriteVp8XHeader(Stream stream, ExifProfile exifProfile, uint width, uint height, bool hasAlpha)
{
- int maxDimension = 16777215;
- if (width > maxDimension || height > maxDimension)
+ if (width > MaxDimension || height > MaxDimension)
{
- WebpThrowHelper.ThrowInvalidImageDimensions($"Image width or height exceeds maximum allowed dimension of {maxDimension}");
+ WebpThrowHelper.ThrowInvalidImageDimensions($"Image width or height exceeds maximum allowed dimension of {MaxDimension}");
}
// The spec states that the product of Canvas Width and Canvas Height MUST be at most 2^32 - 1.
- if (width * height > 4294967295ul)
+ if ((ulong)width * height > MaxCanvasPixels)
{
WebpThrowHelper.ThrowInvalidImageDimensions("The product of image width and height MUST be at most 2^32 - 1");
}
@@ -133,7 +154,13 @@ protected void WriteVp8XHeader(Stream stream, ExifProfile exifProfile, uint widt
flags |= 8;
}
- Span buf = stackalloc byte[4];
+ if (hasAlpha)
+ {
+ // Set alpha bit.
+ flags |= 16;
+ }
+
+ Span buf = this.scratchBuffer.AsSpan(0, 4);
stream.Write(WebpConstants.Vp8XMagicBytes);
BinaryPrimitives.WriteUInt32LittleEndian(buf, WebpConstants.Vp8XChunkSize);
stream.Write(buf);
diff --git a/src/ImageSharp/Formats/Webp/BitWriter/Vp8BitWriter.cs b/src/ImageSharp/Formats/Webp/BitWriter/Vp8BitWriter.cs
index 7628247fd6..3b2f943db5 100644
--- a/src/ImageSharp/Formats/Webp/BitWriter/Vp8BitWriter.cs
+++ b/src/ImageSharp/Formats/Webp/BitWriter/Vp8BitWriter.cs
@@ -399,8 +399,15 @@ private void Flush()
}
}
- ///
- public override void WriteEncodedImageToStream(Stream stream, ExifProfile exifProfile, uint width, uint height)
+ ///
+ /// Writes the encoded image to the stream.
+ ///
+ /// The stream to write to.
+ /// The exif profile.
+ /// The width of the image.
+ /// The height of the image.
+ /// Flag indicating whether an alpha channel is present.
+ public void WriteEncodedImageToStream(Stream stream, ExifProfile exifProfile, uint width, uint height, bool hasAlpha)
{
bool isVp8X = false;
byte[] exifBytes = null;
@@ -408,9 +415,9 @@ public override void WriteEncodedImageToStream(Stream stream, ExifProfile exifPr
if (exifProfile != null)
{
isVp8X = true;
- riffSize += WebpConstants.ChunkHeaderSize + WebpConstants.Vp8XChunkSize;
+ riffSize += ExtendedFileChunkSize;
exifBytes = exifProfile.ToByteArray();
- riffSize += WebpConstants.ChunkHeaderSize + (uint)exifBytes.Length;
+ riffSize += this.ExifChunkSize(exifBytes);
}
this.Finish();
@@ -433,7 +440,7 @@ public override void WriteEncodedImageToStream(Stream stream, ExifProfile exifPr
riffSize += WebpConstants.TagSize + WebpConstants.ChunkHeaderSize + vp8Size;
// Emit headers and partition #0
- this.WriteWebpHeaders(stream, size0, vp8Size, riffSize, isVp8X, width, height, exifProfile);
+ this.WriteWebpHeaders(stream, size0, vp8Size, riffSize, isVp8X, width, height, exifProfile, hasAlpha);
bitWriterPartZero.WriteToStream(stream);
// Write the encoded image to the stream.
@@ -616,14 +623,14 @@ private void CodeIntraModes(Vp8BitWriter bitWriter)
while (it.Next());
}
- private void WriteWebpHeaders(Stream stream, uint size0, uint vp8Size, uint riffSize, bool isVp8X, uint width, uint height, ExifProfile exifProfile)
+ private void WriteWebpHeaders(Stream stream, uint size0, uint vp8Size, uint riffSize, bool isVp8X, uint width, uint height, ExifProfile exifProfile, bool hasAlpha)
{
this.WriteRiffHeader(stream, riffSize);
// Write VP8X, header if necessary.
if (isVp8X)
{
- this.WriteVp8XHeader(stream, exifProfile, width, height);
+ this.WriteVp8XHeader(stream, exifProfile, width, height, hasAlpha);
}
this.WriteVp8Header(stream, vp8Size);
diff --git a/src/ImageSharp/Formats/Webp/BitWriter/Vp8LBitWriter.cs b/src/ImageSharp/Formats/Webp/BitWriter/Vp8LBitWriter.cs
index 2f942231fb..b83865aa36 100644
--- a/src/ImageSharp/Formats/Webp/BitWriter/Vp8LBitWriter.cs
+++ b/src/ImageSharp/Formats/Webp/BitWriter/Vp8LBitWriter.cs
@@ -127,19 +127,25 @@ public override void Finish()
this.used = 0;
}
- ///
- public override void WriteEncodedImageToStream(Stream stream, ExifProfile exifProfile, uint width, uint height)
+ ///
+ /// Writes the encoded image to the stream.
+ ///
+ /// The stream to write to.
+ /// The exif profile.
+ /// The width of the image.
+ /// The height of the image.
+ /// Flag indicating whether an alpha channel is present.
+ public void WriteEncodedImageToStream(Stream stream, ExifProfile exifProfile, uint width, uint height, bool hasAlpha)
{
- Span buffer = stackalloc byte[4];
bool isVp8X = false;
byte[] exifBytes = null;
uint riffSize = 0;
if (exifProfile != null)
{
isVp8X = true;
- riffSize += WebpConstants.ChunkHeaderSize + WebpConstants.Vp8XChunkSize;
+ riffSize += ExtendedFileChunkSize;
exifBytes = exifProfile.ToByteArray();
- riffSize += WebpConstants.ChunkHeaderSize + (uint)exifBytes.Length;
+ riffSize += this.ExifChunkSize(exifBytes);
}
this.Finish();
@@ -154,15 +160,15 @@ public override void WriteEncodedImageToStream(Stream stream, ExifProfile exifPr
// Write VP8X, header if necessary.
if (isVp8X)
{
- this.WriteVp8XHeader(stream, exifProfile, width, height);
+ this.WriteVp8XHeader(stream, exifProfile, width, height, hasAlpha);
}
// Write magic bytes indicating its a lossless webp.
stream.Write(WebpConstants.Vp8LMagicBytes);
// Write Vp8 Header.
- BinaryPrimitives.WriteUInt32LittleEndian(buffer, size);
- stream.Write(buffer);
+ BinaryPrimitives.WriteUInt32LittleEndian(this.scratchBuffer, size);
+ stream.Write(this.scratchBuffer.AsSpan(0, 4));
stream.WriteByte(WebpConstants.Vp8LHeaderMagicByte);
// Write the encoded bytes of the image to the stream.
diff --git a/src/ImageSharp/Formats/Webp/IWebpEncoderOptions.cs b/src/ImageSharp/Formats/Webp/IWebpEncoderOptions.cs
index 7dbf49d45e..000de4f88c 100644
--- a/src/ImageSharp/Formats/Webp/IWebpEncoderOptions.cs
+++ b/src/ImageSharp/Formats/Webp/IWebpEncoderOptions.cs
@@ -35,6 +35,7 @@ internal interface IWebpEncoderOptions
///
/// Gets the number of entropy-analysis passes (in [1..10]).
+ /// Defaults to 1.
///
int EntropyPasses { get; }
diff --git a/src/ImageSharp/Formats/Webp/Lossless/BackwardReferenceEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/BackwardReferenceEncoder.cs
index 70c4efb990..dc546f8ac2 100644
--- a/src/ImageSharp/Formats/Webp/Lossless/BackwardReferenceEncoder.cs
+++ b/src/ImageSharp/Formats/Webp/Lossless/BackwardReferenceEncoder.cs
@@ -49,6 +49,8 @@ public static Vp8LBackwardRefs GetBackwardReferences(
double bitCostBest = -1;
int cacheBitsInitial = cacheBits;
Vp8LHashChain hashChainBox = null;
+ var stats = new Vp8LStreaks();
+ var bitsEntropy = new Vp8LBitEntropy();
for (int lz77Type = 1; lz77TypesToTry > 0; lz77TypesToTry &= ~lz77Type, lz77Type <<= 1)
{
int cacheBitsTmp = cacheBitsInitial;
@@ -81,7 +83,7 @@ public static Vp8LBackwardRefs GetBackwardReferences(
// Keep the best backward references.
var histo = new Vp8LHistogram(worst, cacheBitsTmp);
- double bitCost = histo.EstimateBits();
+ double bitCost = histo.EstimateBits(stats, bitsEntropy);
if (lz77TypeBest == 0 || bitCost < bitCostBest)
{
@@ -100,7 +102,7 @@ public static Vp8LBackwardRefs GetBackwardReferences(
Vp8LHashChain hashChainTmp = lz77TypeBest == (int)Vp8LLz77Type.Lz77Standard ? hashChain : hashChainBox;
BackwardReferencesTraceBackwards(width, height, bgra, cacheBits, hashChainTmp, best, worst);
var histo = new Vp8LHistogram(worst, cacheBits);
- double bitCostTrace = histo.EstimateBits();
+ double bitCostTrace = histo.EstimateBits(stats, bitsEntropy);
if (bitCostTrace < bitCostBest)
{
best = worst;
@@ -214,9 +216,11 @@ private static int CalculateBestCacheSize(ReadOnlySpan bgra, int quality,
}
}
+ var stats = new Vp8LStreaks();
+ var bitsEntropy = new Vp8LBitEntropy();
for (int i = 0; i <= cacheBitsMax; i++)
{
- double entropy = histos[i].EstimateBits();
+ double entropy = histos[i].EstimateBits(stats, bitsEntropy);
if (i == 0 || entropy < entropyMin)
{
entropyMin = entropy;
diff --git a/src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs b/src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs
index 8596d85558..02bbc38fcf 100644
--- a/src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs
+++ b/src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs
@@ -1,6 +1,8 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
+using System.Runtime.CompilerServices;
+
namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{
///
@@ -41,6 +43,7 @@ public void Init(int hashBits)
/// Inserts a new color into the cache.
///
/// The color to insert.
+ [MethodImpl(InliningOptions.ShortMethod)]
public void Insert(uint bgra)
{
int key = HashPix(bgra, this.HashShift);
@@ -52,6 +55,7 @@ public void Insert(uint bgra)
///
/// The key to lookup.
/// The color for the key.
+ [MethodImpl(InliningOptions.ShortMethod)]
public uint Lookup(int key) => this.Colors[key];
///
@@ -59,6 +63,7 @@ public void Insert(uint bgra)
///
/// The color to check.
/// The index of the color in the cache or -1 if its not present.
+ [MethodImpl(InliningOptions.ShortMethod)]
public int Contains(uint bgra)
{
int key = HashPix(bgra, this.HashShift);
@@ -70,6 +75,7 @@ public int Contains(uint bgra)
///
/// The color.
/// The index for the color.
+ [MethodImpl(InliningOptions.ShortMethod)]
public int GetIndex(uint bgra) => HashPix(bgra, this.HashShift);
///
@@ -77,8 +83,10 @@ public int Contains(uint bgra)
///
/// The key.
/// The color to add.
+ [MethodImpl(InliningOptions.ShortMethod)]
public void Set(uint key, uint bgra) => this.Colors[key] = bgra;
+ [MethodImpl(InliningOptions.ShortMethod)]
public static int HashPix(uint argb, int shift) => (int)((argb * HashMul) >> shift);
}
}
diff --git a/src/ImageSharp/Formats/Webp/Lossless/HistogramEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/HistogramEncoder.cs
index f2d4fb189f..5d407d73c1 100644
--- a/src/ImageSharp/Formats/Webp/Lossless/HistogramEncoder.cs
+++ b/src/ImageSharp/Formats/Webp/Lossless/HistogramEncoder.cs
@@ -152,10 +152,12 @@ private static void HistogramAnalyzeEntropyBin(List histograms, u
private static int HistogramCopyAndAnalyze(List origHistograms, List histograms, ushort[] histogramSymbols)
{
+ var stats = new Vp8LStreaks();
+ var bitsEntropy = new Vp8LBitEntropy();
for (int clusterId = 0, i = 0; i < origHistograms.Count; i++)
{
Vp8LHistogram origHistogram = origHistograms[i];
- origHistogram.UpdateHistogramCost();
+ origHistogram.UpdateHistogramCost(stats, bitsEntropy);
// Skip the histogram if it is completely empty, which can happen for tiles with no information (when they are skipped because of LZ77).
if (!origHistogram.IsUsed[0] && !origHistogram.IsUsed[1] && !origHistogram.IsUsed[2] && !origHistogram.IsUsed[3] && !origHistogram.IsUsed[4])
@@ -175,7 +177,14 @@ private static int HistogramCopyAndAnalyze(List origHistograms, L
return numUsed;
}
- private static void HistogramCombineEntropyBin(List histograms, ushort[] clusters, ushort[] clusterMappings, Vp8LHistogram curCombo, ushort[] binMap, int numBins, double combineCostFactor)
+ private static void HistogramCombineEntropyBin(
+ List histograms,
+ ushort[] clusters,
+ ushort[] clusterMappings,
+ Vp8LHistogram curCombo,
+ ushort[] binMap,
+ int numBins,
+ double combineCostFactor)
{
var binInfo = new HistogramBinInfo[BinSize];
for (int idx = 0; idx < numBins; idx++)
@@ -191,6 +200,8 @@ private static void HistogramCombineEntropyBin(List histograms, u
}
var indicesToRemove = new List();
+ var stats = new Vp8LStreaks();
+ var bitsEntropy = new Vp8LBitEntropy();
for (int idx = 0; idx < histograms.Count; idx++)
{
if (histograms[idx] == null)
@@ -209,7 +220,7 @@ private static void HistogramCombineEntropyBin(List histograms, u
// Try to merge #idx into #first (both share the same binId)
double bitCost = histograms[idx].BitCost;
double bitCostThresh = -bitCost * combineCostFactor;
- double currCostDiff = histograms[first].AddEval(histograms[idx], bitCostThresh, curCombo);
+ double currCostDiff = histograms[first].AddEval(histograms[idx], stats, bitsEntropy, bitCostThresh, curCombo);
if (currCostDiff < bitCostThresh)
{
@@ -308,6 +319,8 @@ private static bool HistogramCombineStochastic(List histograms, i
int numUsed = histograms.Count(h => h != null);
int outerIters = numUsed;
int numTriesNoSuccess = outerIters / 2;
+ var stats = new Vp8LStreaks();
+ var bitsEntropy = new Vp8LBitEntropy();
if (numUsed < minClusterSize)
{
@@ -354,7 +367,7 @@ private static bool HistogramCombineStochastic(List histograms, i
idx2 = mappings[idx2];
// Calculate cost reduction on combination.
- double currCost = HistoPriorityListPush(histoPriorityList, maxSize, histograms, idx1, idx2, bestCost);
+ double currCost = HistoPriorityListPush(histoPriorityList, maxSize, histograms, idx1, idx2, bestCost, stats, bitsEntropy);
// Found a better pair?
if (currCost < 0)
@@ -428,7 +441,7 @@ private static bool HistogramCombineStochastic(List histograms, i
if (doEval)
{
// Re-evaluate the cost of an updated pair.
- HistoListUpdatePair(histograms[p.Idx1], histograms[p.Idx2], 0.0d, p);
+ HistoListUpdatePair(histograms[p.Idx1], histograms[p.Idx2], stats, bitsEntropy, 0.0d, p);
if (p.CostDiff >= 0.0d)
{
histoPriorityList[j] = histoPriorityList[histoPriorityList.Count - 1];
@@ -456,6 +469,8 @@ private static void HistogramCombineGreedy(List histograms)
// Priority list of histogram pairs.
var histoPriorityList = new List();
int maxSize = histoSize * histoSize;
+ var stats = new Vp8LStreaks();
+ var bitsEntropy = new Vp8LBitEntropy();
for (int i = 0; i < histoSize; i++)
{
@@ -471,7 +486,7 @@ private static void HistogramCombineGreedy(List histograms)
continue;
}
- HistoPriorityListPush(histoPriorityList, maxSize, histograms, i, j, 0.0d);
+ HistoPriorityListPush(histoPriorityList, maxSize, histograms, i, j, 0.0d, stats, bitsEntropy);
}
}
@@ -510,7 +525,7 @@ private static void HistogramCombineGreedy(List histograms)
continue;
}
- HistoPriorityListPush(histoPriorityList, maxSize, histograms, idx1, i, 0.0d);
+ HistoPriorityListPush(histoPriorityList, maxSize, histograms, idx1, i, 0.0d, stats, bitsEntropy);
}
}
}
@@ -519,6 +534,8 @@ private static void HistogramRemap(List input, List 1)
{
for (int i = 0; i < inSize; i++)
@@ -534,7 +551,7 @@ private static void HistogramRemap(List input, List input, List
/// The cost of the pair, or 0 if it is superior to threshold.
- private static double HistoPriorityListPush(List histoList, int maxSize, List histograms, int idx1, int idx2, double threshold)
+ private static double HistoPriorityListPush(List histoList, int maxSize, List histograms, int idx1, int idx2, double threshold, Vp8LStreaks stats, Vp8LBitEntropy bitsEntropy)
{
var pair = new HistogramPair();
@@ -598,7 +615,7 @@ private static double HistoPriorityListPush(List histoList, int m
Vp8LHistogram h1 = histograms[idx1];
Vp8LHistogram h2 = histograms[idx2];
- HistoListUpdatePair(h1, h2, threshold, pair);
+ HistoListUpdatePair(h1, h2, stats, bitsEntropy, threshold, pair);
// Do not even consider the pair if it does not improve the entropy.
if (pair.CostDiff >= threshold)
@@ -616,11 +633,11 @@ private static double HistoPriorityListPush(List histoList, int m
///
/// Update the cost diff and combo of a pair of histograms. This needs to be called when the histograms have been merged with a third one.
///
- private static void HistoListUpdatePair(Vp8LHistogram h1, Vp8LHistogram h2, double threshold, HistogramPair pair)
+ private static void HistoListUpdatePair(Vp8LHistogram h1, Vp8LHistogram h2, Vp8LStreaks stats, Vp8LBitEntropy bitsEntropy, double threshold, HistogramPair pair)
{
double sumCost = h1.BitCost + h2.BitCost;
pair.CostCombo = 0.0d;
- h1.GetCombinedHistogramEntropy(h2, sumCost + threshold, costInitial: pair.CostCombo, out double cost);
+ h1.GetCombinedHistogramEntropy(h2, stats, bitsEntropy, sumCost + threshold, costInitial: pair.CostCombo, out double cost);
pair.CostCombo = cost;
pair.CostDiff = pair.CostCombo - sumCost;
}
diff --git a/src/ImageSharp/Formats/Webp/Lossless/HuffmanTree.cs b/src/ImageSharp/Formats/Webp/Lossless/HuffmanTree.cs
index cd8be9aac3..0376311ed9 100644
--- a/src/ImageSharp/Formats/Webp/Lossless/HuffmanTree.cs
+++ b/src/ImageSharp/Formats/Webp/Lossless/HuffmanTree.cs
@@ -49,14 +49,13 @@ public static int Compare(HuffmanTree t1, HuffmanTree t2)
{
return -1;
}
- else if (t1.TotalCount < t2.TotalCount)
+
+ if (t1.TotalCount < t2.TotalCount)
{
return 1;
}
- else
- {
- return t1.Value < t2.Value ? -1 : 1;
- }
+
+ return t1.Value < t2.Value ? -1 : 1;
}
public IDeepCloneable DeepClone() => new HuffmanTree(this);
diff --git a/src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs
index f2321d6813..3c81f1a22c 100644
--- a/src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs
+++ b/src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs
@@ -202,9 +202,14 @@ public static void GenerateOptimalTree(HuffmanTree[] tree, uint[] histogram, int
}
// Build the Huffman tree.
- HuffmanTree[] treeCopy = tree.AsSpan().Slice(0, treeSize).ToArray();
+#if NET5_0_OR_GREATER
+ Span treeSlice = tree.AsSpan(0, treeSize);
+ treeSlice.Sort(HuffmanTree.Compare);
+#else
+ HuffmanTree[] treeCopy = tree.AsSpan(0, treeSize).ToArray();
Array.Sort(treeCopy, HuffmanTree.Compare);
treeCopy.AsSpan().CopyTo(tree);
+#endif
if (treeSize > 1)
{
diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs
index b7f94415be..f9b97c6c44 100644
--- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs
+++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs
@@ -27,6 +27,30 @@ internal static unsafe class LosslessUtils
private const double Log2Reciprocal = 1.44269504088896338700465094007086;
+#if SUPPORTS_RUNTIME_INTRINSICS
+ private static readonly Vector256 AddGreenToBlueAndRedMaskAvx2 = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255);
+
+ private static readonly Vector128 AddGreenToBlueAndRedMaskSsse3 = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255);
+
+ private static readonly byte AddGreenToBlueAndRedShuffleMask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);
+
+ private static readonly Vector256 SubtractGreenFromBlueAndRedMaskAvx2 = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255);
+
+ private static readonly Vector128 SubtractGreenFromBlueAndRedMaskSsse3 = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255);
+
+ private static readonly byte SubtractGreenFromBlueAndRedShuffleMask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);
+
+ private static readonly Vector128 TransformColorAlphaGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
+
+ private static readonly Vector128 TransformColorRedBlueMask = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0);
+
+ private static readonly byte TransformColorShuffleMask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);
+
+ private static readonly Vector128 TransformColorInverseAlphaGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
+
+ private static readonly byte TransformColorInverseShuffleMask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);
+#endif
+
///
/// Returns the exact index where array1 and array2 are different. For an index
/// inferior or equal to bestLenMatch, the return value just has to be strictly
@@ -97,7 +121,6 @@ public static void AddGreenToBlueAndRed(Span pixelData)
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx2.IsSupported)
{
- var mask = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255);
int numPixels = pixelData.Length;
fixed (uint* p = pixelData)
{
@@ -106,7 +129,7 @@ public static void AddGreenToBlueAndRed(Span pixelData)
{
uint* idx = p + i;
Vector256 input = Avx.LoadVector256((ushort*)idx).AsByte();
- Vector256 in0g0g = Avx2.Shuffle(input, mask);
+ Vector256 in0g0g = Avx2.Shuffle(input, AddGreenToBlueAndRedMaskAvx2);
Vector256 output = Avx2.Add(input, in0g0g);
Avx.Store((byte*)idx, output);
}
@@ -119,7 +142,6 @@ public static void AddGreenToBlueAndRed(Span pixelData)
}
else if (Ssse3.IsSupported)
{
- var mask = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255);
int numPixels = pixelData.Length;
fixed (uint* p = pixelData)
{
@@ -128,7 +150,7 @@ public static void AddGreenToBlueAndRed(Span pixelData)
{
uint* idx = p + i;
Vector128 input = Sse2.LoadVector128((ushort*)idx).AsByte();
- Vector128 in0g0g = Ssse3.Shuffle(input, mask);
+ Vector128 in0g0g = Ssse3.Shuffle(input, AddGreenToBlueAndRedMaskSsse3);
Vector128 output = Sse2.Add(input, in0g0g);
Sse2.Store((byte*)idx, output.AsByte());
}
@@ -141,7 +163,6 @@ public static void AddGreenToBlueAndRed(Span pixelData)
}
else if (Sse2.IsSupported)
{
- byte mask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);
int numPixels = pixelData.Length;
fixed (uint* p = pixelData)
{
@@ -151,8 +172,8 @@ public static void AddGreenToBlueAndRed(Span pixelData)
uint* idx = p + i;
Vector128 input = Sse2.LoadVector128((ushort*)idx);
Vector128 a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g
- Vector128 b = Sse2.ShuffleLow(a, mask);
- Vector128 c = Sse2.ShuffleHigh(b, mask); // 0g0g
+ Vector128 b = Sse2.ShuffleLow(a, AddGreenToBlueAndRedShuffleMask);
+ Vector128 c = Sse2.ShuffleHigh(b, AddGreenToBlueAndRedShuffleMask); // 0g0g
Vector128 output = Sse2.Add(input.AsByte(), c.AsByte());
Sse2.Store((byte*)idx, output);
}
@@ -189,7 +210,6 @@ public static void SubtractGreenFromBlueAndRed(Span pixelData)
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx2.IsSupported)
{
- var mask = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255);
int numPixels = pixelData.Length;
fixed (uint* p = pixelData)
{
@@ -198,7 +218,7 @@ public static void SubtractGreenFromBlueAndRed(Span pixelData)
{
uint* idx = p + i;
Vector256 input = Avx.LoadVector256((ushort*)idx).AsByte();
- Vector256 in0g0g = Avx2.Shuffle(input, mask);
+ Vector256 in0g0g = Avx2.Shuffle(input, SubtractGreenFromBlueAndRedMaskAvx2);
Vector256 output = Avx2.Subtract(input, in0g0g);
Avx.Store((byte*)idx, output);
}
@@ -211,7 +231,6 @@ public static void SubtractGreenFromBlueAndRed(Span pixelData)
}
else if (Ssse3.IsSupported)
{
- var mask = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255);
int numPixels = pixelData.Length;
fixed (uint* p = pixelData)
{
@@ -220,7 +239,7 @@ public static void SubtractGreenFromBlueAndRed(Span pixelData)
{
uint* idx = p + i;
Vector128 input = Sse2.LoadVector128((ushort*)idx).AsByte();
- Vector128 in0g0g = Ssse3.Shuffle(input, mask);
+ Vector128 in0g0g = Ssse3.Shuffle(input, SubtractGreenFromBlueAndRedMaskSsse3);
Vector128 output = Sse2.Subtract(input, in0g0g);
Sse2.Store((byte*)idx, output.AsByte());
}
@@ -233,7 +252,6 @@ public static void SubtractGreenFromBlueAndRed(Span pixelData)
}
else if (Sse2.IsSupported)
{
- byte mask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);
int numPixels = pixelData.Length;
fixed (uint* p = pixelData)
{
@@ -243,8 +261,8 @@ public static void SubtractGreenFromBlueAndRed(Span pixelData)
uint* idx = p + i;
Vector128 input = Sse2.LoadVector128((ushort*)idx);
Vector128 a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g
- Vector128 b = Sse2.ShuffleLow(a, mask);
- Vector128 c = Sse2.ShuffleHigh(b, mask); // 0g0g
+ Vector128 b = Sse2.ShuffleLow(a, SubtractGreenFromBlueAndRedShuffleMask);
+ Vector128 c = Sse2.ShuffleHigh(b, SubtractGreenFromBlueAndRedShuffleMask); // 0g0g
Vector128 output = Sse2.Subtract(input.AsByte(), c.AsByte());
Sse2.Store((byte*)idx, output);
}
@@ -394,9 +412,6 @@ public static void TransformColor(Vp8LMultipliers m, Span data, int numPix
{
Vector128 multsrb = MkCst16(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue));
Vector128 multsb2 = MkCst16(Cst5b(m.RedToBlue), 0);
- var maskalphagreen = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
- var maskredblue = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0);
- byte shufflemask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);
fixed (uint* src = data)
{
int idx;
@@ -404,15 +419,15 @@ public static void TransformColor(Vp8LMultipliers m, Span data, int numPix
{
uint* pos = src + idx;
Vector128 input = Sse2.LoadVector128(pos);
- Vector128 a = Sse2.And(input.AsByte(), maskalphagreen);
- Vector128 b = Sse2.ShuffleLow(a.AsInt16(), shufflemask);
- Vector128 c = Sse2.ShuffleHigh(b.AsInt16(), shufflemask);
+ Vector128 a = Sse2.And(input.AsByte(), TransformColorAlphaGreenMask);
+ Vector128 b = Sse2.ShuffleLow(a.AsInt16(), TransformColorShuffleMask);
+ Vector128 c = Sse2.ShuffleHigh(b.AsInt16(), TransformColorShuffleMask);
Vector128 d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16());
Vector128 e = Sse2.ShiftLeftLogical(input.AsInt16(), 8);
Vector128 f = Sse2.MultiplyHigh(e.AsInt16(), multsb2.AsInt16());
Vector128 g = Sse2.ShiftRightLogical(f.AsInt32(), 16);
Vector128 h = Sse2.Add(g.AsByte(), d.AsByte());
- Vector128 i = Sse2.And(h, maskredblue);
+ Vector128 i = Sse2.And(h, TransformColorRedBlueMask);
Vector128 output = Sse2.Subtract(input.AsByte(), i);
Sse2.Store((byte*)pos, output);
}
@@ -460,8 +475,6 @@ public static void TransformColorInverse(Vp8LMultipliers m, Span pixelData
{
Vector128 multsrb = MkCst16(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue));
Vector128 multsb2 = MkCst16(Cst5b(m.RedToBlue), 0);
- var maskalphagreen = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
- byte shufflemask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);
fixed (uint* src = pixelData)
{
int idx;
@@ -469,9 +482,9 @@ public static void TransformColorInverse(Vp8LMultipliers m, Span pixelData
{
uint* pos = src + idx;
Vector128 input = Sse2.LoadVector128(pos);
- Vector128 a = Sse2.And(input.AsByte(), maskalphagreen);
- Vector128 b = Sse2.ShuffleLow(a.AsInt16(), shufflemask);
- Vector128 c = Sse2.ShuffleHigh(b.AsInt16(), shufflemask);
+ Vector128 a = Sse2.And(input.AsByte(), TransformColorInverseAlphaGreenMask);
+ Vector128 b = Sse2.ShuffleLow(a.AsInt16(), TransformColorInverseShuffleMask);
+ Vector128 c = Sse2.ShuffleHigh(b.AsInt16(), TransformColorInverseShuffleMask);
Vector128 d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16());
Vector128 e = Sse2.Add(input.AsByte(), d.AsByte());
Vector128 f = Sse2.ShiftLeftLogical(e.AsInt16(), 8);
@@ -551,6 +564,7 @@ public static void PredictorInverseTransform(
int mask = tileWidth - 1;
int tilesPerRow = SubSampleSize(width, transform.Bits);
int predictorModeIdxBase = (y >> transform.Bits) * tilesPerRow;
+ Span scratch = stackalloc short[8];
while (y < yEnd)
{
int predictorModeIdx = predictorModeIdxBase;
@@ -608,7 +622,7 @@ public static void PredictorInverseTransform(
PredictorAdd10(input + x, output + x - width, xEnd - x, output + x);
break;
case 11:
- PredictorAdd11(input + x, output + x - width, xEnd - x, output + x);
+ PredictorAdd11(input + x, output + x - width, xEnd - x, output + x, scratch);
break;
case 12:
PredictorAdd12(input + x, output + x - width, xEnd - x, output + x);
@@ -704,7 +718,7 @@ public static void BundleColorMap(Span row, int width, int xBits, Span
/// Shannon entropy.
- public static float CombinedShannonEntropy(int[] x, int[] y)
+ public static float CombinedShannonEntropy(Span x, Span y)
{
double retVal = 0.0d;
uint sumX = 0, sumXY = 0;
@@ -751,6 +765,7 @@ public static byte TransformColorBlue(sbyte greenToBlue, sbyte redToBlue, uint a
///
/// Fast calculation of log2(v) for integer input.
///
+ [MethodImpl(InliningOptions.ShortMethod)]
public static float FastLog2(uint v) => v < LogLookupIdxMax ? WebpLookupTables.Log2Table[v] : FastLog2Slow(v);
///
@@ -779,7 +794,7 @@ public static void ColorCodeToMultipliers(uint colorCode, ref Vp8LMultipliers m)
private static float FastSLog2Slow(uint v)
{
- Guard.MustBeGreaterThanOrEqualTo(v, LogLookupIdxMax, nameof(v));
+ DebugGuard.MustBeGreaterThanOrEqualTo(v, LogLookupIdxMax, nameof(v));
if (v < ApproxLogWithCorrectionMax)
{
int logCnt = 0;
@@ -974,11 +989,11 @@ private static void PredictorAdd10(uint* input, uint* upper, int numberOfPixels,
}
[MethodImpl(InliningOptions.ShortMethod)]
- private static void PredictorAdd11(uint* input, uint* upper, int numberOfPixels, uint* output)
+ private static void PredictorAdd11(uint* input, uint* upper, int numberOfPixels, uint* output, Span scratch)
{
for (int x = 0; x < numberOfPixels; x++)
{
- uint pred = Predictor11(output[x - 1], upper + x);
+ uint pred = Predictor11(output[x - 1], upper + x, scratch);
output[x] = AddPixels(input[x], pred);
}
}
@@ -1031,7 +1046,7 @@ private static void PredictorAdd13(uint* input, uint* upper, int numberOfPixels,
public static uint Predictor10(uint left, uint* top) => Average4(left, top[-1], top[0], top[1]);
[MethodImpl(InliningOptions.ShortMethod)]
- public static uint Predictor11(uint left, uint* top) => Select(top[0], left, top[-1]);
+ public static uint Predictor11(uint left, uint* top, Span scratch) => Select(top[0], left, top[-1], scratch);
[MethodImpl(InliningOptions.ShortMethod)]
public static uint Predictor12(uint left, uint* top) => ClampedAddSubtractFull(left, top[0], top[-1]);
@@ -1148,11 +1163,11 @@ public static void PredictorSub10(uint* input, uint* upper, int numPixels, uint*
}
[MethodImpl(InliningOptions.ShortMethod)]
- public static void PredictorSub11(uint* input, uint* upper, int numPixels, uint* output)
+ public static void PredictorSub11(uint* input, uint* upper, int numPixels, uint* output, Span scratch)
{
for (int x = 0; x < numPixels; x++)
{
- uint pred = Predictor11(input[x - 1], upper + x);
+ uint pred = Predictor11(input[x - 1], upper + x, scratch);
output[x] = SubPixels(input[x], pred);
}
}
@@ -1200,30 +1215,65 @@ public static uint AddPixels(uint a, uint b)
private static uint ClampedAddSubtractFull(uint c0, uint c1, uint c2)
{
- int a = AddSubtractComponentFull(
- (int)(c0 >> 24),
- (int)(c1 >> 24),
- (int)(c2 >> 24));
- int r = AddSubtractComponentFull(
- (int)((c0 >> 16) & 0xff),
- (int)((c1 >> 16) & 0xff),
- (int)((c2 >> 16) & 0xff));
- int g = AddSubtractComponentFull(
- (int)((c0 >> 8) & 0xff),
- (int)((c1 >> 8) & 0xff),
- (int)((c2 >> 8) & 0xff));
- int b = AddSubtractComponentFull((int)(c0 & 0xff), (int)(c1 & 0xff), (int)(c2 & 0xff));
- return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b;
+#if SUPPORTS_RUNTIME_INTRINSICS
+ if (Sse2.IsSupported)
+ {
+ Vector128 c0Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c0).AsByte(), Vector128.Zero);
+ Vector128 c1Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c1).AsByte(), Vector128.Zero);
+ Vector128 c2Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c2).AsByte(), Vector128.Zero);
+ Vector128 v1 = Sse2.Add(c0Vec.AsInt16(), c1Vec.AsInt16());
+ Vector128 v2 = Sse2.Subtract(v1, c2Vec.AsInt16());
+ Vector128 b = Sse2.PackUnsignedSaturate(v2, v2);
+ uint output = Sse2.ConvertToUInt32(b.AsUInt32());
+ return output;
+ }
+#endif
+ {
+ int a = AddSubtractComponentFull(
+ (int)(c0 >> 24),
+ (int)(c1 >> 24),
+ (int)(c2 >> 24));
+ int r = AddSubtractComponentFull(
+ (int)((c0 >> 16) & 0xff),
+ (int)((c1 >> 16) & 0xff),
+ (int)((c2 >> 16) & 0xff));
+ int g = AddSubtractComponentFull(
+ (int)((c0 >> 8) & 0xff),
+ (int)((c1 >> 8) & 0xff),
+ (int)((c2 >> 8) & 0xff));
+ int b = AddSubtractComponentFull((int)(c0 & 0xff), (int)(c1 & 0xff), (int)(c2 & 0xff));
+ return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b;
+ }
}
private static uint ClampedAddSubtractHalf(uint c0, uint c1, uint c2)
{
- uint ave = Average2(c0, c1);
- int a = AddSubtractComponentHalf((int)(ave >> 24), (int)(c2 >> 24));
- int r = AddSubtractComponentHalf((int)((ave >> 16) & 0xff), (int)((c2 >> 16) & 0xff));
- int g = AddSubtractComponentHalf((int)((ave >> 8) & 0xff), (int)((c2 >> 8) & 0xff));
- int b = AddSubtractComponentHalf((int)(ave & 0xff), (int)(c2 & 0xff));
- return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b;
+#if SUPPORTS_RUNTIME_INTRINSICS
+ if (Sse2.IsSupported)
+ {
+ Vector128 c0Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c0).AsByte(), Vector128.Zero);
+ Vector128 c1Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c1).AsByte(), Vector128.Zero);
+ Vector128 b0 = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c2).AsByte(), Vector128.Zero);
+ Vector128 avg = Sse2.Add(c1Vec.AsInt16(), c0Vec.AsInt16());
+ Vector128 a0 = Sse2.ShiftRightLogical(avg, 1);
+ Vector128 a1 = Sse2.Subtract(a0, b0.AsInt16());
+ Vector128 bgta = Sse2.CompareGreaterThan(b0.AsInt16(), a0.AsInt16());
+ Vector128 a2 = Sse2.Subtract(a1, bgta);
+ Vector128 a3 = Sse2.ShiftRightArithmetic(a2, 1);
+ Vector128 a4 = Sse2.Add(a0, a3).AsInt16();
+ Vector128 a5 = Sse2.PackUnsignedSaturate(a4, a4);
+ uint output = Sse2.ConvertToUInt32(a5.AsUInt32());
+ return output;
+ }
+#endif
+ {
+ uint ave = Average2(c0, c1);
+ int a = AddSubtractComponentHalf((int)(ave >> 24), (int)(c2 >> 24));
+ int r = AddSubtractComponentHalf((int)((ave >> 16) & 0xff), (int)((c2 >> 16) & 0xff));
+ int g = AddSubtractComponentHalf((int)((ave >> 8) & 0xff), (int)((c2 >> 8) & 0xff));
+ int b = AddSubtractComponentHalf((int)(ave & 0xff), (int)(c2 & 0xff));
+ return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b;
+ }
}
[MethodImpl(InliningOptions.ShortMethod)]
@@ -1240,14 +1290,41 @@ private static uint ClampedAddSubtractHalf(uint c0, uint c1, uint c2)
private static Vector128 MkCst16(int hi, int lo) => Vector128.Create((hi << 16) | (lo & 0xffff));
#endif
- private static uint Select(uint a, uint b, uint c)
+ private static uint Select(uint a, uint b, uint c, Span scratch)
{
- int paMinusPb =
- Sub3((int)(a >> 24), (int)(b >> 24), (int)(c >> 24)) +
- Sub3((int)((a >> 16) & 0xff), (int)((b >> 16) & 0xff), (int)((c >> 16) & 0xff)) +
- Sub3((int)((a >> 8) & 0xff), (int)((b >> 8) & 0xff), (int)((c >> 8) & 0xff)) +
- Sub3((int)(a & 0xff), (int)(b & 0xff), (int)(c & 0xff));
- return paMinusPb <= 0 ? a : b;
+#if SUPPORTS_RUNTIME_INTRINSICS
+ if (Sse2.IsSupported)
+ {
+ Span output = scratch;
+ fixed (short* p = output)
+ {
+ Vector128 a0 = Sse2.ConvertScalarToVector128UInt32(a).AsByte();
+ Vector128 b0 = Sse2.ConvertScalarToVector128UInt32(b).AsByte();
+ Vector128 c0 = Sse2.ConvertScalarToVector128UInt32(c).AsByte();
+ Vector128 ac0 = Sse2.SubtractSaturate(a0, c0);
+ Vector128 ca0 = Sse2.SubtractSaturate(c0, a0);
+ Vector128 bc0 = Sse2.SubtractSaturate(b0, c0);
+ Vector128 cb0 = Sse2.SubtractSaturate(c0, b0);
+ Vector128 ac = Sse2.Or(ac0, ca0);
+ Vector128 bc = Sse2.Or(bc0, cb0);
+ Vector128 pa = Sse2.UnpackLow(ac, Vector128.Zero); // |a - c|
+ Vector128 pb = Sse2.UnpackLow(bc, Vector128.Zero); // |b - c|
+ Vector128 diff = Sse2.Subtract(pb.AsUInt16(), pa.AsUInt16());
+ Sse2.Store((ushort*)p, diff);
+ int paMinusPb = output[3] + output[2] + output[1] + output[0];
+ return (paMinusPb <= 0) ? a : b;
+ }
+ }
+ else
+#endif
+ {
+ int paMinusPb =
+ Sub3((int)(a >> 24), (int)(b >> 24), (int)(c >> 24)) +
+ Sub3((int)((a >> 16) & 0xff), (int)((b >> 16) & 0xff), (int)((c >> 16) & 0xff)) +
+ Sub3((int)((a >> 8) & 0xff), (int)((b >> 8) & 0xff), (int)((c >> 8) & 0xff)) +
+ Sub3((int)(a & 0xff), (int)(b & 0xff), (int)(c & 0xff));
+ return paMinusPb <= 0 ? a : b;
+ }
}
[MethodImpl(InliningOptions.ShortMethod)]
diff --git a/src/ImageSharp/Formats/Webp/Lossless/PixOrCopy.cs b/src/ImageSharp/Formats/Webp/Lossless/PixOrCopy.cs
index 2d71a7af64..6cd109121d 100644
--- a/src/ImageSharp/Formats/Webp/Lossless/PixOrCopy.cs
+++ b/src/ImageSharp/Formats/Webp/Lossless/PixOrCopy.cs
@@ -15,7 +15,7 @@ internal class PixOrCopy
public uint BgraOrDistance { get; set; }
public static PixOrCopy CreateCacheIdx(int idx) =>
- new PixOrCopy()
+ new()
{
Mode = PixOrCopyMode.CacheIdx,
BgraOrDistance = (uint)idx,
@@ -23,14 +23,14 @@ public static PixOrCopy CreateCacheIdx(int idx) =>
};
public static PixOrCopy CreateLiteral(uint bgra) =>
- new PixOrCopy()
+ new()
{
Mode = PixOrCopyMode.Literal,
BgraOrDistance = bgra,
Len = 1
};
- public static PixOrCopy CreateCopy(uint distance, ushort len) => new PixOrCopy()
+ public static PixOrCopy CreateCopy(uint distance, ushort len) => new()
{
Mode = PixOrCopyMode.Copy,
BgraOrDistance = distance,
diff --git a/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs
index 671e9a043e..99504dd488 100644
--- a/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs
+++ b/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs
@@ -17,6 +17,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
///
internal static unsafe class PredictorEncoder
{
+ private static readonly sbyte[][] Offset =
+ {
+ new sbyte[] { 0, -1 }, new sbyte[] { 0, 1 }, new sbyte[] { -1, 0 }, new sbyte[] { 1, 0 }, new sbyte[] { -1, -1 }, new sbyte[] { -1, 1 }, new sbyte[] { 1, -1 }, new sbyte[] { 1, 1 }
+ };
+
private const int GreenRedToBlueNumAxis = 8;
private const int GreenRedToBlueMaxIters = 7;
@@ -29,6 +34,25 @@ internal static unsafe class PredictorEncoder
private const int PredLowEffort = 11;
+#if SUPPORTS_RUNTIME_INTRINSICS
+ private static readonly Vector128 CollectColorRedTransformsGreenMask = Vector128.Create(0x00ff00).AsByte();
+
+ private static readonly Vector128 CollectColorRedTransformsAndMask = Vector128.Create((short)0xff).AsByte();
+
+ private static readonly Vector128 CollectColorBlueTransformsGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
+
+ private static readonly Vector128 CollectColorBlueTransformsGreenBlueMask = Vector128.Create(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0);
+
+ private static readonly Vector128 CollectColorBlueTransformsBlueMask = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0);
+
+ private static readonly Vector128 CollectColorBlueTransformsShuffleLowMask = Vector128.Create(255, 2, 255, 6, 255, 10, 255, 14, 255, 255, 255, 255, 255, 255, 255, 255);
+
+ private static readonly Vector128 CollectColorBlueTransformsShuffleHighMask = Vector128.Create(255, 255, 255, 255, 255, 255, 255, 255, 255, 2, 255, 6, 255, 10, 255, 14);
+#endif
+
+ // This uses C#'s compiler optimization to refer to assembly's static data directly.
+ private static ReadOnlySpan DeltaLut => new sbyte[] { 16, 16, 8, 4, 2, 2, 2 };
+
///
/// Finds the best predictor for each tile, and converts the image to residuals
/// with respect to predictions. If nearLosslessQuality < 100, applies
@@ -41,6 +65,8 @@ public static void ResidualImage(
Span bgra,
Span bgraScratch,
Span image,
+ int[][] histoArgb,
+ int[][] bestHisto,
bool nearLossless,
int nearLosslessQuality,
WebpTransparentColorMode transparentColorMode,
@@ -50,6 +76,7 @@ public static void ResidualImage(
int tilesPerRow = LosslessUtils.SubSampleSize(width, bits);
int tilesPerCol = LosslessUtils.SubSampleSize(height, bits);
int maxQuantization = 1 << LosslessUtils.NearLosslessBits(nearLosslessQuality);
+ Span scratch = stackalloc short[8];
// TODO: Can we optimize this?
int[][] histo = new int[4][];
@@ -80,11 +107,14 @@ public static void ResidualImage(
histo,
bgraScratch,
bgra,
+ histoArgb,
+ bestHisto,
maxQuantization,
transparentColorMode,
usedSubtractGreen,
nearLossless,
- image);
+ image,
+ scratch);
image[(tileY * tilesPerRow) + tileX] = (uint)(WebpConstants.ArgbBlack | (pred << 8));
}
@@ -105,7 +135,7 @@ public static void ResidualImage(
lowEffort);
}
- public static void ColorSpaceTransform(int width, int height, int bits, int quality, Span bgra, Span image)
+ public static void ColorSpaceTransform(int width, int height, int bits, int quality, Span bgra, Span image, Span scratch)
{
int maxTileSize = 1 << bits;
int tileXSize = LosslessUtils.SubSampleSize(width, bits);
@@ -139,7 +169,8 @@ public static void ColorSpaceTransform(int width, int height, int bits, int qual
height,
accumulatedRedHisto,
accumulatedBlueHisto,
- bgra);
+ bgra,
+ scratch);
image[offset] = MultipliersToColorCode(prevX);
CopyTileWithColorTransform(width, height, tileXOffset, tileYOffset, maxTileSize, prevX, bgra);
@@ -188,11 +219,14 @@ private static int GetBestPredictorForTile(
int[][] accumulated,
Span argbScratch,
Span argb,
+ int[][] histoArgb,
+ int[][] bestHisto,
int maxQuantization,
WebpTransparentColorMode transparentColorMode,
bool usedSubtractGreen,
bool nearLossless,
- Span modes)
+ Span modes,
+ Span scratch)
{
const int numPredModes = 14;
int startX = tileX << bits;
@@ -222,21 +256,14 @@ private static int GetBestPredictorForTile(
float bestDiff = MaxDiffCost;
int bestMode = 0;
uint[] residuals = new uint[1 << WebpConstants.MaxTransformBits];
- int[][] histoArgb = new int[4][];
- int[][] bestHisto = new int[4][];
for (int i = 0; i < 4; i++)
{
- histoArgb[i] = new int[256];
- bestHisto[i] = new int[256];
+ histoArgb[i].AsSpan().Clear();
+ bestHisto[i].AsSpan().Clear();
}
for (int mode = 0; mode < numPredModes; mode++)
{
- for (int i = 0; i < 4; i++)
- {
- histoArgb[i].AsSpan().Fill(0);
- }
-
if (startY > 0)
{
// Read the row above the tile which will become the first upper_row.
@@ -272,7 +299,7 @@ private static int GetBestPredictorForTile(
}
}
- GetResidual(width, height, upperRow, currentRow, maxDiffs, mode, startX, startX + maxX, y, maxQuantization, transparentColorMode, usedSubtractGreen, nearLossless, residuals);
+ GetResidual(width, height, upperRow, currentRow, maxDiffs, mode, startX, startX + maxX, y, maxQuantization, transparentColorMode, usedSubtractGreen, nearLossless, residuals, scratch);
for (int relativeX = 0; relativeX < maxX; ++relativeX)
{
UpdateHisto(histoArgb, residuals[relativeX]);
@@ -300,6 +327,11 @@ private static int GetBestPredictorForTile(
bestDiff = curDiff;
bestMode = mode;
}
+
+ for (int i = 0; i < 4; i++)
+ {
+ histoArgb[i].AsSpan().Clear();
+ }
}
for (int i = 0; i < 4; i++)
@@ -333,11 +365,12 @@ private static void GetResidual(
WebpTransparentColorMode transparentColorMode,
bool usedSubtractGreen,
bool nearLossless,
- Span output)
+ Span output,
+ Span scratch)
{
if (transparentColorMode == WebpTransparentColorMode.Preserve)
{
- PredictBatch(mode, xStart, y, xEnd - xStart, currentRowSpan, upperRowSpan, output);
+ PredictBatch(mode, xStart, y, xEnd - xStart, currentRowSpan, upperRowSpan, output, scratch);
}
else
{
@@ -395,7 +428,7 @@ private static void GetResidual(
predict = LosslessUtils.Predictor10(currentRow[x - 1], upperRow + x);
break;
case 11:
- predict = LosslessUtils.Predictor11(currentRow[x - 1], upperRow + x);
+ predict = LosslessUtils.Predictor11(currentRow[x - 1], upperRow + x, scratch);
break;
case 12:
predict = LosslessUtils.Predictor12(currentRow[x - 1], upperRow + x);
@@ -583,6 +616,7 @@ private static void CopyImageWithPrediction(
Span currentMaxDiffs = MemoryMarshal.Cast(currentRow.Slice(width + 1));
Span lowerMaxDiffs = currentMaxDiffs.Slice(width);
+ Span scratch = stackalloc short[8];
for (int y = 0; y < height; y++)
{
Span tmp32 = upperRow;
@@ -593,7 +627,7 @@ private static void CopyImageWithPrediction(
if (lowEffort)
{
- PredictBatch(PredLowEffort, 0, y, width, currentRow, upperRow, argb.Slice(y * width));
+ PredictBatch(PredLowEffort, 0, y, width, currentRow, upperRow, argb.Slice(y * width), scratch);
}
else
{
@@ -634,7 +668,8 @@ private static void CopyImageWithPrediction(
transparentColorMode,
usedSubtractGreen,
nearLossless,
- argb.Slice((y * width) + x));
+ argb.Slice((y * width) + x),
+ scratch);
x = xEnd;
}
@@ -649,7 +684,8 @@ private static void PredictBatch(
int numPixels,
Span currentSpan,
Span upperSpan,
- Span outputSpan)
+ Span outputSpan,
+ Span scratch)
{
#pragma warning disable SA1503 // Braces should not be omitted
fixed (uint* current = currentSpan)
@@ -718,7 +754,7 @@ private static void PredictBatch(
LosslessUtils.PredictorSub10(current + xStart, upper + xStart, numPixels, output);
break;
case 11:
- LosslessUtils.PredictorSub11(current + xStart, upper + xStart, numPixels, output);
+ LosslessUtils.PredictorSub11(current + xStart, upper + xStart, numPixels, output, scratch);
break;
case 12:
LosslessUtils.PredictorSub12(current + xStart, upper + xStart, numPixels, output);
@@ -819,7 +855,19 @@ private static void CopyTileWithColorTransform(int xSize, int ySize, int tileX,
}
}
- private static Vp8LMultipliers GetBestColorTransformForTile(int tileX, int tileY, int bits, Vp8LMultipliers prevX, Vp8LMultipliers prevY, int quality, int xSize, int ySize, int[] accumulatedRedHisto, int[] accumulatedBlueHisto, Span argb)
+ private static Vp8LMultipliers GetBestColorTransformForTile(
+ int tileX,
+ int tileY,
+ int bits,
+ Vp8LMultipliers prevX,
+ Vp8LMultipliers prevY,
+ int quality,
+ int xSize,
+ int ySize,
+ int[] accumulatedRedHisto,
+ int[] accumulatedBlueHisto,
+ Span argb,
+ Span scratch)
{
int maxTileSize = 1 << bits;
int tileYOffset = tileY * maxTileSize;
@@ -832,18 +880,28 @@ private static Vp8LMultipliers GetBestColorTransformForTile(int tileX, int tileY
var bestTx = default(Vp8LMultipliers);
- GetBestGreenToRed(tileArgb, xSize, tileWidth, tileHeight, prevX, prevY, quality, accumulatedRedHisto, ref bestTx);
+ GetBestGreenToRed(tileArgb, xSize, scratch, tileWidth, tileHeight, prevX, prevY, quality, accumulatedRedHisto, ref bestTx);
- GetBestGreenRedToBlue(tileArgb, xSize, tileWidth, tileHeight, prevX, prevY, quality, accumulatedBlueHisto, ref bestTx);
+ GetBestGreenRedToBlue(tileArgb, xSize, scratch, tileWidth, tileHeight, prevX, prevY, quality, accumulatedBlueHisto, ref bestTx);
return bestTx;
}
- private static void GetBestGreenToRed(Span argb, int stride, int tileWidth, int tileHeight, Vp8LMultipliers prevX, Vp8LMultipliers prevY, int quality, int[] accumulatedRedHisto, ref Vp8LMultipliers bestTx)
+ private static void GetBestGreenToRed(
+ Span argb,
+ int stride,
+ Span scratch,
+ int tileWidth,
+ int tileHeight,
+ Vp8LMultipliers prevX,
+ Vp8LMultipliers prevY,
+ int quality,
+ int[] accumulatedRedHisto,
+ ref Vp8LMultipliers bestTx)
{
int maxIters = 4 + ((7 * quality) >> 8); // in range [4..6]
int greenToRedBest = 0;
- double bestDiff = GetPredictionCostCrossColorRed(argb, stride, tileWidth, tileHeight, prevX, prevY, greenToRedBest, accumulatedRedHisto);
+ double bestDiff = GetPredictionCostCrossColorRed(argb, stride, scratch, tileWidth, tileHeight, prevX, prevY, greenToRedBest, accumulatedRedHisto);
for (int iter = 0; iter < maxIters; iter++)
{
// ColorTransformDelta is a 3.5 bit fixed point, so 32 is equal to
@@ -855,7 +913,7 @@ private static void GetBestGreenToRed(Span argb, int stride, int tileWidth
for (int offset = -delta; offset <= delta; offset += 2 * delta)
{
int greenToRedCur = offset + greenToRedBest;
- double curDiff = GetPredictionCostCrossColorRed(argb, stride, tileWidth, tileHeight, prevX, prevY, greenToRedCur, accumulatedRedHisto);
+ double curDiff = GetPredictionCostCrossColorRed(argb, stride, scratch, tileWidth, tileHeight, prevX, prevY, greenToRedCur, accumulatedRedHisto);
if (curDiff < bestDiff)
{
bestDiff = curDiff;
@@ -867,24 +925,22 @@ private static void GetBestGreenToRed(Span argb, int stride, int tileWidth
bestTx.GreenToRed = (byte)(greenToRedBest & 0xff);
}
- private static void GetBestGreenRedToBlue(Span argb, int stride, int tileWidth, int tileHeight, Vp8LMultipliers prevX, Vp8LMultipliers prevY, int quality, int[] accumulatedBlueHisto, ref Vp8LMultipliers bestTx)
+ private static void GetBestGreenRedToBlue(Span argb, int stride, Span scratch, int tileWidth, int tileHeight, Vp8LMultipliers prevX, Vp8LMultipliers prevY, int quality, int[] accumulatedBlueHisto, ref Vp8LMultipliers bestTx)
{
int iters = (quality < 25) ? 1 : (quality > 50) ? GreenRedToBlueMaxIters : 4;
int greenToBlueBest = 0;
int redToBlueBest = 0;
- sbyte[][] offset = { new sbyte[] { 0, -1 }, new sbyte[] { 0, 1 }, new sbyte[] { -1, 0 }, new sbyte[] { 1, 0 }, new sbyte[] { -1, -1 }, new sbyte[] { -1, 1 }, new sbyte[] { 1, -1 }, new sbyte[] { 1, 1 } };
- sbyte[] deltaLut = { 16, 16, 8, 4, 2, 2, 2 };
// Initial value at origin:
- double bestDiff = GetPredictionCostCrossColorBlue(argb, stride, tileWidth, tileHeight, prevX, prevY, greenToBlueBest, redToBlueBest, accumulatedBlueHisto);
+ double bestDiff = GetPredictionCostCrossColorBlue(argb, stride, scratch, tileWidth, tileHeight, prevX, prevY, greenToBlueBest, redToBlueBest, accumulatedBlueHisto);
for (int iter = 0; iter < iters; iter++)
{
- int delta = deltaLut[iter];
+ int delta = DeltaLut[iter];
for (int axis = 0; axis < GreenRedToBlueNumAxis; axis++)
{
- int greenToBlueCur = (offset[axis][0] * delta) + greenToBlueBest;
- int redToBlueCur = (offset[axis][1] * delta) + redToBlueBest;
- double curDiff = GetPredictionCostCrossColorBlue(argb, stride, tileWidth, tileHeight, prevX, prevY, greenToBlueCur, redToBlueCur, accumulatedBlueHisto);
+ int greenToBlueCur = (Offset[axis][0] * delta) + greenToBlueBest;
+ int redToBlueCur = (Offset[axis][1] * delta) + redToBlueBest;
+ double curDiff = GetPredictionCostCrossColorBlue(argb, stride, scratch, tileWidth, tileHeight, prevX, prevY, greenToBlueCur, redToBlueCur, accumulatedBlueHisto);
if (curDiff < bestDiff)
{
bestDiff = curDiff;
@@ -910,9 +966,19 @@ private static void GetBestGreenRedToBlue(Span argb, int stride, int tileW
bestTx.RedToBlue = (byte)(redToBlueBest & 0xff);
}
- private static double GetPredictionCostCrossColorRed(Span argb, int stride, int tileWidth, int tileHeight, Vp8LMultipliers prevX, Vp8LMultipliers prevY, int greenToRed, int[] accumulatedRedHisto)
+ private static double GetPredictionCostCrossColorRed(
+ Span argb,
+ int stride,
+ Span scratch,
+ int tileWidth,
+ int tileHeight,
+ Vp8LMultipliers prevX,
+ Vp8LMultipliers prevY,
+ int greenToRed,
+ int[] accumulatedRedHisto)
{
- int[] histo = new int[256];
+ Span histo = scratch.Slice(0, 256);
+ histo.Clear();
CollectColorRedTransforms(argb, stride, tileWidth, tileHeight, greenToRed, histo);
double curDiff = PredictionCostCrossColor(accumulatedRedHisto, histo);
@@ -937,9 +1003,20 @@ private static double GetPredictionCostCrossColorRed(Span argb, int stride
return curDiff;
}
- private static double GetPredictionCostCrossColorBlue(Span argb, int stride, int tileWidth, int tileHeight, Vp8LMultipliers prevX, Vp8LMultipliers prevY, int greenToBlue, int redToBlue, int[] accumulatedBlueHisto)
+ private static double GetPredictionCostCrossColorBlue(
+ Span argb,
+ int stride,
+ Span scratch,
+ int tileWidth,
+ int tileHeight,
+ Vp8LMultipliers prevX,
+ Vp8LMultipliers prevY,
+ int greenToBlue,
+ int redToBlue,
+ int[] accumulatedBlueHisto)
{
- int[] histo = new int[256];
+ Span histo = scratch.Slice(0, 256);
+ histo.Clear();
CollectColorBlueTransforms(argb, stride, tileWidth, tileHeight, greenToBlue, redToBlue, histo);
double curDiff = PredictionCostCrossColor(accumulatedBlueHisto, histo);
@@ -980,15 +1057,12 @@ private static double GetPredictionCostCrossColorBlue(Span argb, int strid
return curDiff;
}
- private static void CollectColorRedTransforms(Span bgra, int stride, int tileWidth, int tileHeight, int greenToRed, int[] histo)
+ private static void CollectColorRedTransforms(Span bgra, int stride, int tileWidth, int tileHeight, int greenToRed, Span histo)
{
#if SUPPORTS_RUNTIME_INTRINSICS
if (Sse41.IsSupported)
{
var multsg = Vector128.Create(LosslessUtils.Cst5b(greenToRed));
- var maskgreen = Vector128.Create(0x00ff00);
- var mask = Vector128.Create((short)0xff);
-
const int span = 8;
Span values = stackalloc ushort[span];
for (int y = 0; y < tileHeight; y++)
@@ -1004,15 +1078,15 @@ private static void CollectColorRedTransforms(Span bgra, int stride, int t
uint* input1Idx = src + x + (span / 2);
Vector128 input0 = Sse2.LoadVector128((ushort*)input0Idx).AsByte();
Vector128 input1 = Sse2.LoadVector128((ushort*)input1Idx).AsByte();
- Vector128 g0 = Sse2.And(input0, maskgreen.AsByte()); // 0 0 | g 0
- Vector128 g1 = Sse2.And(input1, maskgreen.AsByte());
+ Vector128 g0 = Sse2.And(input0, CollectColorRedTransformsGreenMask); // 0 0 | g 0
+ Vector128 g1 = Sse2.And(input1, CollectColorRedTransformsGreenMask);
Vector128 g = Sse41.PackUnsignedSaturate(g0.AsInt32(), g1.AsInt32()); // g 0
Vector128 a0 = Sse2.ShiftRightLogical(input0.AsInt32(), 16); // 0 0 | x r
Vector128 a1 = Sse2.ShiftRightLogical(input1.AsInt32(), 16);
Vector128 a = Sse41.PackUnsignedSaturate(a0, a1); // x r
Vector128 b = Sse2.MultiplyHigh(g.AsInt16(), multsg); // x dr
Vector128 c = Sse2.Subtract(a.AsByte(), b.AsByte()); // x r'
- Vector128 d = Sse2.And(c, mask.AsByte()); // 0 r'
+ Vector128 d = Sse2.And(c, CollectColorRedTransformsAndMask); // 0 r'
Sse2.Store(dst, d.AsUInt16());
for (int i = 0; i < span; i++)
{
@@ -1036,7 +1110,7 @@ private static void CollectColorRedTransforms(Span bgra, int stride, int t
}
}
- private static void CollectColorRedTransformsNoneVectorized(Span bgra, int stride, int tileWidth, int tileHeight, int greenToRed, int[] histo)
+ private static void CollectColorRedTransformsNoneVectorized(Span bgra, int stride, int tileWidth, int tileHeight, int greenToRed, Span histo)
{
int pos = 0;
while (tileHeight-- > 0)
@@ -1051,7 +1125,7 @@ private static void CollectColorRedTransformsNoneVectorized(Span bgra, int
}
}
- private static void CollectColorBlueTransforms(Span bgra, int stride, int tileWidth, int tileHeight, int greenToBlue, int redToBlue, int[] histo)
+ private static void CollectColorBlueTransforms(Span bgra, int stride, int tileWidth, int tileHeight, int greenToBlue, int redToBlue, Span histo)
{
#if SUPPORTS_RUNTIME_INTRINSICS
if (Sse41.IsSupported)
@@ -1060,12 +1134,6 @@ private static void CollectColorBlueTransforms(Span bgra, int stride, int
Span values = stackalloc ushort[span];
var multsr = Vector128.Create(LosslessUtils.Cst5b(redToBlue));
var multsg = Vector128.Create(LosslessUtils.Cst5b(greenToBlue));
- var maskgreen = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
- var maskgreenblue = Vector128.Create(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0);
- var maskblue = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0);
- var shufflerLow = Vector128.Create(255, 2, 255, 6, 255, 10, 255, 14, 255, 255, 255, 255, 255, 255, 255, 255);
- var shufflerHigh = Vector128.Create(255, 255, 255, 255, 255, 255, 255, 255, 255, 2, 255, 6, 255, 10, 255, 14);
-
for (int y = 0; y < tileHeight; y++)
{
Span srcSpan = bgra.Slice(y * stride);
@@ -1079,18 +1147,18 @@ private static void CollectColorBlueTransforms(Span bgra, int stride, int
uint* input1Idx = src + x + (span / 2);
Vector128 input0 = Sse2.LoadVector128((ushort*)input0Idx).AsByte();
Vector128 input1 = Sse2.LoadVector128((ushort*)input1Idx).AsByte();
- Vector128 r0 = Ssse3.Shuffle(input0, shufflerLow);
- Vector128 r1 = Ssse3.Shuffle(input1, shufflerHigh);
+ Vector128 r0 = Ssse3.Shuffle(input0, CollectColorBlueTransformsShuffleLowMask);
+ Vector128 r1 = Ssse3.Shuffle(input1, CollectColorBlueTransformsShuffleHighMask);
Vector128 r = Sse2.Or(r0, r1);
- Vector128 gb0 = Sse2.And(input0, maskgreenblue);
- Vector128 gb1 = Sse2.And(input1, maskgreenblue);
+ Vector128 gb0 = Sse2.And(input0, CollectColorBlueTransformsGreenBlueMask);
+ Vector128 gb1 = Sse2.And(input1, CollectColorBlueTransformsGreenBlueMask);
Vector128 gb = Sse41.PackUnsignedSaturate(gb0.AsInt32(), gb1.AsInt32());
- Vector128 g = Sse2.And(gb.AsByte(), maskgreen);
+ Vector128 g = Sse2.And(gb.AsByte(), CollectColorBlueTransformsGreenMask);
Vector128 a = Sse2.MultiplyHigh(r.AsInt16(), multsr);
Vector128 b = Sse2.MultiplyHigh(g.AsInt16(), multsg);
Vector128 c = Sse2.Subtract(gb.AsByte(), b.AsByte());
Vector128 d = Sse2.Subtract(c, a.AsByte());
- Vector128 e = Sse2.And(d, maskblue);
+ Vector128 e = Sse2.And(d, CollectColorBlueTransformsBlueMask);
Sse2.Store(dst, e.AsUInt16());
for (int i = 0; i < span; i++)
{
@@ -1114,7 +1182,7 @@ private static void CollectColorBlueTransforms(Span bgra, int stride, int
}
}
- private static void CollectColorBlueTransformsNoneVectorized(Span bgra, int stride, int tileWidth, int tileHeight, int greenToBlue, int redToBlue, int[] histo)
+ private static void CollectColorBlueTransformsNoneVectorized(Span bgra, int stride, int tileWidth, int tileHeight, int greenToBlue, int redToBlue, Span histo)
{
int pos = 0;
while (tileHeight-- > 0)
@@ -1143,7 +1211,7 @@ private static float PredictionCostSpatialHistogram(int[][] accumulated, int[][]
}
[MethodImpl(InliningOptions.ShortMethod)]
- private static double PredictionCostCrossColor(int[] accumulated, int[] counts)
+ private static double PredictionCostCrossColor(int[] accumulated, Span counts)
{
// Favor low entropy, locally and globally.
// Favor small absolute values for PredictionCostSpatial.
@@ -1152,7 +1220,7 @@ private static double PredictionCostCrossColor(int[] accumulated, int[] counts)
}
[MethodImpl(InliningOptions.ShortMethod)]
- private static float PredictionCostSpatial(int[] counts, int weight0, double expVal)
+ private static float PredictionCostSpatial(Span counts, int weight0, double expVal)
{
int significantSymbols = 256 >> 4;
double expDecayFactor = 0.6;
diff --git a/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs
index 693585637c..da815a479a 100644
--- a/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs
+++ b/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs
@@ -19,6 +19,15 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
///
internal class Vp8LEncoder : IDisposable
{
+ ///
+ /// Scratch buffer to reduce allocations.
+ ///
+ private readonly int[] scratch = new int[256];
+
+ private readonly int[][] histoArgb = { new int[256], new int[256], new int[256], new int[256] };
+
+ private readonly int[][] bestHisto = { new int[256], new int[256], new int[256], new int[256] };
+
///
/// The to use for buffer allocations.
///
@@ -128,6 +137,9 @@ public Vp8LEncoder(
}
}
+ // This uses C#'s compiler optimization to refer to assembly's static data directly.
+ private static ReadOnlySpan Order => new byte[] { 1, 2, 0, 3 };
+
///
/// Gets the memory for the image data as packed bgra values.
///
@@ -234,7 +246,7 @@ public void Encode(Image image, Stream stream)
this.EncodeStream(image);
// Write bytes from the bitwriter buffer to the stream.
- this.bitWriter.WriteEncodedImageToStream(stream, image.Metadata.ExifProfile, (uint)width, (uint)height);
+ this.bitWriter.WriteEncodedImageToStream(stream, image.Metadata.ExifProfile, (uint)width, (uint)height, hasAlpha);
}
///
@@ -675,6 +687,8 @@ private void ApplyPredictFilter(int width, int height, bool lowEffort)
this.EncodedData.GetSpan(),
this.BgraScratch.GetSpan(),
this.TransformData.GetSpan(),
+ this.histoArgb,
+ this.bestHisto,
this.nearLossless,
nearLosslessStrength,
this.transparentColorMode,
@@ -694,7 +708,7 @@ private void ApplyCrossColorFilter(int width, int height, bool lowEffort)
int transformWidth = LosslessUtils.SubSampleSize(width, colorTransformBits);
int transformHeight = LosslessUtils.SubSampleSize(height, colorTransformBits);
- PredictorEncoder.ColorSpaceTransform(width, height, colorTransformBits, this.quality, this.EncodedData.GetSpan(), this.TransformData.GetSpan());
+ PredictorEncoder.ColorSpaceTransform(width, height, colorTransformBits, this.quality, this.EncodedData.GetSpan(), this.TransformData.GetSpan(), this.scratch);
this.bitWriter.PutBits(WebpConstants.TransformPresent, 1);
this.bitWriter.PutBits((uint)Vp8LTransformType.CrossColorTransform, 2);
@@ -736,7 +750,7 @@ private void EncodeImageNoHuffman(Span bgra, Vp8LHashChain hashChain, Vp8L
var histogramImage = new List()
{
- new Vp8LHistogram(cacheBits)
+ new(cacheBits)
};
// Build histogram image and symbols from backward references.
@@ -780,7 +794,8 @@ private void EncodeImageNoHuffman(Span bgra, Vp8LHashChain hashChain, Vp8L
private void StoreHuffmanCode(HuffmanTree[] huffTree, HuffmanTreeToken[] tokens, HuffmanTreeCode huffmanCode)
{
int count = 0;
- int[] symbols = { 0, 0 };
+ Span symbols = this.scratch.AsSpan(0, 2);
+ symbols.Clear();
int maxBits = 8;
int maxSymbol = 1 << maxBits;
@@ -973,10 +988,9 @@ private void StoreImageToBitMask(int width, int histoBits, Vp8LBackwardRefs back
if (v.IsLiteral())
{
- byte[] order = { 1, 2, 0, 3 };
for (int k = 0; k < 4; k++)
{
- int code = (int)v.Literal(order[k]);
+ int code = (int)v.Literal(Order[k]);
this.bitWriter.WriteHuffmanCode(codes[k], code);
}
}
@@ -1092,9 +1106,10 @@ private EntropyIx AnalyzeEntropy(ReadOnlySpan bgra, int width, int height,
histo[(int)HistoIx.HistoBluePred * 256]++;
histo[(int)HistoIx.HistoAlphaPred * 256]++;
+ var bitEntropy = new Vp8LBitEntropy();
for (int j = 0; j < (int)HistoIx.HistoTotal; j++)
{
- var bitEntropy = new Vp8LBitEntropy();
+ bitEntropy.Init();
Span curHisto = histo.Slice(j * 256, 256);
bitEntropy.BitsEntropyUnrefined(curHisto, 256);
entropyComp[j] = bitEntropy.BitsEntropyRefine();
@@ -1190,9 +1205,14 @@ private bool AnalyzeAndCreatePalette(ReadOnlySpan bgra, int width, int hei
return false;
}
+#if NET5_0_OR_GREATER
+ var paletteSlice = palette.Slice(0, this.PaletteSize);
+ paletteSlice.Sort();
+#else
uint[] paletteArray = palette.Slice(0, this.PaletteSize).ToArray();
Array.Sort(paletteArray);
paletteArray.CopyTo(palette);
+#endif
if (PaletteHasNonMonotonousDeltas(palette, this.PaletteSize))
{
@@ -1447,7 +1467,8 @@ private static int SearchColorNoIdx(uint[] sorted, uint color, int hi)
{
return mid;
}
- else if (sorted[mid] < color)
+
+ if (sorted[mid] < color)
{
low = mid;
}
diff --git a/src/ImageSharp/Formats/Webp/Lossless/Vp8LHistogram.cs b/src/ImageSharp/Formats/Webp/Lossless/Vp8LHistogram.cs
index 42260e2b25..8b02015687 100644
--- a/src/ImageSharp/Formats/Webp/Lossless/Vp8LHistogram.cs
+++ b/src/ImageSharp/Formats/Webp/Lossless/Vp8LHistogram.cs
@@ -157,29 +157,30 @@ public void AddSinglePixOrCopy(PixOrCopy v, bool useDistanceModifier, int xSize
/// Estimate how many bits the combined entropy of literals and distance approximately maps to.
///
/// Estimated bits.
- public double EstimateBits()
+ public double EstimateBits(Vp8LStreaks stats, Vp8LBitEntropy bitsEntropy)
{
uint notUsed = 0;
return
- PopulationCost(this.Literal, this.NumCodes(), ref notUsed, ref this.IsUsed[0])
- + PopulationCost(this.Red, WebpConstants.NumLiteralCodes, ref notUsed, ref this.IsUsed[1])
- + PopulationCost(this.Blue, WebpConstants.NumLiteralCodes, ref notUsed, ref this.IsUsed[2])
- + PopulationCost(this.Alpha, WebpConstants.NumLiteralCodes, ref notUsed, ref this.IsUsed[3])
- + PopulationCost(this.Distance, WebpConstants.NumDistanceCodes, ref notUsed, ref this.IsUsed[4])
+ PopulationCost(this.Literal, this.NumCodes(), ref notUsed, ref this.IsUsed[0], stats, bitsEntropy)
+ + PopulationCost(this.Red, WebpConstants.NumLiteralCodes, ref notUsed, ref this.IsUsed[1], stats, bitsEntropy)
+ + PopulationCost(this.Blue, WebpConstants.NumLiteralCodes, ref notUsed, ref this.IsUsed[2], stats, bitsEntropy)
+ + PopulationCost(this.Alpha, WebpConstants.NumLiteralCodes, ref notUsed, ref this.IsUsed[3], stats, bitsEntropy)
+ + PopulationCost(this.Distance, WebpConstants.NumDistanceCodes, ref notUsed, ref this.IsUsed[4], stats, bitsEntropy)
+ ExtraCost(this.Literal.AsSpan(WebpConstants.NumLiteralCodes), WebpConstants.NumLengthCodes)
+ ExtraCost(this.Distance, WebpConstants.NumDistanceCodes);
}
- public void UpdateHistogramCost()
+ public void UpdateHistogramCost(Vp8LStreaks stats, Vp8LBitEntropy bitsEntropy)
{
uint alphaSym = 0, redSym = 0, blueSym = 0;
uint notUsed = 0;
- double alphaCost = PopulationCost(this.Alpha, WebpConstants.NumLiteralCodes, ref alphaSym, ref this.IsUsed[3]);
- double distanceCost = PopulationCost(this.Distance, WebpConstants.NumDistanceCodes, ref notUsed, ref this.IsUsed[4]) + ExtraCost(this.Distance, WebpConstants.NumDistanceCodes);
+
+ double alphaCost = PopulationCost(this.Alpha, WebpConstants.NumLiteralCodes, ref alphaSym, ref this.IsUsed[3], stats, bitsEntropy);
+ double distanceCost = PopulationCost(this.Distance, WebpConstants.NumDistanceCodes, ref notUsed, ref this.IsUsed[4], stats, bitsEntropy) + ExtraCost(this.Distance, WebpConstants.NumDistanceCodes);
int numCodes = this.NumCodes();
- this.LiteralCost = PopulationCost(this.Literal, numCodes, ref notUsed, ref this.IsUsed[0]) + ExtraCost(this.Literal.AsSpan(WebpConstants.NumLiteralCodes), WebpConstants.NumLengthCodes);
- this.RedCost = PopulationCost(this.Red, WebpConstants.NumLiteralCodes, ref redSym, ref this.IsUsed[1]);
- this.BlueCost = PopulationCost(this.Blue, WebpConstants.NumLiteralCodes, ref blueSym, ref this.IsUsed[2]);
+ this.LiteralCost = PopulationCost(this.Literal, numCodes, ref notUsed, ref this.IsUsed[0], stats, bitsEntropy) + ExtraCost(this.Literal.AsSpan(WebpConstants.NumLiteralCodes), WebpConstants.NumLengthCodes);
+ this.RedCost = PopulationCost(this.Red, WebpConstants.NumLiteralCodes, ref redSym, ref this.IsUsed[1], stats, bitsEntropy);
+ this.BlueCost = PopulationCost(this.Blue, WebpConstants.NumLiteralCodes, ref blueSym, ref this.IsUsed[2], stats, bitsEntropy);
this.BitCost = this.LiteralCost + this.RedCost + this.BlueCost + alphaCost + distanceCost;
if ((alphaSym | redSym | blueSym) == NonTrivialSym)
{
@@ -198,11 +199,11 @@ public void UpdateHistogramCost()
/// Since the previous score passed is 'costThreshold', we only need to compare
/// the partial cost against 'costThreshold + C(a) + C(b)' to possibly bail-out early.
///
- public double AddEval(Vp8LHistogram b, double costThreshold, Vp8LHistogram output)
+ public double AddEval(Vp8LHistogram b, Vp8LStreaks stats, Vp8LBitEntropy bitsEntropy, double costThreshold, Vp8LHistogram output)
{
double sumCost = this.BitCost + b.BitCost;
costThreshold += sumCost;
- if (this.GetCombinedHistogramEntropy(b, costThreshold, costInitial: 0, out double cost))
+ if (this.GetCombinedHistogramEntropy(b, stats, bitsEntropy, costThreshold, costInitial: 0, out double cost))
{
this.Add(b, output);
output.BitCost = cost;
@@ -212,10 +213,10 @@ public double AddEval(Vp8LHistogram b, double costThreshold, Vp8LHistogram outpu
return cost - sumCost;
}
- public double AddThresh(Vp8LHistogram b, double costThreshold)
+ public double AddThresh(Vp8LHistogram b, Vp8LStreaks stats, Vp8LBitEntropy bitsEntropy, double costThreshold)
{
double costInitial = -this.BitCost;
- this.GetCombinedHistogramEntropy(b, costThreshold, costInitial, out double cost);
+ this.GetCombinedHistogramEntropy(b, stats, bitsEntropy, costThreshold, costInitial, out double cost);
return cost;
}
@@ -239,12 +240,12 @@ public void Add(Vp8LHistogram b, Vp8LHistogram output)
: NonTrivialSym;
}
- public bool GetCombinedHistogramEntropy(Vp8LHistogram b, double costThreshold, double costInitial, out double cost)
+ public bool GetCombinedHistogramEntropy(Vp8LHistogram b, Vp8LStreaks stats, Vp8LBitEntropy bitEntropy, double costThreshold, double costInitial, out double cost)
{
bool trivialAtEnd = false;
cost = costInitial;
- cost += GetCombinedEntropy(this.Literal, b.Literal, this.NumCodes(), this.IsUsed[0], b.IsUsed[0], false);
+ cost += GetCombinedEntropy(this.Literal, b.Literal, this.NumCodes(), this.IsUsed[0], b.IsUsed[0], false, stats, bitEntropy);
cost += ExtraCostCombined(this.Literal.AsSpan(WebpConstants.NumLiteralCodes), b.Literal.AsSpan(WebpConstants.NumLiteralCodes), WebpConstants.NumLengthCodes);
@@ -267,25 +268,25 @@ public bool GetCombinedHistogramEntropy(Vp8LHistogram b, double costThreshold, d
}
}
- cost += GetCombinedEntropy(this.Red, b.Red, WebpConstants.NumLiteralCodes, this.IsUsed[1], b.IsUsed[1], trivialAtEnd);
+ cost += GetCombinedEntropy(this.Red, b.Red, WebpConstants.NumLiteralCodes, this.IsUsed[1], b.IsUsed[1], trivialAtEnd, stats, bitEntropy);
if (cost > costThreshold)
{
return false;
}
- cost += GetCombinedEntropy(this.Blue, b.Blue, WebpConstants.NumLiteralCodes, this.IsUsed[2], b.IsUsed[2], trivialAtEnd);
+ cost += GetCombinedEntropy(this.Blue, b.Blue, WebpConstants.NumLiteralCodes, this.IsUsed[2], b.IsUsed[2], trivialAtEnd, stats, bitEntropy);
if (cost > costThreshold)
{
return false;
}
- cost += GetCombinedEntropy(this.Alpha, b.Alpha, WebpConstants.NumLiteralCodes, this.IsUsed[3], b.IsUsed[3], trivialAtEnd);
+ cost += GetCombinedEntropy(this.Alpha, b.Alpha, WebpConstants.NumLiteralCodes, this.IsUsed[3], b.IsUsed[3], trivialAtEnd, stats, bitEntropy);
if (cost > costThreshold)
{
return false;
}
- cost += GetCombinedEntropy(this.Distance, b.Distance, WebpConstants.NumDistanceCodes, this.IsUsed[4], b.IsUsed[4], false);
+ cost += GetCombinedEntropy(this.Distance, b.Distance, WebpConstants.NumDistanceCodes, this.IsUsed[4], b.IsUsed[4], false, stats, bitEntropy);
if (cost > costThreshold)
{
return false;
@@ -415,9 +416,10 @@ private void AddDistance(Vp8LHistogram b, Vp8LHistogram output, int size)
}
}
- private static double GetCombinedEntropy(uint[] x, uint[] y, int length, bool isXUsed, bool isYUsed, bool trivialAtEnd)
+ private static double GetCombinedEntropy(uint[] x, uint[] y, int length, bool isXUsed, bool isYUsed, bool trivialAtEnd, Vp8LStreaks stats, Vp8LBitEntropy bitEntropy)
{
- var stats = new Vp8LStreaks();
+ stats.Clear();
+ bitEntropy.Init();
if (trivialAtEnd)
{
// This configuration is due to palettization that transforms an indexed
@@ -435,7 +437,6 @@ private static double GetCombinedEntropy(uint[] x, uint[] y, int length, bool is
return stats.FinalHuffmanCost();
}
- var bitEntropy = new Vp8LBitEntropy();
if (isXUsed)
{
if (isYUsed)
@@ -479,10 +480,10 @@ private static double ExtraCostCombined(Span x, Span y, int length)
///
/// Get the symbol entropy for the distribution 'population'.
///
- private static double PopulationCost(uint[] population, int length, ref uint trivialSym, ref bool isUsed)
+ private static double PopulationCost(uint[] population, int length, ref uint trivialSym, ref bool isUsed, Vp8LStreaks stats, Vp8LBitEntropy bitEntropy)
{
- var bitEntropy = new Vp8LBitEntropy();
- var stats = new Vp8LStreaks();
+ bitEntropy.Init();
+ stats.Clear();
bitEntropy.BitsEntropyUnrefined(population, length, stats);
trivialSym = (bitEntropy.NoneZeros == 1) ? bitEntropy.NoneZeroCode : NonTrivialSym;
diff --git a/src/ImageSharp/Formats/Webp/Lossless/Vp8LStreaks.cs b/src/ImageSharp/Formats/Webp/Lossless/Vp8LStreaks.cs
index 27ddcfd434..df9f064426 100644
--- a/src/ImageSharp/Formats/Webp/Lossless/Vp8LStreaks.cs
+++ b/src/ImageSharp/Formats/Webp/Lossless/Vp8LStreaks.cs
@@ -1,6 +1,8 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
+using System;
+
namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{
internal class Vp8LStreaks
@@ -28,6 +30,13 @@ public Vp8LStreaks()
///
public int[][] Streaks { get; }
+ public void Clear()
+ {
+ this.Counts.AsSpan().Clear();
+ this.Streaks[0].AsSpan().Clear();
+ this.Streaks[1].AsSpan().Clear();
+ }
+
public double FinalHuffmanCost()
{
// The constants in this function are experimental and got rounded from
diff --git a/src/ImageSharp/Formats/Webp/Lossless/WebpLosslessDecoder.cs b/src/ImageSharp/Formats/Webp/Lossless/WebpLosslessDecoder.cs
index 9604160091..4f7a4eb3d8 100644
--- a/src/ImageSharp/Formats/Webp/Lossless/WebpLosslessDecoder.cs
+++ b/src/ImageSharp/Formats/Webp/Lossless/WebpLosslessDecoder.cs
@@ -218,7 +218,7 @@ public void DecodeImageData(Vp8LDecoder decoder, Span pixelData)
ColorCache colorCache = decoder.Metadata.ColorCache;
int colorCacheLimit = lenCodeLimit + colorCacheSize;
int mask = decoder.Metadata.HuffmanMask;
- HTreeGroup[] hTreeGroup = GetHTreeGroupForPos(decoder.Metadata, col, row);
+ Span hTreeGroup = GetHTreeGroupForPos(decoder.Metadata, col, row);
int totalPixels = width * height;
int decodedPixels = 0;
@@ -418,6 +418,7 @@ private void ReadHuffmanCodes(Vp8LDecoder decoder, int xSize, int ySize, int col
var huffmanTables = new HuffmanCode[numHTreeGroups * tableSize];
var hTreeGroups = new HTreeGroup[numHTreeGroups];
Span huffmanTable = huffmanTables.AsSpan();
+ int[] codeLengths = new int[maxAlphabetSize];
for (int i = 0; i < numHTreeGroupsMax; i++)
{
hTreeGroups[i] = new HTreeGroup(HuffmanUtils.HuffmanPackedTableSize);
@@ -425,7 +426,7 @@ private void ReadHuffmanCodes(Vp8LDecoder decoder, int xSize, int ySize, int col
int totalSize = 0;
bool isTrivialLiteral = true;
int maxBits = 0;
- int[] codeLengths = new int[maxAlphabetSize];
+ codeLengths.AsSpan().Clear();
for (int j = 0; j < WebpConstants.HuffmanCodesPerMetaCode; j++)
{
int alphabetSize = WebpConstants.AlphabetSize[j];
@@ -731,7 +732,7 @@ public void DecodeAlphaData(AlphaDecoder dec)
int lastRow = height;
const int lenCodeLimit = WebpConstants.NumLiteralCodes + WebpConstants.NumLengthCodes;
int mask = hdr.HuffmanMask;
- HTreeGroup[] htreeGroup = pos < last ? GetHTreeGroupForPos(hdr, col, row) : null;
+ Span htreeGroup = pos < last ? GetHTreeGroupForPos(hdr, col, row) : null;
while (!this.bitReader.Eos && pos < last)
{
// Only update when changing tile.
@@ -815,7 +816,7 @@ private void UpdateDecoder(Vp8LDecoder decoder, int width, int height)
decoder.Metadata.HuffmanMask = numBits == 0 ? ~0 : (1 << numBits) - 1;
}
- private uint ReadPackedSymbols(HTreeGroup[] group, Span pixelData, int decodedPixels)
+ private uint ReadPackedSymbols(Span group, Span pixelData, int decodedPixels)
{
uint val = (uint)(this.bitReader.PrefetchBits() & (HuffmanUtils.HuffmanPackedTableSize - 1));
HuffmanCode code = group[0].PackedTable[val];
@@ -895,10 +896,10 @@ private int GetCopyDistance(int distanceSymbol)
}
[MethodImpl(InliningOptions.ShortMethod)]
- private static HTreeGroup[] GetHTreeGroupForPos(Vp8LMetadata metadata, int x, int y)
+ private static Span GetHTreeGroupForPos(Vp8LMetadata metadata, int x, int y)
{
uint metaIndex = GetMetaIndex(metadata.HuffmanImage, metadata.HuffmanXSize, metadata.HuffmanSubSampleBits, x, y);
- return metadata.HTreeGroups.AsSpan((int)metaIndex).ToArray();
+ return metadata.HTreeGroups.AsSpan((int)metaIndex);
}
[MethodImpl(InliningOptions.ShortMethod)]
diff --git a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs
index 1584237b0c..a10ec6eabb 100644
--- a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs
+++ b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs
@@ -4,23 +4,78 @@
using System;
using System.Buffers.Binary;
using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+#if SUPPORTS_RUNTIME_INTRINSICS
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+#endif
// ReSharper disable InconsistentNaming
namespace SixLabors.ImageSharp.Formats.Webp.Lossy
{
internal static class LossyUtils
{
+#if SUPPORTS_RUNTIME_INTRINSICS
+ private static readonly Vector128 Mean16x4Mask = Vector128.Create((short)0x00ff).AsByte();
+#endif
+
+ // Note: this method is named VP8SSE16x16 in the libwebp reference implementation.
[MethodImpl(InliningOptions.ShortMethod)]
- public static int Vp8Sse16X16(Span a, Span b) => GetSse(a, b, 16, 16);
+ public static int Vp8_Sse16X16(Span a, Span b) => Vp8_SseNxN(a, b, 16, 16);
+ // Note: this method is named VP8SSE16x8 in the libwebp reference implementation.
[MethodImpl(InliningOptions.ShortMethod)]
- public static int Vp8Sse16X8(Span a, Span b) => GetSse(a, b, 16, 8);
+ public static int Vp8_Sse16X8(Span a, Span b) => Vp8_SseNxN(a, b, 16, 8);
+ // Note: this method is named VP8SSE4x4 in the libwebp reference implementation.
[MethodImpl(InliningOptions.ShortMethod)]
- public static int Vp8Sse4X4(Span a, Span b) => GetSse(a, b, 4, 4);
+ public static int Vp8_Sse4X4(Span a, Span b)
+ {
+#if SUPPORTS_RUNTIME_INTRINSICS
+ if (Sse2.IsSupported)
+ {
+ // Load values.
+ ref byte aRef = ref MemoryMarshal.GetReference(a);
+ Vector128 a0 = Unsafe.As>(ref aRef);
+ Vector128 a1 = Unsafe.As>(ref Unsafe.Add(ref aRef, WebpConstants.Bps));
+ Vector128 a2 = Unsafe.As>(ref Unsafe.Add(ref aRef, WebpConstants.Bps * 2));
+ Vector128 a3 = Unsafe.As>(ref Unsafe.Add(ref aRef, WebpConstants.Bps * 3));
+ ref byte bRef = ref MemoryMarshal.GetReference(b);
+ Vector128 b0 = Unsafe.As>(ref bRef);
+ Vector128 b1 = Unsafe.As>(ref Unsafe.Add(ref bRef, WebpConstants.Bps));
+ Vector128 b2 = Unsafe.As>(ref Unsafe.Add(ref bRef, WebpConstants.Bps * 2));
+ Vector128 b3 = Unsafe.As>(ref Unsafe.Add(ref bRef, WebpConstants.Bps * 3));
+
+ // Combine pair of lines.
+ Vector128 a01 = Sse2.UnpackLow(a0.AsInt32(), a1.AsInt32());
+ Vector128 a23 = Sse2.UnpackLow(a2.AsInt32(), a3.AsInt32());
+ Vector128 b01 = Sse2.UnpackLow(b0.AsInt32(), b1.AsInt32());
+ Vector128 b23 = Sse2.UnpackLow(b2.AsInt32(), b3.AsInt32());
+
+ // Convert to 16b.
+ Vector128 a01s = Sse2.UnpackLow(a01.AsByte(), Vector128.Zero);
+ Vector128 a23s = Sse2.UnpackLow(a23.AsByte(), Vector128.Zero);
+ Vector128 b01s = Sse2.UnpackLow(b01.AsByte(), Vector128.Zero);
+ Vector128 b23s = Sse2.UnpackLow(b23.AsByte(), Vector128.Zero);
+
+ // subtract, square and accumulate.
+ Vector128 d0 = Sse2.SubtractSaturate(a01s, b01s);
+ Vector128 d1 = Sse2.SubtractSaturate(a23s, b23s);
+ Vector128 e0 = Sse2.MultiplyAddAdjacent(d0.AsInt16(), d0.AsInt16());
+ Vector128 e1 = Sse2.MultiplyAddAdjacent(d1.AsInt16(), d1.AsInt16());
+ Vector128 sum = Sse2.Add(e0, e1);
+
+ return Numerics.ReduceSum(sum);
+ }
+ else
+#endif
+ {
+ return Vp8_SseNxN(a, b, 4, 4);
+ }
+ }
[MethodImpl(InliningOptions.ShortMethod)]
- public static int GetSse(Span a, Span b, int w, int h)
+ public static int Vp8_SseNxN(Span a, Span b, int w, int h)
{
int count = 0;
int aOffset = 0;
@@ -58,14 +113,15 @@ public static void Copy(Span src, Span dst, int w, int h)
}
[MethodImpl(InliningOptions.ShortMethod)]
- public static int Vp8Disto16X16(Span a, Span b, Span w)
+ public static int Vp8Disto16X16(Span a, Span b, Span w, Span scratch)
{
int d = 0;
+ int dataSize = (4 * WebpConstants.Bps) - 16;
for (int y = 0; y < 16 * WebpConstants.Bps; y += 4 * WebpConstants.Bps)
{
for (int x = 0; x < 16; x += 4)
{
- d += Vp8Disto4X4(a.Slice(x + y), b.Slice(x + y), w);
+ d += Vp8Disto4X4(a.Slice(x + y, dataSize), b.Slice(x + y, dataSize), w, scratch);
}
}
@@ -73,11 +129,21 @@ public static int Vp8Disto16X16(Span a, Span b, Span w)
}
[MethodImpl(InliningOptions.ShortMethod)]
- public static int Vp8Disto4X4(Span a, Span b, Span w)
+ public static int Vp8Disto4X4(Span a, Span b, Span w, Span scratch)
{
- int sum1 = TTransform(a, w);
- int sum2 = TTransform(b, w);
- return Math.Abs(sum2 - sum1) >> 5;
+#if SUPPORTS_RUNTIME_INTRINSICS
+ if (Sse41.IsSupported)
+ {
+ int diffSum = TTransformSse41(a, b, w);
+ return Math.Abs(diffSum) >> 5;
+ }
+ else
+#endif
+ {
+ int sum1 = TTransform(a, w, scratch);
+ int sum2 = TTransform(b, w, scratch);
+ return Math.Abs(sum2 - sum1) >> 5;
+ }
}
public static void DC16(Span dst, Span yuv, int offset)
@@ -252,18 +318,14 @@ public static void DC4(Span dst, Span yuv, int offset)
[MethodImpl(InliningOptions.ShortMethod)]
public static void TM4(Span dst, Span yuv, int offset) => TrueMotion(dst, yuv, offset, 4);
- public static void VE4(Span dst, Span yuv, int offset)
+ public static void VE4(Span dst, Span yuv, int offset, Span vals)
{
// vertical
int topOffset = offset - WebpConstants.Bps;
- byte[] vals =
- {
- Avg3(yuv[topOffset - 1], yuv[topOffset], yuv[topOffset + 1]),
- Avg3(yuv[topOffset], yuv[topOffset + 1], yuv[topOffset + 2]),
- Avg3(yuv[topOffset + 1], yuv[topOffset + 2], yuv[topOffset + 3]),
- Avg3(yuv[topOffset + 2], yuv[topOffset + 3], yuv[topOffset + 4])
- };
-
+ vals[0] = Avg3(yuv[topOffset - 1], yuv[topOffset], yuv[topOffset + 1]);
+ vals[1] = Avg3(yuv[topOffset], yuv[topOffset + 1], yuv[topOffset + 2]);
+ vals[2] = Avg3(yuv[topOffset + 1], yuv[topOffset + 2], yuv[topOffset + 3]);
+ vals[3] = Avg3(yuv[topOffset + 2], yuv[topOffset + 3], yuv[topOffset + 4]);
int endIdx = 4 * WebpConstants.Bps;
for (int i = 0; i < endIdx; i += WebpConstants.Bps)
{
@@ -504,9 +566,10 @@ public static void HU4(Span dst, Span yuv, int offset)
///
/// Paragraph 14.3: Implementation of the Walsh-Hadamard transform inversion.
///
- public static void TransformWht(Span input, Span output)
+ public static void TransformWht(Span input, Span output, Span scratch)
{
- int[] tmp = new int[16];
+ Span tmp = scratch.Slice(0, 16);
+ tmp.Clear();
for (int i = 0; i < 4; i++)
{
int iPlus4 = 4 + i;
@@ -544,10 +607,11 @@ public static void TransformWht(Span input, Span output)
/// Returns the weighted sum of the absolute value of transformed coefficients.
/// w[] contains a row-major 4 by 4 symmetric matrix.
///
- public static int TTransform(Span input, Span w)
+ public static int TTransform(Span input, Span w, Span scratch)
{
int sum = 0;
- int[] tmp = new int[16];
+ Span tmp = scratch.Slice(0, 16);
+ tmp.Clear();
// horizontal pass.
int inputOffset = 0;
@@ -591,15 +655,131 @@ public static int TTransform(Span input, Span w)
return sum;
}
- public static void TransformTwo(Span src, Span dst)
+#if SUPPORTS_RUNTIME_INTRINSICS
+ ///
+ /// Hadamard transform of two 4x4 inputs, computed in parallel.
+ /// Returns the difference between the weighted sums of the absolute values of the transformed coefficients of inputA and inputB.
+ /// w[] contains a row-major 4 by 4 symmetric matrix.
+ ///
+ public static int TTransformSse41(Span inputA, Span inputB, Span w)
{
- TransformOne(src, dst);
- TransformOne(src.Slice(16), dst.Slice(4));
+ // Load and combine inputs.
+ Vector128 ina0 = Unsafe.As>(ref MemoryMarshal.GetReference(inputA));
+ Vector128 ina1 = Unsafe.As>(ref MemoryMarshal.GetReference(inputA.Slice(WebpConstants.Bps, 16)));
+ Vector128 ina2 = Unsafe.As>(ref MemoryMarshal.GetReference(inputA.Slice(WebpConstants.Bps * 2, 16)));
+ Vector128 ina3 = Unsafe.As>(ref MemoryMarshal.GetReference(inputA.Slice(WebpConstants.Bps * 3, 16))).AsInt64();
+ Vector128 inb0 = Unsafe.As>(ref MemoryMarshal.GetReference(inputB));
+ Vector128 inb1 = Unsafe.As>(ref MemoryMarshal.GetReference(inputB.Slice(WebpConstants.Bps, 16)));
+ Vector128 inb2 = Unsafe.As>(ref MemoryMarshal.GetReference(inputB.Slice(WebpConstants.Bps * 2, 16)));
+ Vector128 inb3 = Unsafe.As>(ref MemoryMarshal.GetReference(inputB.Slice(WebpConstants.Bps * 3, 16))).AsInt64();
+
+ // Combine inA and inB (we'll do two transforms in parallel).
+ Vector128 inab0 = Sse2.UnpackLow(ina0.AsInt32(), inb0.AsInt32());
+ Vector128 inab1 = Sse2.UnpackLow(ina1.AsInt32(), inb1.AsInt32());
+ Vector128 inab2 = Sse2.UnpackLow(ina2.AsInt32(), inb2.AsInt32());
+ Vector128 inab3 = Sse2.UnpackLow(ina3.AsInt32(), inb3.AsInt32());
+ Vector128 tmp0 = Sse41.ConvertToVector128Int16(inab0.AsByte());
+ Vector128 tmp1 = Sse41.ConvertToVector128Int16(inab1.AsByte());
+ Vector128 tmp2 = Sse41.ConvertToVector128Int16(inab2.AsByte());
+ Vector128 tmp3 = Sse41.ConvertToVector128Int16(inab3.AsByte());
+
+ // a00 a01 a02 a03 b00 b01 b02 b03
+ // a10 a11 a12 a13 b10 b11 b12 b13
+ // a20 a21 a22 a23 b20 b21 b22 b23
+ // a30 a31 a32 a33 b30 b31 b32 b33
+ // Do the vertical pass first, then transpose; starting with the vertical pass avoids an extra transpose
+ // (vertical and horizontal passes are commutative because w/kWeightY is symmetric).
+ // Calculate a and b (two 4x4 at once).
+ Vector128 a0 = Sse2.Add(tmp0, tmp2);
+ Vector128 a1 = Sse2.Add(tmp1, tmp3);
+ Vector128 a2 = Sse2.Subtract(tmp1, tmp3);
+ Vector128 a3 = Sse2.Subtract(tmp0, tmp2);
+ Vector128 b0 = Sse2.Add(a0, a1);
+ Vector128 b1 = Sse2.Add(a3, a2);
+ Vector128 b2 = Sse2.Subtract(a3, a2);
+ Vector128 b3 = Sse2.Subtract(a0, a1);
+
+ // a00 a01 a02 a03 b00 b01 b02 b03
+ // a10 a11 a12 a13 b10 b11 b12 b13
+ // a20 a21 a22 a23 b20 b21 b22 b23
+ // a30 a31 a32 a33 b30 b31 b32 b33
+ // Transpose the two 4x4.
+ Vector128 transpose00 = Sse2.UnpackLow(b0, b1);
+ Vector128 transpose01 = Sse2.UnpackLow(b2, b3);
+ Vector128 transpose02 = Sse2.UnpackHigh(b0, b1);
+ Vector128 transpose03 = Sse2.UnpackHigh(b2, b3);
+
+ // a00 a10 a01 a11 a02 a12 a03 a13
+ // a20 a30 a21 a31 a22 a32 a23 a33
+ // b00 b10 b01 b11 b02 b12 b03 b13
+ // b20 b30 b21 b31 b22 b32 b23 b33
+ Vector128 transpose10 = Sse2.UnpackLow(transpose00.AsInt32(), transpose01.AsInt32());
+ Vector128 transpose11 = Sse2.UnpackLow(transpose02.AsInt32(), transpose03.AsInt32());
+ Vector128 transpose12 = Sse2.UnpackHigh(transpose00.AsInt32(), transpose01.AsInt32());
+ Vector128 transpose13 = Sse2.UnpackHigh(transpose02.AsInt32(), transpose03.AsInt32());
+
+ // a00 a10 a20 a30 a01 a11 a21 a31
+ // b00 b10 b20 b30 b01 b11 b21 b31
+ // a02 a12 a22 a32 a03 a13 a23 a33
+ // b02 b12 b22 b32 b03 b13 b23 b33
+ Vector128 output0 = Sse2.UnpackLow(transpose10.AsInt64(), transpose11.AsInt64());
+ Vector128 output1 = Sse2.UnpackHigh(transpose10.AsInt64(), transpose11.AsInt64());
+ Vector128 output2 = Sse2.UnpackLow(transpose12.AsInt64(), transpose13.AsInt64());
+ Vector128 output3 = Sse2.UnpackHigh(transpose12.AsInt64(), transpose13.AsInt64());
+
+ // a00 a10 a20 a30 b00 b10 b20 b30
+ // a01 a11 a21 a31 b01 b11 b21 b31
+ // a02 a12 a22 a32 b02 b12 b22 b32
+ // a03 a13 a23 a33 b03 b13 b23 b33
+ // Horizontal pass and difference of weighted sums.
+ Vector128 w0 = Unsafe.As>(ref MemoryMarshal.GetReference(w));
+ Vector128 w8 = Unsafe.As>(ref MemoryMarshal.GetReference(w.Slice(8, 8)));
+
+ // Calculate a and b (two 4x4 at once).
+ a0 = Sse2.Add(output0.AsInt16(), output2.AsInt16());
+ a1 = Sse2.Add(output1.AsInt16(), output3.AsInt16());
+ a2 = Sse2.Subtract(output1.AsInt16(), output3.AsInt16());
+ a3 = Sse2.Subtract(output0.AsInt16(), output2.AsInt16());
+ b0 = Sse2.Add(a0, a1);
+ b1 = Sse2.Add(a3, a2);
+ b2 = Sse2.Subtract(a3, a2);
+ b3 = Sse2.Subtract(a0, a1);
+
+ // Separate the transforms of inA and inB.
+ Vector128 ab0 = Sse2.UnpackLow(b0.AsInt64(), b1.AsInt64());
+ Vector128 ab2 = Sse2.UnpackLow(b2.AsInt64(), b3.AsInt64());
+ Vector128 bb0 = Sse2.UnpackHigh(b0.AsInt64(), b1.AsInt64());
+ Vector128 bb2 = Sse2.UnpackHigh(b2.AsInt64(), b3.AsInt64());
+
+ Vector128 ab0Abs = Ssse3.Abs(ab0.AsInt16());
+ Vector128 ab2Abs = Ssse3.Abs(ab2.AsInt16());
+ Vector128 b0Abs = Ssse3.Abs(bb0.AsInt16());
+ Vector128 bb2Abs = Ssse3.Abs(bb2.AsInt16());
+
+ // weighted sums.
+ Vector128 ab0mulw0 = Sse2.MultiplyAddAdjacent(ab0Abs.AsInt16(), w0.AsInt16());
+ Vector128 ab2mulw8 = Sse2.MultiplyAddAdjacent(ab2Abs.AsInt16(), w8.AsInt16());
+ Vector128 b0mulw0 = Sse2.MultiplyAddAdjacent(b0Abs.AsInt16(), w0.AsInt16());
+ Vector128 bb2mulw8 = Sse2.MultiplyAddAdjacent(bb2Abs.AsInt16(), w8.AsInt16());
+ Vector128 ab0ab2Sum = Sse2.Add(ab0mulw0, ab2mulw8);
+ Vector128 b0w0bb2w8Sum = Sse2.Add(b0mulw0, bb2mulw8);
+
+ // difference of weighted sums.
+ Vector128 result = Sse2.Subtract(ab0ab2Sum.AsInt32(), b0w0bb2w8Sum.AsInt32());
+
+ return Numerics.ReduceSum(result);
}
+#endif
- public static void TransformOne(Span src, Span dst)
+ public static void TransformTwo(Span src, Span dst, Span scratch)
{
- Span tmp = stackalloc int[4 * 4];
+ TransformOne(src, dst, scratch);
+ TransformOne(src.Slice(16), dst.Slice(4), scratch);
+ }
+
+ public static void TransformOne(Span src, Span dst, Span scratch)
+ {
+ Span tmp = scratch.Slice(0, 16);
int tmpOffset = 0;
for (int srcOffset = 0; srcOffset < 4; srcOffset++)
{
@@ -671,10 +851,10 @@ public static void TransformAc3(Span src, Span dst)
Store2(dst, 3, a - d4, d1, c1);
}
- public static void TransformUv(Span src, Span dst)
+ public static void TransformUv(Span src, Span dst, Span scratch)
{
- TransformTwo(src.Slice(0 * 16), dst);
- TransformTwo(src.Slice(2 * 16), dst.Slice(4 * WebpConstants.Bps));
+ TransformTwo(src.Slice(0 * 16), dst, scratch);
+ TransformTwo(src.Slice(2 * 16), dst.Slice(4 * WebpConstants.Bps), scratch);
}
public static void TransformDcuv(Span src, Span dst)
@@ -802,26 +982,55 @@ public static void HFilter8i(Span u, Span v, int offset, int stride,
FilterLoop24(v, offsetPlus4, 1, stride, 8, thresh, ithresh, hevThresh);
}
- [MethodImpl(InliningOptions.ShortMethod)]
- public static uint LoadUv(byte u, byte v) =>
- (uint)(u | (v << 16)); // We process u and v together stashed into 32bit(16bit each).
-
- [MethodImpl(InliningOptions.ShortMethod)]
- public static void YuvToBgr(int y, int u, int v, Span bgr)
+ public static void Mean16x4(Span input, Span dc)
{
- bgr[0] = (byte)YuvToB(y, u);
- bgr[1] = (byte)YuvToG(y, u, v);
- bgr[2] = (byte)YuvToR(y, v);
- }
-
- [MethodImpl(InliningOptions.ShortMethod)]
- public static int YuvToB(int y, int u) => Clip8(MultHi(y, 19077) + MultHi(u, 33050) - 17685);
-
- [MethodImpl(InliningOptions.ShortMethod)]
- public static int YuvToG(int y, int u, int v) => Clip8(MultHi(y, 19077) - MultHi(u, 6419) - MultHi(v, 13320) + 8708);
+#if SUPPORTS_RUNTIME_INTRINSICS
+ if (Ssse3.IsSupported)
+ {
+ Vector128 a0 = Unsafe.As>(ref MemoryMarshal.GetReference(input));
+ Vector128 a1 = Unsafe.As>(ref MemoryMarshal.GetReference(input.Slice(WebpConstants.Bps, 16)));
+ Vector128 a2 = Unsafe.As>(ref MemoryMarshal.GetReference(input.Slice(WebpConstants.Bps * 2, 16)));
+ Vector128 a3 = Unsafe.As>(ref MemoryMarshal.GetReference(input.Slice(WebpConstants.Bps * 3, 16)));
+ Vector128 b0 = Sse2.ShiftRightLogical(a0.AsInt16(), 8); // hi byte
+ Vector128 b1 = Sse2.ShiftRightLogical(a1.AsInt16(), 8);
+ Vector128 b2 = Sse2.ShiftRightLogical(a2.AsInt16(), 8);
+ Vector128 b3 = Sse2.ShiftRightLogical(a3.AsInt16(), 8);
+ Vector128 c0 = Sse2.And(a0, Mean16x4Mask); // lo byte
+ Vector128 c1 = Sse2.And(a1, Mean16x4Mask);
+ Vector128 c2 = Sse2.And(a2, Mean16x4Mask);
+ Vector128 c3 = Sse2.And(a3, Mean16x4Mask);
+ Vector128 d0 = Sse2.Add(b0.AsInt32(), c0.AsInt32());
+ Vector128 d1 = Sse2.Add(b1.AsInt32(), c1.AsInt32());
+ Vector128 d2 = Sse2.Add(b2.AsInt32(), c2.AsInt32());
+ Vector128