diff --git a/src/ImageSharp/ColorSpaces/Companding/SRgbCompanding.cs b/src/ImageSharp/ColorSpaces/Companding/SRgbCompanding.cs index 2e212ad19f..9a8b5f0a84 100644 --- a/src/ImageSharp/ColorSpaces/Companding/SRgbCompanding.cs +++ b/src/ImageSharp/ColorSpaces/Companding/SRgbCompanding.cs @@ -1,4 +1,4 @@ -// Copyright (c) Six Labors. +// Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. using System; @@ -25,12 +25,14 @@ public static class SRgbCompanding [MethodImpl(InliningOptions.ShortMethod)] public static void Expand(Span vectors) { - ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors); + ref Vector4 vectorsStart = ref MemoryMarshal.GetReference(vectors); + ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsStart, vectors.Length); - for (int i = 0; i < vectors.Length; i++) + while (Unsafe.IsAddressLessThan(ref vectorsStart, ref vectorsEnd)) { - ref Vector4 v = ref Unsafe.Add(ref baseRef, i); - Expand(ref v); + Expand(ref vectorsStart); + + vectorsStart = ref Unsafe.Add(ref vectorsStart, 1); } } @@ -41,12 +43,14 @@ public static void Expand(Span vectors) [MethodImpl(InliningOptions.ShortMethod)] public static void Compress(Span vectors) { - ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors); + ref Vector4 vectorsStart = ref MemoryMarshal.GetReference(vectors); + ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsStart, vectors.Length); - for (int i = 0; i < vectors.Length; i++) + while (Unsafe.IsAddressLessThan(ref vectorsStart, ref vectorsEnd)) { - ref Vector4 v = ref Unsafe.Add(ref baseRef, i); - Compress(ref v); + Compress(ref vectorsStart); + + vectorsStart = ref Unsafe.Add(ref vectorsStart, 1); } } @@ -90,4 +94,4 @@ public static void Compress(ref Vector4 vector) [MethodImpl(InliningOptions.ShortMethod)] public static float Compress(float channel) => channel <= 0.0031308F ? 12.92F * channel : (1.055F * MathF.Pow(channel, 0.416666666666667F)) - 0.055F; } -} \ No newline at end of file +} diff --git a/src/ImageSharp/Common/Helpers/Numerics.cs b/src/ImageSharp/Common/Helpers/Numerics.cs index 56ab46c685..b9ccfafe0e 100644 --- a/src/ImageSharp/Common/Helpers/Numerics.cs +++ b/src/ImageSharp/Common/Helpers/Numerics.cs @@ -41,13 +41,11 @@ public static int GreatestCommonDivisor(int a, int b) /// /// Determine the Least Common Multiple (LCM) of two numbers. + /// See https://en.wikipedia.org/wiki/Least_common_multiple#Reduction_by_the_greatest_common_divisor. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public static int LeastCommonMultiple(int a, int b) - { - // https://en.wikipedia.org/wiki/Least_common_multiple#Reduction_by_the_greatest_common_divisor - return (a / GreatestCommonDivisor(a, b)) * b; - } + => a / GreatestCommonDivisor(a, b) * b; /// /// Calculates % 2 @@ -290,10 +288,14 @@ public static void Clamp(Span span, byte min, byte max) if (remainder.Length > 0) { - for (int i = 0; i < remainder.Length; i++) + ref byte remainderStart = ref MemoryMarshal.GetReference(remainder); + ref byte remainderEnd = ref Unsafe.Add(ref remainderStart, remainder.Length); + + while (Unsafe.IsAddressLessThan(ref remainderStart, ref remainderEnd)) { - ref byte v = ref remainder[i]; - v = Clamp(v, min, max); + remainderStart = Clamp(remainderStart, min, max); + + remainderStart = ref Unsafe.Add(ref remainderStart, 1); } } } @@ -311,10 +313,14 @@ public static void Clamp(Span span, uint min, uint max) if (remainder.Length > 0) { - for (int i = 0; i < remainder.Length; i++) + ref uint remainderStart = ref MemoryMarshal.GetReference(remainder); + ref uint remainderEnd = ref Unsafe.Add(ref remainderStart, remainder.Length); + + while (Unsafe.IsAddressLessThan(ref remainderStart, ref remainderEnd)) { - ref uint v = ref remainder[i]; - v = Clamp(v, min, max); + remainderStart = Clamp(remainderStart, min, max); + + remainderStart = ref Unsafe.Add(ref remainderStart, 1); } } } @@ -332,10 +338,14 @@ public static void Clamp(Span span, int min, int max) if (remainder.Length > 0) { - for (int i = 0; i < remainder.Length; i++) + ref int remainderStart = ref MemoryMarshal.GetReference(remainder); + ref int remainderEnd = ref Unsafe.Add(ref remainderStart, remainder.Length); + + while (Unsafe.IsAddressLessThan(ref remainderStart, ref remainderEnd)) { - ref int v = ref remainder[i]; - v = Clamp(v, min, max); + remainderStart = Clamp(remainderStart, min, max); + + remainderStart = ref Unsafe.Add(ref remainderStart, 1); } } } @@ -353,10 +363,14 @@ public static void Clamp(Span span, float min, float max) if (remainder.Length > 0) { - for (int i = 0; i < remainder.Length; i++) + ref float remainderStart = ref MemoryMarshal.GetReference(remainder); + ref float remainderEnd = ref Unsafe.Add(ref remainderStart, remainder.Length); + + while (Unsafe.IsAddressLessThan(ref remainderStart, ref remainderEnd)) { - ref float v = ref remainder[i]; - v = Clamp(v, min, max); + remainderStart = Clamp(remainderStart, min, max); + + remainderStart = ref Unsafe.Add(ref remainderStart, 1); } } } @@ -374,10 +388,14 @@ public static void Clamp(Span span, double min, double max) if (remainder.Length > 0) { - for (int i = 0; i < remainder.Length; i++) + ref double remainderStart = ref MemoryMarshal.GetReference(remainder); + ref double remainderEnd = ref Unsafe.Add(ref remainderStart, remainder.Length); + + while (Unsafe.IsAddressLessThan(ref remainderStart, ref remainderEnd)) { - ref double v = ref remainder[i]; - v = Clamp(v, min, max); + remainderStart = Clamp(remainderStart, min, max); + + remainderStart = ref Unsafe.Add(ref remainderStart, 1); } } } @@ -407,7 +425,6 @@ private static void ClampImpl(Span span, T min, T max) where T : unmanaged { ref T sRef = ref MemoryMarshal.GetReference(span); - ref Vector vsBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(span)); var vmin = new Vector(min); var vmax = new Vector(max); @@ -415,25 +432,35 @@ private static void ClampImpl(Span span, T min, T max) int m = Modulo4(n); int u = n - m; - for (int i = 0; i < u; i += 4) - { - ref Vector vs0 = ref Unsafe.Add(ref vsBase, i); - ref Vector vs1 = ref Unsafe.Add(ref vs0, 1); - ref Vector vs2 = ref Unsafe.Add(ref vs0, 2); - ref Vector vs3 = ref Unsafe.Add(ref vs0, 3); + ref Vector vs0 = ref Unsafe.As>(ref MemoryMarshal.GetReference(span)); + ref Vector vs1 = ref Unsafe.Add(ref vs0, 1); + ref Vector vs2 = ref Unsafe.Add(ref vs0, 2); + ref Vector vs3 = ref Unsafe.Add(ref vs0, 3); + ref Vector vsEnd = ref Unsafe.Add(ref vs0, u); + while (Unsafe.IsAddressLessThan(ref vs0, ref vsEnd)) + { vs0 = Vector.Min(Vector.Max(vmin, vs0), vmax); vs1 = Vector.Min(Vector.Max(vmin, vs1), vmax); vs2 = Vector.Min(Vector.Max(vmin, vs2), vmax); vs3 = Vector.Min(Vector.Max(vmin, vs3), vmax); + + vs0 = ref Unsafe.Add(ref vs0, 4); + vs1 = ref Unsafe.Add(ref vs1, 4); + vs2 = ref Unsafe.Add(ref vs2, 4); + vs3 = ref Unsafe.Add(ref vs3, 4); } if (m > 0) { - for (int i = u; i < n; i++) + vs0 = ref vsEnd; + vsEnd = ref Unsafe.Add(ref vsEnd, m); + + while (Unsafe.IsAddressLessThan(ref vs0, ref vsEnd)) { - ref Vector vs0 = ref Unsafe.Add(ref vsBase, i); vs0 = Vector.Min(Vector.Max(vmin, vs0), vmax); + + vs0 = ref Unsafe.Add(ref vs0, 1); } } } @@ -472,10 +499,8 @@ public static void Premultiply(Span vectors) #if SUPPORTS_RUNTIME_INTRINSICS if (Avx2.IsSupported && vectors.Length >= 2) { - ref Vector256 vectorsBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(vectors)); - // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 vectorsBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(vectors)); ref Vector256 vectorsLast = ref Unsafe.Add(ref vectorsBase, (IntPtr)((uint)vectors.Length / 2u)); while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast)) @@ -495,12 +520,14 @@ public static void Premultiply(Span vectors) else #endif { - ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors); + ref Vector4 vectorsStart = ref MemoryMarshal.GetReference(vectors); + ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsStart, vectors.Length); - for (int i = 0; i < vectors.Length; i++) + while (Unsafe.IsAddressLessThan(ref vectorsStart, ref vectorsEnd)) { - ref Vector4 v = ref Unsafe.Add(ref baseRef, i); - Premultiply(ref v); + Premultiply(ref vectorsStart); + + vectorsStart = ref Unsafe.Add(ref vectorsStart, 1); } } } @@ -515,10 +542,8 @@ public static void UnPremultiply(Span vectors) #if SUPPORTS_RUNTIME_INTRINSICS if (Avx2.IsSupported && vectors.Length >= 2) { - ref Vector256 vectorsBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(vectors)); - // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 vectorsBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(vectors)); ref Vector256 vectorsLast = ref Unsafe.Add(ref vectorsBase, (IntPtr)((uint)vectors.Length / 2u)); while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast)) @@ -538,12 +563,14 @@ public static void UnPremultiply(Span vectors) else #endif { - ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors); + ref Vector4 vectorsStart = ref MemoryMarshal.GetReference(vectors); + ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsStart, vectors.Length); - for (int i = 0; i < vectors.Length; i++) + while (Unsafe.IsAddressLessThan(ref vectorsStart, ref vectorsEnd)) { - ref Vector4 v = ref Unsafe.Add(ref baseRef, i); - UnPremultiply(ref v); + UnPremultiply(ref vectorsStart); + + vectorsStart = ref Unsafe.Add(ref vectorsStart, 1); } } } @@ -633,53 +660,54 @@ public static unsafe void CubeRootOnXYZ(Span vectors) vectors128Ref = y4; vectors128Ref = ref Unsafe.Add(ref vectors128Ref, 1); } - - return; } + else #endif - ref Vector4 vectorsRef = ref MemoryMarshal.GetReference(vectors); - ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsRef, vectors.Length); - - // Fallback with scalar preprocessing and vectorized approximation steps - while (Unsafe.IsAddressLessThan(ref vectorsRef, ref vectorsEnd)) { - Vector4 v = vectorsRef; + ref Vector4 vectorsRef = ref MemoryMarshal.GetReference(vectors); + ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsRef, vectors.Length); - double - x64 = v.X, - y64 = v.Y, - z64 = v.Z; - float a = v.W; - - ulong - xl = *(ulong*)&x64, - yl = *(ulong*)&y64, - zl = *(ulong*)&z64; - - // Here we use a trick to compute the starting value x0 for the cube root. This is because doing - // pow(x, 1 / gamma) is the same as the gamma-th root of x, and since gamme is 3 in this case, - // this means what we actually want is to find the cube root of our clamped values. - // For more info on the constant below, see: - // https://community.intel.com/t5/Intel-C-Compiler/Fast-approximate-of-transcendental-operations/td-p/1044543. - // Here we perform the same trick on all RGB channels separately to help the CPU execute them in paralle, and - // store the alpha channel to preserve it. Then we set these values to the fields of a temporary 128-bit - // register, and use it to accelerate two steps of the Newton approximation using SIMD. - xl = 0x2a9f8a7be393b600 + (xl / 3); - yl = 0x2a9f8a7be393b600 + (yl / 3); - zl = 0x2a9f8a7be393b600 + (zl / 3); - - Vector4 y4; - y4.X = (float)*(double*)&xl; - y4.Y = (float)*(double*)&yl; - y4.Z = (float)*(double*)&zl; - y4.W = 0; - - y4 = (2 / 3f * y4) + (1 / 3f * (v / (y4 * y4))); - y4 = (2 / 3f * y4) + (1 / 3f * (v / (y4 * y4))); - y4.W = a; - - vectorsRef = y4; - vectorsRef = ref Unsafe.Add(ref vectorsRef, 1); + // Fallback with scalar preprocessing and vectorized approximation steps + while (Unsafe.IsAddressLessThan(ref vectorsRef, ref vectorsEnd)) + { + Vector4 v = vectorsRef; + + double + x64 = v.X, + y64 = v.Y, + z64 = v.Z; + float a = v.W; + + ulong + xl = *(ulong*)&x64, + yl = *(ulong*)&y64, + zl = *(ulong*)&z64; + + // Here we use a trick to compute the starting value x0 for the cube root. This is because doing + // pow(x, 1 / gamma) is the same as the gamma-th root of x, and since gamme is 3 in this case, + // this means what we actually want is to find the cube root of our clamped values. + // For more info on the constant below, see: + // https://community.intel.com/t5/Intel-C-Compiler/Fast-approximate-of-transcendental-operations/td-p/1044543. + // Here we perform the same trick on all RGB channels separately to help the CPU execute them in paralle, and + // store the alpha channel to preserve it. Then we set these values to the fields of a temporary 128-bit + // register, and use it to accelerate two steps of the Newton approximation using SIMD. + xl = 0x2a9f8a7be393b600 + (xl / 3); + yl = 0x2a9f8a7be393b600 + (yl / 3); + zl = 0x2a9f8a7be393b600 + (zl / 3); + + Vector4 y4; + y4.X = (float)*(double*)&xl; + y4.Y = (float)*(double*)&yl; + y4.Z = (float)*(double*)&zl; + y4.W = 0; + + y4 = (2 / 3f * y4) + (1 / 3f * (v / (y4 * y4))); + y4 = (2 / 3f * y4) + (1 / 3f * (v / (y4 * y4))); + y4.W = a; + + vectorsRef = y4; + vectorsRef = ref Unsafe.Add(ref vectorsRef, 1); + } } } } diff --git a/src/ImageSharp/PixelFormats/Utils/Vector4Converters.Default.cs b/src/ImageSharp/PixelFormats/Utils/Vector4Converters.Default.cs index 999f6325bc..6b6ff4319f 100644 --- a/src/ImageSharp/PixelFormats/Utils/Vector4Converters.Default.cs +++ b/src/ImageSharp/PixelFormats/Utils/Vector4Converters.Default.cs @@ -88,14 +88,16 @@ private static void UnsafeFromVector4Core( Span destPixels) where TPixel : unmanaged, IPixel { - ref Vector4 sourceRef = ref MemoryMarshal.GetReference(sourceVectors); + ref Vector4 sourceStart = ref MemoryMarshal.GetReference(sourceVectors); + ref Vector4 sourceEnd = ref Unsafe.Add(ref sourceStart, sourceVectors.Length); ref TPixel destRef = ref MemoryMarshal.GetReference(destPixels); - for (int i = 0; i < sourceVectors.Length; i++) + while (Unsafe.IsAddressLessThan(ref sourceStart, ref sourceEnd)) { - ref Vector4 sp = ref Unsafe.Add(ref sourceRef, i); - ref TPixel dp = ref Unsafe.Add(ref destRef, i); - dp.FromVector4(sp); + destRef.FromVector4(sourceStart); + + sourceStart = ref Unsafe.Add(ref sourceStart, 1); + destRef = ref Unsafe.Add(ref destRef, 1); } } @@ -105,14 +107,16 @@ private static void UnsafeToVector4Core( Span destVectors) where TPixel : unmanaged, IPixel { - ref TPixel sourceRef = ref MemoryMarshal.GetReference(sourcePixels); + ref TPixel sourceStart = ref MemoryMarshal.GetReference(sourcePixels); + ref TPixel sourceEnd = ref Unsafe.Add(ref sourceStart, sourcePixels.Length); ref Vector4 destRef = ref MemoryMarshal.GetReference(destVectors); - for (int i = 0; i < sourcePixels.Length; i++) + while (Unsafe.IsAddressLessThan(ref sourceStart, ref sourceEnd)) { - ref TPixel sp = ref Unsafe.Add(ref sourceRef, i); - ref Vector4 dp = ref Unsafe.Add(ref destRef, i); - dp = sp.ToVector4(); + destRef = sourceStart.ToVector4(); + + sourceStart = ref Unsafe.Add(ref sourceStart, 1); + destRef = ref Unsafe.Add(ref destRef, 1); } } @@ -122,14 +126,16 @@ private static void UnsafeFromScaledVector4Core( Span destinationColors) where TPixel : unmanaged, IPixel { - ref Vector4 sourceRef = ref MemoryMarshal.GetReference(sourceVectors); + ref Vector4 sourceStart = ref MemoryMarshal.GetReference(sourceVectors); + ref Vector4 sourceEnd = ref Unsafe.Add(ref sourceStart, sourceVectors.Length); ref TPixel destRef = ref MemoryMarshal.GetReference(destinationColors); - for (int i = 0; i < sourceVectors.Length; i++) + while (Unsafe.IsAddressLessThan(ref sourceStart, ref sourceEnd)) { - ref Vector4 sp = ref Unsafe.Add(ref sourceRef, i); - ref TPixel dp = ref Unsafe.Add(ref destRef, i); - dp.FromScaledVector4(sp); + destRef.FromScaledVector4(sourceStart); + + sourceStart = ref Unsafe.Add(ref sourceStart, 1); + destRef = ref Unsafe.Add(ref destRef, 1); } } @@ -139,16 +145,18 @@ private static void UnsafeToScaledVector4Core( Span destinationVectors) where TPixel : unmanaged, IPixel { - ref TPixel sourceRef = ref MemoryMarshal.GetReference(sourceColors); + ref TPixel sourceStart = ref MemoryMarshal.GetReference(sourceColors); + ref TPixel sourceEnd = ref Unsafe.Add(ref sourceStart, sourceColors.Length); ref Vector4 destRef = ref MemoryMarshal.GetReference(destinationVectors); - for (int i = 0; i < sourceColors.Length; i++) + while (Unsafe.IsAddressLessThan(ref sourceStart, ref sourceEnd)) { - ref TPixel sp = ref Unsafe.Add(ref sourceRef, i); - ref Vector4 dp = ref Unsafe.Add(ref destRef, i); - dp = sp.ToScaledVector4(); + destRef = sourceStart.ToScaledVector4(); + + sourceStart = ref Unsafe.Add(ref sourceStart, 1); + destRef = ref Unsafe.Add(ref destRef, 1); } } } } -} \ No newline at end of file +} diff --git a/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor.cs b/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor.cs index d4fb27a57f..13fe627d16 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor.cs @@ -129,29 +129,34 @@ public void Invoke(int y) int boundsWidth = this.bounds.Width; int kernelSize = this.kernel.Length; - Span rowOffsets = this.map.GetRowOffsetSpan(); - ref int sampleRowBase = ref Unsafe.Add(ref MemoryMarshal.GetReference(rowOffsets), (y - this.bounds.Y) * kernelSize); + ref int sampleRowBase = ref Unsafe.Add(ref MemoryMarshal.GetReference(this.map.GetRowOffsetSpan()), (y - this.bounds.Y) * kernelSize); // The target buffer is zeroed initially and then it accumulates the results // of each partial convolution, so we don't have to clear it here as well ref Vector4 targetBase = ref this.targetValues.GetElementUnsafe(boundsX, y); - ref Complex64 kernelBase = ref this.kernel[0]; + ref Complex64 kernelStart = ref this.kernel[0]; + ref Complex64 kernelEnd = ref Unsafe.Add(ref kernelStart, kernelSize); - for (int kY = 0; kY < kernelSize; kY++) + while (Unsafe.IsAddressLessThan(ref kernelStart, ref kernelEnd)) { // Get the precalculated source sample row for this kernel row and copy to our buffer - int sampleY = Unsafe.Add(ref sampleRowBase, kY); - ref ComplexVector4 sourceBase = ref this.sourceValues.GetElementUnsafe(0, sampleY); - Complex64 factor = Unsafe.Add(ref kernelBase, kY); + ref ComplexVector4 sourceBase = ref this.sourceValues.GetElementUnsafe(0, sampleRowBase); + ref ComplexVector4 sourceEnd = ref Unsafe.Add(ref sourceBase, boundsWidth); + ref Vector4 targetStart = ref targetBase; + Complex64 factor = kernelStart; - for (int x = 0; x < boundsWidth; x++) + while (Unsafe.IsAddressLessThan(ref sourceBase, ref sourceEnd)) { - ref Vector4 target = ref Unsafe.Add(ref targetBase, x); - ComplexVector4 sample = Unsafe.Add(ref sourceBase, x); - ComplexVector4 partial = factor * sample; + ComplexVector4 partial = factor * sourceBase; - target += partial.WeightedSum(this.z, this.w); + targetStart += partial.WeightedSum(this.z, this.w); + + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + targetStart = ref Unsafe.Add(ref targetStart, 1); } + + kernelStart = ref Unsafe.Add(ref kernelStart, 1); + sampleRowBase = ref Unsafe.Add(ref sampleRowBase, 1); } } } diff --git a/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor{TPixel}.cs index a21155e10c..241ff9db28 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor{TPixel}.cs @@ -233,32 +233,37 @@ public void Invoke(int y, Span span) // Clear the target buffer for each row run Span targetBuffer = this.targetValues.GetRowSpan(y); targetBuffer.Clear(); - ref ComplexVector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); // Execute the bulk pixel format conversion for the current row Span sourceRow = this.sourcePixels.GetRowSpan(y).Slice(boundsX, boundsWidth); PixelOperations.Instance.ToVector4(this.configuration, sourceRow, span); ref Vector4 sourceBase = ref MemoryMarshal.GetReference(span); + ref ComplexVector4 targetStart = ref MemoryMarshal.GetReference(targetBuffer); + ref ComplexVector4 targetEnd = ref Unsafe.Add(ref targetStart, span.Length); ref Complex64 kernelBase = ref this.kernel[0]; + ref Complex64 kernelEnd = ref Unsafe.Add(ref kernelBase, kernelSize); ref int sampleColumnBase = ref MemoryMarshal.GetReference(this.map.GetColumnOffsetSpan()); - for (int x = 0; x < span.Length; x++) + while (Unsafe.IsAddressLessThan(ref targetStart, ref targetEnd)) { - ref ComplexVector4 target = ref Unsafe.Add(ref targetBase, x); + ref Complex64 kernelStart = ref kernelBase; + ref int sampleColumnStart = ref sampleColumnBase; - for (int kX = 0; kX < kernelSize; kX++) + while (Unsafe.IsAddressLessThan(ref kernelStart, ref kernelEnd)) { - int sampleX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; - Vector4 sample = Unsafe.Add(ref sourceBase, sampleX); - Complex64 factor = Unsafe.Add(ref kernelBase, kX); + Vector4 sample = Unsafe.Add(ref sourceBase, sampleColumnStart - boundsX); - target.Sum(factor * sample); + targetStart.Sum(kernelStart * sample); + + kernelStart = ref Unsafe.Add(ref kernelStart, 1); + sampleColumnStart = ref Unsafe.Add(ref sampleColumnStart, 1); } // Shift the base column sampling reference by one row at the end of each outer // iteration so that the inner tight loop indexing can skip the multiplication sampleColumnBase = ref Unsafe.Add(ref sampleColumnBase, kernelSize); + targetStart = ref Unsafe.Add(ref targetStart, 1); } } } diff --git a/src/ImageSharp/Processing/Processors/Convolution/BoxBlurProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/BoxBlurProcessor{TPixel}.cs index 8c5358770c..5beadb0cee 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/BoxBlurProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/BoxBlurProcessor{TPixel}.cs @@ -1,6 +1,7 @@ // Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. +using System; using SixLabors.ImageSharp.PixelFormats; namespace SixLabors.ImageSharp.Processing.Processors.Convolution @@ -23,24 +24,18 @@ public BoxBlurProcessor(Configuration configuration, BoxBlurProcessor definition : base(configuration, source, sourceRectangle) { int kernelSize = (definition.Radius * 2) + 1; - this.KernelX = CreateBoxKernel(kernelSize); - this.KernelY = this.KernelX.Transpose(); + this.Kernel = CreateBoxKernel(kernelSize); } /// - /// Gets the horizontal gradient operator. + /// Gets the 1D convolution kernel. /// - public DenseMatrix KernelX { get; } - - /// - /// Gets the vertical gradient operator. - /// - public DenseMatrix KernelY { get; } + public float[] Kernel { get; } /// protected override void OnFrameApply(ImageFrame source) { - using var processor = new Convolution2PassProcessor(this.Configuration, this.KernelX, this.KernelY, false, this.Source, this.SourceRectangle); + using var processor = new Convolution2PassProcessor(this.Configuration, this.Kernel, false, this.Source, this.SourceRectangle); processor.Apply(source); } @@ -50,10 +45,12 @@ protected override void OnFrameApply(ImageFrame source) /// /// The maximum size of the kernel in either direction. /// The . - private static DenseMatrix CreateBoxKernel(int kernelSize) + private static float[] CreateBoxKernel(int kernelSize) { - var kernel = new DenseMatrix(kernelSize, 1); - kernel.Fill(1F / kernelSize); + var kernel = new float[kernelSize]; + + kernel.AsSpan().Fill(1F / kernelSize); + return kernel; } } diff --git a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs index 16ce0fdd75..365b2e2dfc 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs @@ -1,7 +1,10 @@ // Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. +using System; using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; using SixLabors.ImageSharp.Advanced; using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.PixelFormats; @@ -19,34 +22,26 @@ internal class Convolution2PassProcessor : ImageProcessor /// Initializes a new instance of the class. /// /// The configuration which allows altering default behaviour or extending the library. - /// The horizontal gradient operator. - /// The vertical gradient operator. + /// The 1D convolution kernel. /// Whether the convolution filter is applied to alpha as well as the color channels. /// The source for the current processor instance. /// The source area to process for the current processor instance. public Convolution2PassProcessor( Configuration configuration, - in DenseMatrix kernelX, - in DenseMatrix kernelY, + float[] kernel, bool preserveAlpha, Image source, Rectangle sourceRectangle) : base(configuration, source, sourceRectangle) { - this.KernelX = kernelX; - this.KernelY = kernelY; + this.Kernel = kernel; this.PreserveAlpha = preserveAlpha; } /// - /// Gets the horizontal convolution kernel. + /// Gets the convolution kernel. /// - public DenseMatrix KernelX { get; } - - /// - /// Gets the vertical convolution kernel. - /// - public DenseMatrix KernelY { get; } + public float[] Kernel { get; } /// /// Gets a value indicating whether the convolution filter is applied to alpha as well as the color channels. @@ -64,44 +59,364 @@ protected override void OnFrameApply(ImageFrame source) // for source and target bulk pixel conversion. var operationBounds = new Rectangle(interest.X, interest.Y, interest.Width * 2, interest.Height); - using (var mapX = new KernelSamplingMap(this.Configuration.MemoryAllocator)) + // We can create a single sampling map with the size as if we were using the non separated 2D kernel + // the two 1D kernels represent, and reuse it across both convolution steps, like in the bokeh blur. + using var mapXY = new KernelSamplingMap(this.Configuration.MemoryAllocator); + + mapXY.BuildSamplingOffsetMap(this.Kernel.Length, this.Kernel.Length, interest); + + // Horizontal convolution + var horizontalOperation = new HorizontalConvolutionRowOperation( + interest, + firstPassPixels, + source.PixelBuffer, + mapXY, + this.Kernel, + this.Configuration, + this.PreserveAlpha); + + ParallelRowIterator.IterateRows( + this.Configuration, + operationBounds, + in horizontalOperation); + + // Vertical convolution + var verticalOperation = new VerticalConvolutionRowOperation( + interest, + source.PixelBuffer, + firstPassPixels, + mapXY, + this.Kernel, + this.Configuration, + this.PreserveAlpha); + + ParallelRowIterator.IterateRows( + this.Configuration, + operationBounds, + in verticalOperation); + } + + /// + /// A implementing the logic for the horizontal 1D convolution. + /// + internal readonly struct HorizontalConvolutionRowOperation : IRowOperation + { + private readonly Rectangle bounds; + private readonly Buffer2D targetPixels; + private readonly Buffer2D sourcePixels; + private readonly KernelSamplingMap map; + private readonly float[] kernel; + private readonly Configuration configuration; + private readonly bool preserveAlpha; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public HorizontalConvolutionRowOperation( + Rectangle bounds, + Buffer2D targetPixels, + Buffer2D sourcePixels, + KernelSamplingMap map, + float[] kernel, + Configuration configuration, + bool preserveAlpha) + { + this.bounds = bounds; + this.targetPixels = targetPixels; + this.sourcePixels = sourcePixels; + this.map = map; + this.kernel = kernel; + this.configuration = configuration; + this.preserveAlpha = preserveAlpha; + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void Invoke(int y, Span span) { - mapX.BuildSamplingOffsetMap(this.KernelX, interest); - - // Horizontal convolution - var horizontalOperation = new ConvolutionRowOperation( - interest, - firstPassPixels, - source.PixelBuffer, - mapX, - this.KernelX, - this.Configuration, - this.PreserveAlpha); - - ParallelRowIterator.IterateRows, Vector4>( - this.Configuration, - operationBounds, - in horizontalOperation); + if (this.preserveAlpha) + { + this.Convolve3(y, span); + } + else + { + this.Convolve4(y, span); + } } - using (var mapY = new KernelSamplingMap(this.Configuration.MemoryAllocator)) + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void Convolve3(int y, Span span) { - mapY.BuildSamplingOffsetMap(this.KernelY, interest); - - // Vertical convolution - var verticalOperation = new ConvolutionRowOperation( - interest, - source.PixelBuffer, - firstPassPixels, - mapY, - this.KernelY, - this.Configuration, - this.PreserveAlpha); - - ParallelRowIterator.IterateRows, Vector4>( - this.Configuration, - operationBounds, - in verticalOperation); + // Span is 2x bounds. + int boundsX = this.bounds.X; + int boundsWidth = this.bounds.Width; + int kernelSize = this.kernel.Length; + + Span sourceBuffer = span.Slice(0, this.bounds.Width); + Span targetBuffer = span.Slice(this.bounds.Width); + + // Clear the target buffer for each row run. + targetBuffer.Clear(); + + // Get the precalculated source sample row for this kernel row and copy to our buffer. + Span sourceRow = this.sourcePixels.GetRowSpan(y).Slice(boundsX, boundsWidth); + PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); + + ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); + ref Vector4 targetStart = ref MemoryMarshal.GetReference(targetBuffer); + ref Vector4 targetEnd = ref Unsafe.Add(ref targetStart, sourceBuffer.Length); + ref float kernelBase = ref this.kernel[0]; + ref float kernelEnd = ref Unsafe.Add(ref kernelBase, kernelSize); + ref int sampleColumnBase = ref MemoryMarshal.GetReference(this.map.GetColumnOffsetSpan()); + + while (Unsafe.IsAddressLessThan(ref targetStart, ref targetEnd)) + { + ref float kernelStart = ref kernelBase; + ref int sampleColumnStart = ref sampleColumnBase; + + while (Unsafe.IsAddressLessThan(ref kernelStart, ref kernelEnd)) + { + Vector4 sample = Unsafe.Add(ref sourceBase, sampleColumnStart - boundsX); + + targetStart += kernelStart * sample; + + kernelStart = ref Unsafe.Add(ref kernelStart, 1); + sampleColumnStart = ref Unsafe.Add(ref sampleColumnStart, 1); + } + + targetStart = ref Unsafe.Add(ref targetStart, 1); + sampleColumnBase = ref Unsafe.Add(ref sampleColumnBase, kernelSize); + } + + // Now we need to copy the original alpha values from the source row. + sourceRow = this.sourcePixels.GetRowSpan(y).Slice(boundsX, boundsWidth); + PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); + + targetStart = ref MemoryMarshal.GetReference(targetBuffer); + + while (Unsafe.IsAddressLessThan(ref targetStart, ref targetEnd)) + { + targetStart.W = sourceBase.W; + + targetStart = ref Unsafe.Add(ref targetStart, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + Span targetRow = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth); + PixelOperations.Instance.FromVector4Destructive(this.configuration, targetBuffer, targetRow); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void Convolve4(int y, Span span) + { + // Span is 2x bounds. + int boundsX = this.bounds.X; + int boundsWidth = this.bounds.Width; + int kernelSize = this.kernel.Length; + + Span sourceBuffer = span.Slice(0, this.bounds.Width); + Span targetBuffer = span.Slice(this.bounds.Width); + + // Clear the target buffer for each row run. + targetBuffer.Clear(); + + // Get the precalculated source sample row for this kernel row and copy to our buffer. + Span sourceRow = this.sourcePixels.GetRowSpan(y).Slice(boundsX, boundsWidth); + PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); + + Numerics.Premultiply(sourceBuffer); + + ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); + ref Vector4 targetStart = ref MemoryMarshal.GetReference(targetBuffer); + ref Vector4 targetEnd = ref Unsafe.Add(ref targetStart, sourceBuffer.Length); + ref float kernelBase = ref this.kernel[0]; + ref float kernelEnd = ref Unsafe.Add(ref kernelBase, kernelSize); + ref int sampleColumnBase = ref MemoryMarshal.GetReference(this.map.GetColumnOffsetSpan()); + + while (Unsafe.IsAddressLessThan(ref targetStart, ref targetEnd)) + { + ref float kernelStart = ref kernelBase; + ref int sampleColumnStart = ref sampleColumnBase; + + while (Unsafe.IsAddressLessThan(ref kernelStart, ref kernelEnd)) + { + Vector4 sample = Unsafe.Add(ref sourceBase, sampleColumnStart - boundsX); + + targetStart += kernelStart * sample; + + kernelStart = ref Unsafe.Add(ref kernelStart, 1); + sampleColumnStart = ref Unsafe.Add(ref sampleColumnStart, 1); + } + + targetStart = ref Unsafe.Add(ref targetStart, 1); + sampleColumnBase = ref Unsafe.Add(ref sampleColumnBase, kernelSize); + } + + Numerics.UnPremultiply(targetBuffer); + + Span targetRow = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth); + PixelOperations.Instance.FromVector4Destructive(this.configuration, targetBuffer, targetRow); + } + } + + /// + /// A implementing the logic for the vertical 1D convolution. + /// + internal readonly struct VerticalConvolutionRowOperation : IRowOperation + { + private readonly Rectangle bounds; + private readonly Buffer2D targetPixels; + private readonly Buffer2D sourcePixels; + private readonly KernelSamplingMap map; + private readonly float[] kernel; + private readonly Configuration configuration; + private readonly bool preserveAlpha; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public VerticalConvolutionRowOperation( + Rectangle bounds, + Buffer2D targetPixels, + Buffer2D sourcePixels, + KernelSamplingMap map, + float[] kernel, + Configuration configuration, + bool preserveAlpha) + { + this.bounds = bounds; + this.targetPixels = targetPixels; + this.sourcePixels = sourcePixels; + this.map = map; + this.kernel = kernel; + this.configuration = configuration; + this.preserveAlpha = preserveAlpha; + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void Invoke(int y, Span span) + { + if (this.preserveAlpha) + { + this.Convolve3(y, span); + } + else + { + this.Convolve4(y, span); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void Convolve3(int y, Span span) + { + // Span is 2x bounds. + int boundsX = this.bounds.X; + int boundsWidth = this.bounds.Width; + int kernelSize = this.kernel.Length; + + Span sourceBuffer = span.Slice(0, this.bounds.Width); + Span targetBuffer = span.Slice(this.bounds.Width); + + ref int sampleRowBase = ref Unsafe.Add(ref MemoryMarshal.GetReference(this.map.GetRowOffsetSpan()), (y - this.bounds.Y) * kernelSize); + + // Clear the target buffer for each row run. + targetBuffer.Clear(); + + ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); + ref float kernelStart = ref this.kernel[0]; + ref float kernelEnd = ref Unsafe.Add(ref kernelStart, kernelSize); + + Span sourceRow; + while (Unsafe.IsAddressLessThan(ref kernelStart, ref kernelEnd)) + { + // Get the precalculated source sample row for this kernel row and copy to our buffer. + sourceRow = this.sourcePixels.GetRowSpan(sampleRowBase).Slice(boundsX, boundsWidth); + + PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); + + ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); + ref Vector4 sourceEnd = ref Unsafe.Add(ref sourceBase, sourceBuffer.Length); + ref Vector4 targetStart = ref targetBase; + float factor = kernelStart; + + while (Unsafe.IsAddressLessThan(ref sourceBase, ref sourceEnd)) + { + targetStart += factor * sourceBase; + + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + targetStart = ref Unsafe.Add(ref targetStart, 1); + } + + kernelStart = ref Unsafe.Add(ref kernelStart, 1); + sampleRowBase = ref Unsafe.Add(ref sampleRowBase, 1); + } + + // Now we need to copy the original alpha values from the source row. + sourceRow = this.sourcePixels.GetRowSpan(y).Slice(boundsX, boundsWidth); + PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); + { + ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); + ref Vector4 sourceEnd = ref Unsafe.Add(ref sourceBase, sourceBuffer.Length); + + while (Unsafe.IsAddressLessThan(ref sourceBase, ref sourceEnd)) + { + targetBase.W = sourceBase.W; + + targetBase = ref Unsafe.Add(ref targetBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + } + + Span targetRow = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth); + PixelOperations.Instance.FromVector4Destructive(this.configuration, targetBuffer, targetRow); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void Convolve4(int y, Span span) + { + // Span is 2x bounds. + int boundsX = this.bounds.X; + int boundsWidth = this.bounds.Width; + int kernelSize = this.kernel.Length; + + Span sourceBuffer = span.Slice(0, this.bounds.Width); + Span targetBuffer = span.Slice(this.bounds.Width); + + ref int sampleRowBase = ref Unsafe.Add(ref MemoryMarshal.GetReference(this.map.GetRowOffsetSpan()), (y - this.bounds.Y) * kernelSize); + + // Clear the target buffer for each row run. + targetBuffer.Clear(); + + ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); + ref float kernelStart = ref this.kernel[0]; + ref float kernelEnd = ref Unsafe.Add(ref kernelStart, kernelSize); + + Span sourceRow; + while (Unsafe.IsAddressLessThan(ref kernelStart, ref kernelEnd)) + { + // Get the precalculated source sample row for this kernel row and copy to our buffer. + sourceRow = this.sourcePixels.GetRowSpan(sampleRowBase).Slice(boundsX, boundsWidth); + + PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); + + ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); + ref Vector4 sourceEnd = ref Unsafe.Add(ref sourceBase, sourceBuffer.Length); + ref Vector4 targetStart = ref targetBase; + float factor = kernelStart; + + while (Unsafe.IsAddressLessThan(ref sourceBase, ref sourceEnd)) + { + targetStart += factor * sourceBase; + + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + targetStart = ref Unsafe.Add(ref targetStart, 1); + } + + kernelStart = ref Unsafe.Add(ref kernelStart, 1); + sampleRowBase = ref Unsafe.Add(ref sampleRowBase, 1); + } + + Numerics.UnPremultiply(targetBuffer); + + Span targetRow = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth); + PixelOperations.Instance.FromVector4Destructive(this.configuration, targetBuffer, targetRow); } } } diff --git a/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessorHelpers.cs b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessorHelpers.cs index 9844f99563..f93cdabc47 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessorHelpers.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessorHelpers.cs @@ -12,17 +12,15 @@ internal static class ConvolutionProcessorHelpers /// See . /// internal static int GetDefaultGaussianRadius(float sigma) - { - return (int)MathF.Ceiling(sigma * 3); - } + => (int)MathF.Ceiling(sigma * 3); /// /// Create a 1 dimensional Gaussian kernel using the Gaussian G(x) function. /// - /// The . - internal static DenseMatrix CreateGaussianBlurKernel(int size, float weight) + /// The convolution kernel. + internal static float[] CreateGaussianBlurKernel(int size, float weight) { - var kernel = new DenseMatrix(size, 1); + var kernel = new float[size]; float sum = 0F; float midpoint = (size - 1) / 2F; @@ -32,13 +30,13 @@ internal static DenseMatrix CreateGaussianBlurKernel(int size, float weig float x = i - midpoint; float gx = Numerics.Gaussian(x, weight); sum += gx; - kernel[0, i] = gx; + kernel[i] = gx; } // Normalize kernel so that the sum of all weights equals 1 for (int i = 0; i < size; i++) { - kernel[0, i] /= sum; + kernel[i] /= sum; } return kernel; @@ -47,10 +45,10 @@ internal static DenseMatrix CreateGaussianBlurKernel(int size, float weig /// /// Create a 1 dimensional Gaussian kernel using the Gaussian G(x) function /// - /// The . - internal static DenseMatrix CreateGaussianSharpenKernel(int size, float weight) + /// The convolution kernel. + internal static float[] CreateGaussianSharpenKernel(int size, float weight) { - var kernel = new DenseMatrix(size, 1); + var kernel = new float[size]; float sum = 0; @@ -60,7 +58,7 @@ internal static DenseMatrix CreateGaussianSharpenKernel(int size, float w float x = i - midpoint; float gx = Numerics.Gaussian(x, weight); sum += gx; - kernel[0, i] = gx; + kernel[i] = gx; } // Invert the kernel for sharpening. @@ -70,19 +68,19 @@ internal static DenseMatrix CreateGaussianSharpenKernel(int size, float w if (i == midpointRounded) { // Calculate central value - kernel[0, i] = (2F * sum) - kernel[0, i]; + kernel[i] = (2F * sum) - kernel[i]; } else { // invert value - kernel[0, i] = -kernel[0, i]; + kernel[i] = -kernel[i]; } } // Normalize kernel so that the sum of all weights equals 1 for (int i = 0; i < size; i++) { - kernel[0, i] /= sum; + kernel[i] /= sum; } return kernel; diff --git a/src/ImageSharp/Processing/Processors/Convolution/ConvolutionRowOperation{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionRowOperation{TPixel}.cs deleted file mode 100644 index 9876b2885b..0000000000 --- a/src/ImageSharp/Processing/Processors/Convolution/ConvolutionRowOperation{TPixel}.cs +++ /dev/null @@ -1,163 +0,0 @@ -// Copyright (c) Six Labors. -// Licensed under the Apache License, Version 2.0. - -using System; -using System.Numerics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -using SixLabors.ImageSharp.Advanced; -using SixLabors.ImageSharp.Memory; -using SixLabors.ImageSharp.PixelFormats; - -namespace SixLabors.ImageSharp.Processing.Processors.Convolution -{ - /// - /// A implementing the logic for 1D convolution. - /// - internal readonly struct ConvolutionRowOperation : IRowOperation - where TPixel : unmanaged, IPixel - { - private readonly Rectangle bounds; - private readonly Buffer2D targetPixels; - private readonly Buffer2D sourcePixels; - private readonly KernelSamplingMap map; - private readonly DenseMatrix kernelMatrix; - private readonly Configuration configuration; - private readonly bool preserveAlpha; - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public ConvolutionRowOperation( - Rectangle bounds, - Buffer2D targetPixels, - Buffer2D sourcePixels, - KernelSamplingMap map, - DenseMatrix kernelMatrix, - Configuration configuration, - bool preserveAlpha) - { - this.bounds = bounds; - this.targetPixels = targetPixels; - this.sourcePixels = sourcePixels; - this.map = map; - this.kernelMatrix = kernelMatrix; - this.configuration = configuration; - this.preserveAlpha = preserveAlpha; - } - - /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void Invoke(int y, Span span) - { - if (this.preserveAlpha) - { - this.Convolve3(y, span); - } - else - { - this.Convolve4(y, span); - } - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private void Convolve3(int y, Span span) - { - // Span is 2x bounds. - int boundsX = this.bounds.X; - int boundsWidth = this.bounds.Width; - Span sourceBuffer = span.Slice(0, this.bounds.Width); - Span targetBuffer = span.Slice(this.bounds.Width); - - var state = new ConvolutionState(in this.kernelMatrix, this.map); - ref int sampleRowBase = ref state.GetSampleRow(y - this.bounds.Y); - - // Clear the target buffer for each row run. - targetBuffer.Clear(); - ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); - - ReadOnlyKernel kernel = state.Kernel; - Span sourceRow; - for (int kY = 0; kY < kernel.Rows; kY++) - { - // Get the precalculated source sample row for this kernel row and copy to our buffer. - int sampleY = Unsafe.Add(ref sampleRowBase, kY); - sourceRow = this.sourcePixels.GetRowSpan(sampleY).Slice(boundsX, boundsWidth); - PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); - - ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); - - for (int x = 0; x < sourceBuffer.Length; x++) - { - ref int sampleColumnBase = ref state.GetSampleColumn(x); - ref Vector4 target = ref Unsafe.Add(ref targetBase, x); - - for (int kX = 0; kX < kernel.Columns; kX++) - { - int sampleX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; - Vector4 sample = Unsafe.Add(ref sourceBase, sampleX); - target += kernel[kY, kX] * sample; - } - } - } - - // Now we need to copy the original alpha values from the source row. - sourceRow = this.sourcePixels.GetRowSpan(y).Slice(boundsX, boundsWidth); - PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); - - for (int x = 0; x < sourceRow.Length; x++) - { - ref Vector4 target = ref Unsafe.Add(ref targetBase, x); - target.W = Unsafe.Add(ref MemoryMarshal.GetReference(sourceBuffer), x).W; - } - - Span targetRow = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth); - PixelOperations.Instance.FromVector4Destructive(this.configuration, targetBuffer, targetRow); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private void Convolve4(int y, Span span) - { - // Span is 2x bounds. - int boundsX = this.bounds.X; - int boundsWidth = this.bounds.Width; - Span sourceBuffer = span.Slice(0, this.bounds.Width); - Span targetBuffer = span.Slice(this.bounds.Width); - - var state = new ConvolutionState(in this.kernelMatrix, this.map); - ref int sampleRowBase = ref state.GetSampleRow(y - this.bounds.Y); - - // Clear the target buffer for each row run. - targetBuffer.Clear(); - ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); - - ReadOnlyKernel kernel = state.Kernel; - for (int kY = 0; kY < kernel.Rows; kY++) - { - // Get the precalculated source sample row for this kernel row and copy to our buffer. - int sampleY = Unsafe.Add(ref sampleRowBase, kY); - Span sourceRow = this.sourcePixels.GetRowSpan(sampleY).Slice(boundsX, boundsWidth); - PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); - - Numerics.Premultiply(sourceBuffer); - ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); - - for (int x = 0; x < sourceBuffer.Length; x++) - { - ref int sampleColumnBase = ref state.GetSampleColumn(x); - ref Vector4 target = ref Unsafe.Add(ref targetBase, x); - - for (int kX = 0; kX < kernel.Columns; kX++) - { - int sampleX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; - Vector4 sample = Unsafe.Add(ref sourceBase, sampleX); - target += kernel[kY, kX] * sample; - } - } - } - - Numerics.UnPremultiply(targetBuffer); - - Span targetRow = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth); - PixelOperations.Instance.FromVector4Destructive(this.configuration, targetBuffer, targetRow); - } - } -} diff --git a/src/ImageSharp/Processing/Processors/Convolution/GaussianBlurProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/GaussianBlurProcessor{TPixel}.cs index a9b692a015..4ade01f914 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/GaussianBlurProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/GaussianBlurProcessor{TPixel}.cs @@ -27,24 +27,18 @@ public GaussianBlurProcessor( : base(configuration, source, sourceRectangle) { int kernelSize = (definition.Radius * 2) + 1; - this.KernelX = ConvolutionProcessorHelpers.CreateGaussianBlurKernel(kernelSize, definition.Sigma); - this.KernelY = this.KernelX.Transpose(); + this.Kernel = ConvolutionProcessorHelpers.CreateGaussianBlurKernel(kernelSize, definition.Sigma); } /// - /// Gets the horizontal gradient operator. + /// Gets the 1D convolution kernel. /// - public DenseMatrix KernelX { get; } - - /// - /// Gets the vertical gradient operator. - /// - public DenseMatrix KernelY { get; } + public float[] Kernel { get; } /// protected override void OnFrameApply(ImageFrame source) { - using var processor = new Convolution2PassProcessor(this.Configuration, this.KernelX, this.KernelY, false, this.Source, this.SourceRectangle); + using var processor = new Convolution2PassProcessor(this.Configuration, this.Kernel, false, this.Source, this.SourceRectangle); processor.Apply(source); } diff --git a/src/ImageSharp/Processing/Processors/Convolution/GaussianSharpenProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/GaussianSharpenProcessor{TPixel}.cs index 5e20865e5c..73aaaec188 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/GaussianSharpenProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/GaussianSharpenProcessor{TPixel}.cs @@ -27,24 +27,18 @@ public GaussianSharpenProcessor( : base(configuration, source, sourceRectangle) { int kernelSize = (definition.Radius * 2) + 1; - this.KernelX = ConvolutionProcessorHelpers.CreateGaussianSharpenKernel(kernelSize, definition.Sigma); - this.KernelY = this.KernelX.Transpose(); + this.Kernel = ConvolutionProcessorHelpers.CreateGaussianSharpenKernel(kernelSize, definition.Sigma); } /// - /// Gets the horizontal gradient operator. + /// Gets the 1D convolution kernel. /// - public DenseMatrix KernelX { get; } - - /// - /// Gets the vertical gradient operator. - /// - public DenseMatrix KernelY { get; } + public float[] Kernel { get; } /// protected override void OnFrameApply(ImageFrame source) { - using var processor = new Convolution2PassProcessor(this.Configuration, this.KernelX, this.KernelY, false, this.Source, this.SourceRectangle); + using var processor = new Convolution2PassProcessor(this.Configuration, this.Kernel, false, this.Source, this.SourceRectangle); processor.Apply(source); }