-
-
Notifications
You must be signed in to change notification settings - Fork 852
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Vectorize (AVX2) JPEG Color Converter #1411
Changes from 12 commits
210d8f7
f421be2
7cc953e
89bb9fe
d76dbaa
82a2359
c7a7b50
8212e60
7ecfab7
c0ad228
7a3bdf3
da732b7
8f5ae2d
0aa3ba5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
// Copyright (c) Six Labors. | ||
// Licensed under the Apache License, Version 2.0. | ||
|
||
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters | ||
{ | ||
internal abstract partial class JpegColorConverter | ||
{ | ||
/// <summary>
/// Common base for the color converters whose vectorized path is written with AVX2 intrinsics.
/// </summary>
internal abstract class Avx2JpegColorConverter : VectorizedJpegColorConverter
{
    /// <summary>
    /// Initializes a new instance of the <see cref="Avx2JpegColorConverter"/> class.
    /// </summary>
    /// <param name="colorSpace">Color space handled by the converter; forwarded to the base class.</param>
    /// <param name="precision">Forwarded unchanged to the base class.</param>
    protected Avx2JpegColorConverter(JpegColorSpace colorSpace, int precision)
        : base(colorSpace, precision, 8) // NOTE(review): 8 is presumably the number of floats per 256-bit vector - confirm against VectorizedJpegColorConverter.
    {
    }

    /// <summary>
    /// Gets a value indicating whether this converter may be used; it reports <see cref="SimdUtils.HasAvx2"/>.
    /// Sealed so that derived converters cannot alter the availability check.
    /// </summary>
    protected sealed override bool IsAvailable
    {
        get { return SimdUtils.HasAvx2; }
    }
}
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
// Copyright (c) Six Labors. | ||
// Licensed under the Apache License, Version 2.0. | ||
|
||
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters | ||
{ | ||
internal abstract partial class JpegColorConverter | ||
{ | ||
/// <summary>
/// Base for converters that report themselves as always available.
/// </summary>
internal abstract class BasicJpegColorConverter : JpegColorConverter
{
    /// <summary>
    /// Initializes a new instance of the <see cref="BasicJpegColorConverter"/> class.
    /// </summary>
    /// <param name="colorSpace">Color space handled by the converter; forwarded to the base class.</param>
    /// <param name="precision">Forwarded unchanged to the base class.</param>
    protected BasicJpegColorConverter(JpegColorSpace colorSpace, int precision)
        : base(colorSpace, precision)
    {
    }

    /// <summary>
    /// Gets a value indicating whether this converter may be used; always <see langword="true"/>.
    /// </summary>
    protected override bool IsAvailable
    {
        get { return true; }
    }
}
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
// Copyright (c) Six Labors. | ||
// Licensed under the Apache License, Version 2.0. | ||
|
||
using System; | ||
using System.Numerics; | ||
using System.Runtime.CompilerServices; | ||
using System.Runtime.InteropServices; | ||
#if SUPPORTS_RUNTIME_INTRINSICS | ||
using System.Runtime.Intrinsics; | ||
using System.Runtime.Intrinsics.X86; | ||
using static SixLabors.ImageSharp.SimdUtils; | ||
#endif | ||
|
||
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters | ||
{ | ||
internal abstract partial class JpegColorConverter | ||
{ | ||
/// <summary>
/// CMYK color converter whose vectorized path is implemented with AVX2 intrinsics.
/// </summary>
internal sealed class FromCmykAvx2 : Avx2JpegColorConverter
{
    /// <summary>
    /// Initializes a new instance of the <see cref="FromCmykAvx2"/> class.
    /// </summary>
    /// <param name="precision">Forwarded unchanged to the base converter.</param>
    public FromCmykAvx2(int precision)
        : base(JpegColorSpace.Cmyk, precision)
    {
    }

    /// <summary>
    /// Converts the component planes in <paramref name="values"/> into <paramref name="result"/>,
    /// eight pixels per loop iteration.
    /// </summary>
    /// <param name="values">Source planes: Component0..Component3 are read as C, M, Y and K.</param>
    /// <param name="result">Destination pixels; only the first <c>result.Length / 8 * 8</c> entries are written here.</param>
    protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
    {
        // The guard is placed inside the method instead of around the whole file on purpose:
        // the type then exists on every target, so the tests do not have to be peppered with
        // conditionals. Where SUPPORTS_RUNTIME_INTRINSICS is not defined this method is empty.
#if SUPPORTS_RUNTIME_INTRINSICS
        // Reinterpret each float plane as a sequence of 8-float vectors.
        ref Vector256<float> cBase =
            ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component0));
        ref Vector256<float> mBase =
            ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component1));
        ref Vector256<float> yBase =
            ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component2));
        ref Vector256<float> kBase =
            ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component3));

        // One Vector256<float> covers two Vector4 pixels.
        ref Vector256<float> resultBase =
            ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result));

        // Used for the color conversion
        var scale = Vector256.Create(1 / this.MaximumValue);
        var one = Vector256.Create(1F);

        // Used for packing.
        // NOTE(review): the mask is defined elsewhere; judging by its name it moves the
        // even-indexed elements into the low 128-bit lane and the odd-indexed ones into the
        // high lane, which is what makes the stores below come out in pixel order - confirm.
        ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32);
        Vector256<int> vcontrol = Unsafe.As<byte, Vector256<int>>(ref control);

        // Any remainder (result.Length % 8) is not touched by this loop.
        int n = result.Length / 8;
        for (int i = 0; i < n; i++)
        {
            Vector256<float> k = Avx2.PermuteVar8x32(Unsafe.Add(ref kBase, i), vcontrol);
            Vector256<float> c = Avx2.PermuteVar8x32(Unsafe.Add(ref cBase, i), vcontrol);
            Vector256<float> m = Avx2.PermuteVar8x32(Unsafe.Add(ref mBase, i), vcontrol);
            Vector256<float> y = Avx2.PermuteVar8x32(Unsafe.Add(ref yBase, i), vcontrol);

            // k' = k / max, then channel' = channel * k' / max.
            k = Avx.Multiply(k, scale);

            c = Avx.Multiply(Avx.Multiply(c, k), scale);
            m = Avx.Multiply(Avx.Multiply(m, k), scale);
            y = Avx.Multiply(Avx.Multiply(y, k), scale);

            // Interleave within each 128-bit lane:
            //   cmLo = (c0, m0, c1, m1 | c4, m4, c5, m5)    yoLo = (y0, 1, y1, 1 | y4, 1, y5, 1)
            //   cmHi = (c2, m2, c3, m3 | c6, m6, c7, m7)    yoHi = (y2, 1, y3, 1 | y6, 1, y7, 1)
            // (indices refer to positions after the permute above).
            Vector256<float> cmLo = Avx.UnpackLow(c, m);
            Vector256<float> yoLo = Avx.UnpackLow(y, one);
            Vector256<float> cmHi = Avx.UnpackHigh(c, m);
            Vector256<float> yoHi = Avx.UnpackHigh(y, one);

            // Four 256-bit stores per iteration = eight Vector4 pixels.
            ref Vector256<float> destination = ref Unsafe.Add(ref resultBase, i * 4);

            // Each shuffle joins one (c, m) pair with its (y, 1) pair per lane, giving (c, m, y, 1).
            destination = Avx.Shuffle(cmLo, yoLo, 0b01_00_01_00);
            Unsafe.Add(ref destination, 1) = Avx.Shuffle(cmLo, yoLo, 0b11_10_11_10);
            Unsafe.Add(ref destination, 2) = Avx.Shuffle(cmHi, yoHi, 0b01_00_01_00);
            Unsafe.Add(ref destination, 3) = Avx.Shuffle(cmHi, yoHi, 0b11_10_11_10);
        }
#endif
    }

    /// <summary>
    /// Non-vectorized conversion; delegates to <c>FromCmykBasic.ConvertCore</c> with this converter's maximum value.
    /// </summary>
    protected override void ConvertCore(in ComponentValues values, Span<Vector4> result) =>
        FromCmykBasic.ConvertCore(values, result, this.MaximumValue);
}
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
// Copyright (c) Six Labors. | ||
// Licensed under the Apache License, Version 2.0. | ||
|
||
using System; | ||
using System.Numerics; | ||
using System.Runtime.CompilerServices; | ||
using System.Runtime.InteropServices; | ||
using SixLabors.ImageSharp.Tuples; | ||
|
||
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters | ||
{ | ||
internal abstract partial class JpegColorConverter | ||
{ | ||
/// <summary>
/// CMYK color converter whose vectorized path is built on <see cref="Vector{T}"/>.
/// </summary>
internal sealed class FromCmykVector8 : Vector8JpegColorConverter
{
    /// <summary>
    /// Initializes a new instance of the <see cref="FromCmykVector8"/> class.
    /// </summary>
    /// <param name="precision">Forwarded unchanged to the base converter.</param>
    public FromCmykVector8(int precision)
        : base(JpegColorSpace.Cmyk, precision)
    {
    }

    /// <summary>
    /// Converts the component planes in <paramref name="values"/> into <paramref name="result"/>,
    /// one <c>Vector4Octet</c> (eight pixels) per loop iteration.
    /// </summary>
    protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
    {
        // Destination viewed as groups of eight Vector4 pixels.
        ref Vector4Octet pixelOctets =
            ref Unsafe.As<Vector4, Vector4Octet>(ref MemoryMarshal.GetReference(result));

        // Source planes viewed as Vector<float>.
        // NOTE(review): stepping these by one vector per 8 pixels presumes Vector<float>.Count == 8 - confirm
        // that the base class only selects this path in that case.
        ref Vector<float> cyanPlane =
            ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component0));
        ref Vector<float> magentaPlane =
            ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component1));
        ref Vector<float> yellowPlane =
            ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component2));
        ref Vector<float> keyPlane =
            ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component3));

        var inverseMax = new Vector<float>(1 / this.MaximumValue);

        // Scratch pairs handed to Pack; each one is also aliased as a Vector<float> so a whole
        // computed vector can be written into it with a single assignment.
        Vector4Pair cyanPair = default;
        Vector4Pair magentaPair = default;
        Vector4Pair yellowPair = default;
        ref Vector<float> cyanView = ref Unsafe.As<Vector4Pair, Vector<float>>(ref cyanPair);
        ref Vector<float> magentaView = ref Unsafe.As<Vector4Pair, Vector<float>>(ref magentaPair);
        ref Vector<float> yellowView = ref Unsafe.As<Vector4Pair, Vector<float>>(ref yellowPair);

        // Eight pixels per step; a remainder of result.Length % 8 is not touched here.
        int octetCount = result.Length / 8;
        for (int index = 0; index < octetCount; index++)
        {
            // key' = k / max, then channel' = channel * key' / max.
            Vector<float> key = Unsafe.Add(ref keyPlane, index) * inverseMax;

            cyanView = (Unsafe.Add(ref cyanPlane, index) * key) * inverseMax;
            magentaView = (Unsafe.Add(ref magentaPlane, index) * key) * inverseMax;
            yellowView = (Unsafe.Add(ref yellowPlane, index) * key) * inverseMax;

            // Turn the planar (c0..c7), (m0..m7), (y0..y7) values into eight interleaved pixels.
            // NOTE(review): the fourth component of each pixel is presumably set to 1 by Pack - confirm in Vector4Octet.
            Unsafe.Add(ref pixelOctets, index).Pack(ref cyanPair, ref magentaPair, ref yellowPair);
        }
    }

    /// <summary>
    /// Non-vectorized conversion; delegates to <c>FromCmykBasic.ConvertCore</c> with this converter's maximum value.
    /// </summary>
    protected override void ConvertCore(in ComponentValues values, Span<Vector4> result) =>
        FromCmykBasic.ConvertCore(values, result, this.MaximumValue);
}
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
// Copyright (c) Six Labors. | ||
// Licensed under the Apache License, Version 2.0. | ||
|
||
using System; | ||
using System.Numerics; | ||
using System.Runtime.CompilerServices; | ||
using System.Runtime.InteropServices; | ||
#if SUPPORTS_RUNTIME_INTRINSICS | ||
using System.Runtime.Intrinsics; | ||
using System.Runtime.Intrinsics.X86; | ||
using static SixLabors.ImageSharp.SimdUtils; | ||
#endif | ||
|
||
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters | ||
{ | ||
internal abstract partial class JpegColorConverter | ||
{ | ||
/// <summary>
/// Grayscale color converter whose vectorized path is implemented with AVX2 intrinsics.
/// </summary>
internal sealed class FromGrayscaleAvx2 : Avx2JpegColorConverter
{
    /// <summary>
    /// Initializes a new instance of the <see cref="FromGrayscaleAvx2"/> class.
    /// </summary>
    /// <param name="precision">Forwarded unchanged to the base converter.</param>
    public FromGrayscaleAvx2(int precision)
        : base(JpegColorSpace.Grayscale, precision)
    {
    }

    /// <summary>
    /// Expands the single plane in <paramref name="values"/> into <paramref name="result"/>,
    /// eight pixels per loop iteration.
    /// </summary>
    /// <param name="values">Source planes; only Component0 is read.</param>
    /// <param name="result">Destination pixels; only the first <c>result.Length / 8 * 8</c> entries are written here.</param>
    protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
    {
        // Only the body is compiled conditionally, so the type itself exists on every target.
        // Where SUPPORTS_RUNTIME_INTRINSICS is not defined this method is empty.
#if SUPPORTS_RUNTIME_INTRINSICS
        // Blend control: bits 3 and 7 set, i.e. take the fourth element of each 128-bit lane
        // from the second operand and everything else from the first.
        const byte FourthComponentMask = 0b1000_1000;

        ref Vector256<float> gBase =
            ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component0));

        // One Vector256<float> covers two Vector4 pixels.
        ref Vector256<float> resultBase =
            ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result));

        // Used for the color conversion
        var scale = Vector256.Create(1 / this.MaximumValue);
        var one = Vector256.Create(1F);

        // Used for packing.
        // NOTE(review): the mask is defined elsewhere; judging by its name it moves the
        // even-indexed elements into the low 128-bit lane and the odd-indexed ones into the
        // high lane, which is what makes the stores below come out in pixel order - confirm.
        ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32);
        Vector256<int> vcontrol = Unsafe.As<byte, Vector256<int>>(ref control);

        // Any remainder (result.Length % 8) is not touched by this loop.
        int n = result.Length / 8;
        for (int i = 0; i < n; i++)
        {
            Vector256<float> g = Avx2.PermuteVar8x32(Unsafe.Add(ref gBase, i), vcontrol);

            g = Avx.Multiply(g, scale);

            // Four 256-bit stores per iteration = eight Vector4 pixels.
            ref Vector256<float> destination = ref Unsafe.Add(ref resultBase, i * 4);

            // Permute broadcasts element j of each lane across that lane; the blend then
            // replaces the fourth slot with 1, yielding (g, g, g, 1) for two pixels per store.
            destination = Avx.Blend(Avx.Permute(g, 0b00_00_00_00), one, FourthComponentMask);
            Unsafe.Add(ref destination, 1) = Avx.Blend(Avx.Permute(g, 0b01_01_01_01), one, FourthComponentMask);
            Unsafe.Add(ref destination, 2) = Avx.Blend(Avx.Permute(g, 0b10_10_10_10), one, FourthComponentMask);
            Unsafe.Add(ref destination, 3) = Avx.Blend(Avx.Permute(g, 0b11_11_11_11), one, FourthComponentMask);
        }
#endif
    }

    /// <summary>
    /// Non-vectorized conversion; delegates to <c>FromGrayscaleBasic.ConvertCore</c> with this converter's maximum value.
    /// </summary>
    protected override void ConvertCore(in ComponentValues values, Span<Vector4> result) =>
        FromGrayscaleBasic.ConvertCore(values, result, this.MaximumValue);
}
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I find this name misleading; one may think it has something to do with `Vector4`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Any suggestions?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would just inline it; there would be only one usage if we addressed my other suggestion.