Skip to content

Commit

Permalink
Merge pull request #1411 from tkp1n/tkp1n/avx2-color-converter
Browse files Browse the repository at this point in the history
Vectorize (AVX2) JPEG Color Converter
  • Loading branch information
JimBobSquarePants authored Nov 7, 2020
2 parents 522a91e + 963e5c6 commit d2de2a1
Show file tree
Hide file tree
Showing 29 changed files with 1,643 additions and 455 deletions.
19 changes: 19 additions & 0 deletions src/ImageSharp/Common/Helpers/SimdUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,25 @@ internal static partial class SimdUtils
public static bool HasVector8 { get; } =
Vector.IsHardwareAccelerated && Vector<float>.Count == 8 && Vector<int>.Count == 8;

/// <summary>
/// Gets a value indicating whether <see cref="Vector{T}"/> code is being JIT-ed to SSE instructions
/// where float and integer registers are of size 128 byte.
/// </summary>
public static bool HasVector4 { get; } =
Vector.IsHardwareAccelerated && Vector<float>.Count == 4;

public static bool HasAvx2
{
get
{
#if SUPPORTS_RUNTIME_INTRINSICS
return Avx2.IsSupported;
#else
return false;
#endif
}
}

/// <summary>
/// Transform all scalars in 'v' in a way that converting them to <see cref="int"/> would have rounding semantics.
/// </summary>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.

namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
internal abstract class Avx2JpegColorConverter : VectorizedJpegColorConverter
{
protected Avx2JpegColorConverter(JpegColorSpace colorSpace, int precision)
: base(colorSpace, precision, 8)
{
}

protected sealed override bool IsAvailable => SimdUtils.HasAvx2;
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.

namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
internal abstract class BasicJpegColorConverter : JpegColorConverter
{
protected BasicJpegColorConverter(JpegColorSpace colorSpace, int precision)
: base(colorSpace, precision)
{
}

protected override bool IsAvailable => true;
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.

using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using static SixLabors.ImageSharp.SimdUtils;
#endif

namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
internal sealed class FromCmykAvx2 : Avx2JpegColorConverter
{
public FromCmykAvx2(int precision)
: base(JpegColorSpace.Cmyk, precision)
{
}

protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
{
#if SUPPORTS_RUNTIME_INTRINSICS
ref Vector256<float> cBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component0));
ref Vector256<float> mBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component1));
ref Vector256<float> yBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component2));
ref Vector256<float> kBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component3));

ref Vector256<float> resultBase =
ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result));

// Used for the color conversion
var scale = Vector256.Create(1 / this.MaximumValue);
var one = Vector256.Create(1F);

// Used for packing
ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32);
Vector256<int> vcontrol = Unsafe.As<byte, Vector256<int>>(ref control);

int n = result.Length / 8;
for (int i = 0; i < n; i++)
{
Vector256<float> k = Avx2.PermuteVar8x32(Unsafe.Add(ref kBase, i), vcontrol);
Vector256<float> c = Avx2.PermuteVar8x32(Unsafe.Add(ref cBase, i), vcontrol);
Vector256<float> m = Avx2.PermuteVar8x32(Unsafe.Add(ref mBase, i), vcontrol);
Vector256<float> y = Avx2.PermuteVar8x32(Unsafe.Add(ref yBase, i), vcontrol);

k = Avx.Multiply(k, scale);

c = Avx.Multiply(Avx.Multiply(c, k), scale);
m = Avx.Multiply(Avx.Multiply(m, k), scale);
y = Avx.Multiply(Avx.Multiply(y, k), scale);

Vector256<float> cmLo = Avx.UnpackLow(c, m);
Vector256<float> yoLo = Avx.UnpackLow(y, one);
Vector256<float> cmHi = Avx.UnpackHigh(c, m);
Vector256<float> yoHi = Avx.UnpackHigh(y, one);

ref Vector256<float> destination = ref Unsafe.Add(ref resultBase, i * 4);

destination = Avx.Shuffle(cmLo, yoLo, 0b01_00_01_00);
Unsafe.Add(ref destination, 1) = Avx.Shuffle(cmLo, yoLo, 0b11_10_11_10);
Unsafe.Add(ref destination, 2) = Avx.Shuffle(cmHi, yoHi, 0b01_00_01_00);
Unsafe.Add(ref destination, 3) = Avx.Shuffle(cmHi, yoHi, 0b11_10_11_10);
}
#endif
}

protected override void ConvertCore(in ComponentValues values, Span<Vector4> result) =>
FromCmykBasic.ConvertCore(values, result, this.MaximumValue);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,32 +8,36 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
internal sealed class FromCmyk : JpegColorConverter
internal sealed class FromCmykBasic : BasicJpegColorConverter
{
public FromCmyk(int precision)
public FromCmykBasic(int precision)
: base(JpegColorSpace.Cmyk, precision)
{
}

public override void ConvertToRgba(in ComponentValues values, Span<Vector4> result)
{
// TODO: We can optimize a lot here with Vector<float> and SRCS.Unsafe()!
ConvertCore(values, result, this.MaximumValue);
}

internal static void ConvertCore(in ComponentValues values, Span<Vector4> result, float maxValue)
{
ReadOnlySpan<float> cVals = values.Component0;
ReadOnlySpan<float> mVals = values.Component1;
ReadOnlySpan<float> yVals = values.Component2;
ReadOnlySpan<float> kVals = values.Component3;

var v = new Vector4(0, 0, 0, 1F);

var maximum = 1 / this.MaximumValue;
var maximum = 1 / maxValue;
var scale = new Vector4(maximum, maximum, maximum, 1F);

for (int i = 0; i < result.Length; i++)
{
float c = cVals[i];
float m = mVals[i];
float y = yVals[i];
float k = kVals[i] / this.MaximumValue;
float k = kVals[i] / maxValue;

v.X = c * k;
v.Y = m * k;
Expand All @@ -47,4 +51,4 @@ public override void ConvertToRgba(in ComponentValues values, Span<Vector4> resu
}
}
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.

using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Tuples;

namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
internal sealed class FromCmykVector8 : Vector8JpegColorConverter
{
public FromCmykVector8(int precision)
: base(JpegColorSpace.Cmyk, precision)
{
}

protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
{
ref Vector<float> cBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component0));
ref Vector<float> mBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component1));
ref Vector<float> yBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component2));
ref Vector<float> kBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component3));

ref Vector4Octet resultBase =
ref Unsafe.As<Vector4, Vector4Octet>(ref MemoryMarshal.GetReference(result));

Vector4Pair cc = default;
Vector4Pair mm = default;
Vector4Pair yy = default;
ref Vector<float> ccRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref cc);
ref Vector<float> mmRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref mm);
ref Vector<float> yyRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref yy);

var scale = new Vector<float>(1 / this.MaximumValue);

// Walking 8 elements at one step:
int n = result.Length / 8;
for (int i = 0; i < n; i++)
{
Vector<float> c = Unsafe.Add(ref cBase, i);
Vector<float> m = Unsafe.Add(ref mBase, i);
Vector<float> y = Unsafe.Add(ref yBase, i);
Vector<float> k = Unsafe.Add(ref kBase, i) * scale;

c = (c * k) * scale;
m = (m * k) * scale;
y = (y * k) * scale;

ccRefAsVector = c;
mmRefAsVector = m;
yyRefAsVector = y;

// Collect (c0,c1...c8) (m0,m1...m8) (y0,y1...y8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order:
ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i);
destination.Pack(ref cc, ref mm, ref yy);
}
}

protected override void ConvertCore(in ComponentValues values, Span<Vector4> result) =>
FromCmykBasic.ConvertCore(values, result, this.MaximumValue);
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.

using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using static SixLabors.ImageSharp.SimdUtils;
#endif

namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
internal sealed class FromGrayscaleAvx2 : Avx2JpegColorConverter
{
public FromGrayscaleAvx2(int precision)
: base(JpegColorSpace.Grayscale, precision)
{
}

protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
{
#if SUPPORTS_RUNTIME_INTRINSICS
ref Vector256<float> gBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component0));

ref Vector256<float> resultBase =
ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result));

// Used for the color conversion
var scale = Vector256.Create(1 / this.MaximumValue);
var one = Vector256.Create(1F);

// Used for packing
ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32);
Vector256<int> vcontrol = Unsafe.As<byte, Vector256<int>>(ref control);

int n = result.Length / 8;
for (int i = 0; i < n; i++)
{
Vector256<float> g = Avx.Multiply(Unsafe.Add(ref gBase, i), scale);

g = Avx2.PermuteVar8x32(g, vcontrol);

ref Vector256<float> destination = ref Unsafe.Add(ref resultBase, i * 4);

destination = Avx.Blend(Avx.Permute(g, 0b00_00_00_00), one, 0b1000_1000);
Unsafe.Add(ref destination, 1) = Avx.Blend(Avx.Shuffle(g, g, 0b01_01_01_01), one, 0b1000_1000);
Unsafe.Add(ref destination, 2) = Avx.Blend(Avx.Shuffle(g, g, 0b10_10_10_10), one, 0b1000_1000);
Unsafe.Add(ref destination, 3) = Avx.Blend(Avx.Shuffle(g, g, 0b11_11_11_11), one, 0b1000_1000);
}
#endif
}

protected override void ConvertCore(in ComponentValues values, Span<Vector4> result) =>
FromGrayscaleBasic.ConvertCore(values, result, this.MaximumValue);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,21 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
internal sealed class FromGrayscale : JpegColorConverter
internal sealed class FromGrayscaleBasic : BasicJpegColorConverter
{
public FromGrayscale(int precision)
public FromGrayscaleBasic(int precision)
: base(JpegColorSpace.Grayscale, precision)
{
}

public override void ConvertToRgba(in ComponentValues values, Span<Vector4> result)
{
var maximum = 1 / this.MaximumValue;
ConvertCore(values, result, this.MaximumValue);
}

internal static void ConvertCore(in ComponentValues values, Span<Vector4> result, float maxValue)
{
var maximum = 1 / maxValue;
var scale = new Vector4(maximum, maximum, maximum, 1F);

ref float sBase = ref MemoryMarshal.GetReference(values.Component0);
Expand All @@ -35,4 +40,4 @@ public override void ConvertToRgba(in ComponentValues values, Span<Vector4> resu
}
}
}
}
}
Loading

0 comments on commit d2de2a1

Please sign in to comment.