Skip to content

Commit

Permalink
Merge pull request #1374 from SixLabors/js/Block8x8F_TransposeAVX
Browse files Browse the repository at this point in the history
Add Avx backed Block8x8F Transpose method
  • Loading branch information
JimBobSquarePants authored Oct 12, 2020
2 parents 4dadf24 + 685693a commit a1784a6
Show file tree
Hide file tree
Showing 6 changed files with 172 additions and 12 deletions.
6 changes: 3 additions & 3 deletions src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
{
internal partial struct Block8x8F
{
/// <summary>
/// Transpose the block into the destination block.
/// <summary>
/// Fallback method to transpose a block into the destination block on non AVX supported CPUs.
/// </summary>
/// <param name="d">The destination block</param>
[MethodImpl(InliningOptions.ShortMethod)]
public void TransposeInto(ref Block8x8F d)
public void TransposeIntoFallback(ref Block8x8F d)
{
d.V0L.X = V0L.X;
d.V1L.X = V0L.Y;
Expand Down
6 changes: 3 additions & 3 deletions src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
{
internal partial struct Block8x8F
{
/// <summary>
/// Transpose the block into the destination block.
/// <summary>
/// Fallback method to transpose a block into the destination block on non AVX supported CPUs.
/// </summary>
/// <param name="d">The destination block</param>
[MethodImpl(InliningOptions.ShortMethod)]
public void TransposeInto(ref Block8x8F d)
public void TransposeIntoFallback(ref Block8x8F d)
{
<#
PushIndent(" ");
Expand Down
97 changes: 97 additions & 0 deletions src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif
using System.Text;

// ReSharper disable InconsistentNaming
Expand Down Expand Up @@ -596,5 +600,98 @@ private static void GuardBlockIndex(int idx)
DebugGuard.MustBeLessThan(idx, Size, nameof(idx));
DebugGuard.MustBeGreaterThanOrEqualTo(idx, 0, nameof(idx));
}

/// <summary>
/// Writes the transpose of this block into <paramref name="d"/>.
/// Uses <c>TransposeIntoAvx</c> when runtime intrinsics are compiled in and AVX is
/// available on the current CPU; otherwise uses <c>TransposeIntoFallback</c>.
/// </summary>
/// <param name="d">The destination block</param>
[MethodImpl(InliningOptions.ShortMethod)]
public void TransposeInto(ref Block8x8F d)
{
#if SUPPORTS_RUNTIME_INTRINSICS
    // Hardware accelerated path: taken only when the CPU reports AVX support.
    if (Avx.IsSupported)
    {
        this.TransposeIntoAvx(ref d);
        return;
    }
#endif

    // Portable path for builds without intrinsics or CPUs without AVX.
    this.TransposeIntoFallback(ref d);
}

#if SUPPORTS_RUNTIME_INTRINSICS
/// <summary>
/// AVX-only implementation of <see cref="TransposeInto(ref Block8x8F)"/>.
/// Adapted from
/// <see href="https://stackoverflow.com/questions/25622745/transpose-an-8x8-float-using-avx-avx2/25627536#25627536"/>
/// </summary>
/// <param name="d">The destination block</param>
[MethodImpl(InliningOptions.ShortMethod)]
public void TransposeIntoAvx(ref Block8x8F d)
{
    // Immediate operands used by the instructions below.
    const byte UpperLane = 1;         // InsertVector128: write into the upper 128 bits.
    const byte CrossPairs = 0x4E;     // Shuffle: per 128-bit lane yields { a[2], a[3], b[0], b[1] }.
    const byte BlendHighPairs = 0xCC; // Blend: elements 2,3 of each lane come from the second operand.
    const byte BlendLowPairs = 0x33;  // Blend: elements 0,1 of each lane come from the second operand.

    // Load phase: pair row N (lower lane) with row N + 4 (upper lane), separately for the
    // left (V*L) and right (V*R) halves. Every read of this block happens before the first
    // write to the destination.
    Vector256<float> left04 = Avx.InsertVector128(
        Unsafe.As<Vector4, Vector128<float>>(ref this.V0L).ToVector256(),
        Unsafe.As<Vector4, Vector128<float>>(ref this.V4L),
        UpperLane);

    Vector256<float> left15 = Avx.InsertVector128(
        Unsafe.As<Vector4, Vector128<float>>(ref this.V1L).ToVector256(),
        Unsafe.As<Vector4, Vector128<float>>(ref this.V5L),
        UpperLane);

    Vector256<float> left26 = Avx.InsertVector128(
        Unsafe.As<Vector4, Vector128<float>>(ref this.V2L).ToVector256(),
        Unsafe.As<Vector4, Vector128<float>>(ref this.V6L),
        UpperLane);

    Vector256<float> left37 = Avx.InsertVector128(
        Unsafe.As<Vector4, Vector128<float>>(ref this.V3L).ToVector256(),
        Unsafe.As<Vector4, Vector128<float>>(ref this.V7L),
        UpperLane);

    Vector256<float> right04 = Avx.InsertVector128(
        Unsafe.As<Vector4, Vector128<float>>(ref this.V0R).ToVector256(),
        Unsafe.As<Vector4, Vector128<float>>(ref this.V4R),
        UpperLane);

    Vector256<float> right15 = Avx.InsertVector128(
        Unsafe.As<Vector4, Vector128<float>>(ref this.V1R).ToVector256(),
        Unsafe.As<Vector4, Vector128<float>>(ref this.V5R),
        UpperLane);

    Vector256<float> right26 = Avx.InsertVector128(
        Unsafe.As<Vector4, Vector128<float>>(ref this.V2R).ToVector256(),
        Unsafe.As<Vector4, Vector128<float>>(ref this.V6R),
        UpperLane);

    Vector256<float> right37 = Avx.InsertVector128(
        Unsafe.As<Vector4, Vector128<float>>(ref this.V3R).ToVector256(),
        Unsafe.As<Vector4, Vector128<float>>(ref this.V7R),
        UpperLane);

    // Store phase: each write below is 256 bits wide starting at the row's left vector.
    // NOTE(review): this presumably covers the adjacent right vector of the same row as well;
    // confirm against the struct layout.

    // Destination rows 0 and 1.
    Vector256<float> leftLowA = Avx.UnpackLow(left04, left15);
    Vector256<float> leftLowB = Avx.UnpackLow(left26, left37);
    Vector256<float> leftLowMix = Avx.Shuffle(leftLowA, leftLowB, CrossPairs);
    Unsafe.As<Vector4, Vector256<float>>(ref d.V0L) = Avx.Blend(leftLowA, leftLowMix, BlendHighPairs);
    Unsafe.As<Vector4, Vector256<float>>(ref d.V1L) = Avx.Blend(leftLowB, leftLowMix, BlendLowPairs);

    // Destination rows 2 and 3.
    Vector256<float> leftHighA = Avx.UnpackHigh(left04, left15);
    Vector256<float> leftHighB = Avx.UnpackHigh(left26, left37);
    Vector256<float> leftHighMix = Avx.Shuffle(leftHighA, leftHighB, CrossPairs);
    Unsafe.As<Vector4, Vector256<float>>(ref d.V2L) = Avx.Blend(leftHighA, leftHighMix, BlendHighPairs);
    Unsafe.As<Vector4, Vector256<float>>(ref d.V3L) = Avx.Blend(leftHighB, leftHighMix, BlendLowPairs);

    // Destination rows 4 and 5.
    Vector256<float> rightLowA = Avx.UnpackLow(right04, right15);
    Vector256<float> rightLowB = Avx.UnpackLow(right26, right37);
    Vector256<float> rightLowMix = Avx.Shuffle(rightLowA, rightLowB, CrossPairs);
    Unsafe.As<Vector4, Vector256<float>>(ref d.V4L) = Avx.Blend(rightLowA, rightLowMix, BlendHighPairs);
    Unsafe.As<Vector4, Vector256<float>>(ref d.V5L) = Avx.Blend(rightLowB, rightLowMix, BlendLowPairs);

    // Destination rows 6 and 7.
    Vector256<float> rightHighA = Avx.UnpackHigh(right04, right15);
    Vector256<float> rightHighB = Avx.UnpackHigh(right26, right37);
    Vector256<float> rightHighMix = Avx.Shuffle(rightHighA, rightHighB, CrossPairs);
    Unsafe.As<Vector4, Vector256<float>>(ref d.V6L) = Avx.Blend(rightHighA, rightHighMix, BlendHighPairs);
    Unsafe.As<Vector4, Vector256<float>>(ref d.V7L) = Avx.Blend(rightHighB, rightHighMix, BlendLowPairs);
}
#endif
}
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) Six Labors.
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.

using System.Numerics;
Expand Down Expand Up @@ -50,8 +50,6 @@ internal static class FastFloatingPointDCT
/// <param name="temp">Temporary block provided by the caller</param>
public static void TransformIDCT(ref Block8x8F src, ref Block8x8F dest, ref Block8x8F temp)
{
// TODO: Transpose is a bottleneck now. We need full AVX support to optimize it:
// https://github.com/dotnet/corefx/issues/22940
src.TransposeInto(ref temp);

IDCT8x4_LeftPart(ref temp, ref dest);
Expand Down Expand Up @@ -340,4 +338,4 @@ public static void TransformFDCT(
dest.MultiplyInplace(C_0_125);
}
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.

using BenchmarkDotNet.Attributes;
using SixLabors.ImageSharp.Formats.Jpeg.Components;

namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations
{
/// <summary>
/// Benchmarks the fallback and AVX implementations of the <see cref="Block8x8F"/> transpose.
/// </summary>
public class Block8x8F_Transpose
{
    // Deliberately a mutable instance field. Block8x8F is a struct and the transpose methods
    // are not readonly members, so calling them through a readonly field would make the
    // compiler copy the whole block into a temporary first, and that copy would be timed too.
    private Block8x8F source = Create8x8FloatData();

    /// <summary>
    /// Baseline: transposes the source block with the fallback implementation.
    /// </summary>
    [Benchmark(Baseline = true)]
    public void TransposeIntoVector4()
    {
        var dest = default(Block8x8F);
        this.source.TransposeIntoFallback(ref dest);
    }

#if SUPPORTS_RUNTIME_INTRINSICS
    /// <summary>
    /// Transposes the source block with the AVX implementation.
    /// </summary>
    [Benchmark]
    public void TransposeIntoAvx()
    {
        var dest = default(Block8x8F);
        this.source.TransposeIntoAvx(ref dest);
    }
#endif

    /// <summary>
    /// Builds the input block: the element at row <c>i</c>, column <c>j</c> is <c>(i * 10) + j</c>.
    /// </summary>
    /// <returns>The populated block.</returns>
    private static Block8x8F Create8x8FloatData()
    {
        var result = new float[64];
        for (int i = 0; i < 8; i++)
        {
            for (int j = 0; j < 8; j++)
            {
                result[(i * 8) + j] = (i * 10) + j;
            }
        }

        var block = default(Block8x8F);
        block.LoadFrom(result);
        return block;
    }
}
}
24 changes: 22 additions & 2 deletions tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ public void Load_Store_IntArray()
}

[Fact]
public void TransposeInto()
public void TransposeIntoFallback()
{
float[] expected = Create8x8FloatData();
ReferenceImplementations.Transpose8x8(expected);
Expand All @@ -172,14 +172,34 @@ public void TransposeInto()
source.LoadFrom(Create8x8FloatData());

var dest = default(Block8x8F);
source.TransposeInto(ref dest);
source.TransposeIntoFallback(ref dest);

float[] actual = new float[64];
dest.ScaledCopyTo(actual);

Assert.Equal(expected, actual);
}

#if SUPPORTS_RUNTIME_INTRINSICS
[Fact]
public void TransposeIntoAvx()
{
    // TransposeIntoAvx issues AVX instructions unconditionally, so calling it on a machine
    // (or runtime configuration) without AVX throws PlatformNotSupportedException.
    // There is nothing to verify in that case; the fallback path has its own test.
    if (!System.Runtime.Intrinsics.X86.Avx.IsSupported)
    {
        return;
    }

    // Arrange: the expected result comes from the reference transpose.
    float[] expected = Create8x8FloatData();
    ReferenceImplementations.Transpose8x8(expected);

    var source = default(Block8x8F);
    source.LoadFrom(Create8x8FloatData());

    // Act
    var dest = default(Block8x8F);
    source.TransposeIntoAvx(ref dest);

    // Assert
    float[] actual = new float[64];
    dest.ScaledCopyTo(actual);

    Assert.Equal(expected, actual);
}
#endif

private class BufferHolder
{
public Block8x8F Buffer;
Expand Down

0 comments on commit a1784a6

Please sign in to comment.