Skip to content

Commit

Permalink
Merge pull request #1409 from SixLabors/js/Shuffle3Channel
Browse files Browse the repository at this point in the history
3 <==> 4 Channel Shuffling with Hardware Intrinsics
  • Loading branch information
JimBobSquarePants authored Nov 6, 2020
2 parents 8539c9e + 3ee5a38 commit 522a91e
Show file tree
Hide file tree
Showing 23 changed files with 1,866 additions and 318 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

// The JIT can detect and optimize rotation idioms ROTL (Rotate Left)
// and ROTR (Rotate Right) emitting efficient CPU instructions:
// https://github.com/dotnet/coreclr/pull/1830
namespace SixLabors.ImageSharp
{
/// <summary>
Expand All @@ -28,28 +31,44 @@ internal interface IComponentShuffle
void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest);
}

internal readonly struct DefaultShuffle4 : IComponentShuffle
/// <inheritdoc/>
internal interface IShuffle4 : IComponentShuffle
{
}

internal readonly struct DefaultShuffle4 : IShuffle4
{
private readonly byte p3;
private readonly byte p2;
private readonly byte p1;
private readonly byte p0;

public DefaultShuffle4(byte p3, byte p2, byte p1, byte p0)
: this(SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0))
{
DebugGuard.MustBeBetweenOrEqualTo<byte>(p3, 0, 3, nameof(p3));
DebugGuard.MustBeBetweenOrEqualTo<byte>(p2, 0, 3, nameof(p2));
DebugGuard.MustBeBetweenOrEqualTo<byte>(p1, 0, 3, nameof(p1));
DebugGuard.MustBeBetweenOrEqualTo<byte>(p0, 0, 3, nameof(p0));

this.p3 = p3;
this.p2 = p2;
this.p1 = p1;
this.p0 = p0;
this.Control = SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0);
}

public DefaultShuffle4(byte control) => this.Control = control;

public byte Control { get; }

[MethodImpl(InliningOptions.ShortMethod)]
public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
{
ref byte sBase = ref MemoryMarshal.GetReference(source);
ref byte dBase = ref MemoryMarshal.GetReference(dest);
SimdUtils.Shuffle.InverseMmShuffle(
this.Control,
out int p3,
out int p2,
out int p1,
out int p0);

int p3 = this.p3;
int p2 = this.p2;
int p1 = this.p1;
int p0 = this.p0;

for (int i = 0; i < source.Length; i += 4)
{
Expand All @@ -61,22 +80,22 @@ public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
}
}

internal readonly struct WXYZShuffle4 : IComponentShuffle
internal readonly struct WXYZShuffle4 : IShuffle4
{
public byte Control => SimdUtils.Shuffle.MmShuffle(2, 1, 0, 3);
public byte Control
{
[MethodImpl(InliningOptions.ShortMethod)]
get => SimdUtils.Shuffle.MmShuffle(2, 1, 0, 3);
}

[MethodImpl(InliningOptions.ShortMethod)]
public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
{
ReadOnlySpan<uint> s = MemoryMarshal.Cast<byte, uint>(source);
Span<uint> d = MemoryMarshal.Cast<byte, uint>(dest);
ref uint sBase = ref MemoryMarshal.GetReference(s);
ref uint dBase = ref MemoryMarshal.GetReference(d);

// The JIT can detect and optimize rotation idioms ROTL (Rotate Left)
// and ROTR (Rotate Right) emitting efficient CPU instructions:
// https://github.com/dotnet/coreclr/pull/1830
for (int i = 0; i < s.Length; i++)
ref uint sBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(source));
ref uint dBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(dest));
int n = source.Length / 4;

for (int i = 0; i < n; i++)
{
uint packed = Unsafe.Add(ref sBase, i);

Expand All @@ -87,19 +106,22 @@ public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
}
}

internal readonly struct WZYXShuffle4 : IComponentShuffle
internal readonly struct WZYXShuffle4 : IShuffle4
{
public byte Control => SimdUtils.Shuffle.MmShuffle(0, 1, 2, 3);
public byte Control
{
[MethodImpl(InliningOptions.ShortMethod)]
get => SimdUtils.Shuffle.MmShuffle(0, 1, 2, 3);
}

[MethodImpl(InliningOptions.ShortMethod)]
public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
{
ReadOnlySpan<uint> s = MemoryMarshal.Cast<byte, uint>(source);
Span<uint> d = MemoryMarshal.Cast<byte, uint>(dest);
ref uint sBase = ref MemoryMarshal.GetReference(s);
ref uint dBase = ref MemoryMarshal.GetReference(d);
ref uint sBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(source));
ref uint dBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(dest));
int n = source.Length / 4;

for (int i = 0; i < s.Length; i++)
for (int i = 0; i < n; i++)
{
uint packed = Unsafe.Add(ref sBase, i);

Expand All @@ -110,19 +132,22 @@ public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
}
}

internal readonly struct YZWXShuffle4 : IComponentShuffle
internal readonly struct YZWXShuffle4 : IShuffle4
{
public byte Control => SimdUtils.Shuffle.MmShuffle(0, 3, 2, 1);
public byte Control
{
[MethodImpl(InliningOptions.ShortMethod)]
get => SimdUtils.Shuffle.MmShuffle(0, 3, 2, 1);
}

[MethodImpl(InliningOptions.ShortMethod)]
public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
{
ReadOnlySpan<uint> s = MemoryMarshal.Cast<byte, uint>(source);
Span<uint> d = MemoryMarshal.Cast<byte, uint>(dest);
ref uint sBase = ref MemoryMarshal.GetReference(s);
ref uint dBase = ref MemoryMarshal.GetReference(d);
ref uint sBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(source));
ref uint dBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(dest));
int n = source.Length / 4;

for (int i = 0; i < s.Length; i++)
for (int i = 0; i < n; i++)
{
uint packed = Unsafe.Add(ref sBase, i);

Expand All @@ -133,19 +158,22 @@ public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
}
}

internal readonly struct ZYXWShuffle4 : IComponentShuffle
internal readonly struct ZYXWShuffle4 : IShuffle4
{
public byte Control => SimdUtils.Shuffle.MmShuffle(3, 0, 1, 2);
public byte Control
{
[MethodImpl(InliningOptions.ShortMethod)]
get => SimdUtils.Shuffle.MmShuffle(3, 0, 1, 2);
}

[MethodImpl(InliningOptions.ShortMethod)]
public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
{
ReadOnlySpan<uint> s = MemoryMarshal.Cast<byte, uint>(source);
Span<uint> d = MemoryMarshal.Cast<byte, uint>(dest);
ref uint sBase = ref MemoryMarshal.GetReference(s);
ref uint dBase = ref MemoryMarshal.GetReference(d);
ref uint sBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(source));
ref uint dBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(dest));
int n = source.Length / 4;

for (int i = 0; i < s.Length; i++)
for (int i = 0; i < n; i++)
{
uint packed = Unsafe.Add(ref sBase, i);

Expand Down
103 changes: 103 additions & 0 deletions src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.

using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

namespace SixLabors.ImageSharp
{
/// <summary>
/// Marker interface for <see cref="IComponentShuffle"/> implementations that pad
/// 3-byte elements to 4 bytes while shuffling. It adds no members of its own.
/// </summary>
/// <remarks>
/// The implementations in this file consume 3 source bytes and produce 4 destination
/// bytes per iteration, filling the extra byte with <see cref="byte.MaxValue"/>.
/// </remarks>
internal interface IPad3Shuffle4 : IComponentShuffle
{
}

/// <summary>
/// General purpose 3-to-4 byte shuffle. Every 3-byte source element is padded with a
/// fourth byte of <see cref="byte.MaxValue"/>, and the resulting 4 bytes are written to
/// the destination in the order selected by the four component indices.
/// </summary>
internal readonly struct DefaultPad3Shuffle4 : IPad3Shuffle4
{
    private readonly byte p3;
    private readonly byte p2;
    private readonly byte p1;
    private readonly byte p0;

    /// <summary>
    /// Initializes a new instance of the <see cref="DefaultPad3Shuffle4"/> struct.
    /// </summary>
    /// <param name="p3">Index (0-3) of the padded source byte written to destination byte 3.</param>
    /// <param name="p2">Index (0-3) of the padded source byte written to destination byte 2.</param>
    /// <param name="p1">Index (0-3) of the padded source byte written to destination byte 1.</param>
    /// <param name="p0">Index (0-3) of the padded source byte written to destination byte 0.</param>
    public DefaultPad3Shuffle4(byte p3, byte p2, byte p1, byte p0)
    {
        DebugGuard.MustBeBetweenOrEqualTo<byte>(p3, 0, 3, nameof(p3));
        DebugGuard.MustBeBetweenOrEqualTo<byte>(p2, 0, 3, nameof(p2));
        DebugGuard.MustBeBetweenOrEqualTo<byte>(p1, 0, 3, nameof(p1));
        DebugGuard.MustBeBetweenOrEqualTo<byte>(p0, 0, 3, nameof(p0));

        this.p3 = p3;
        this.p2 = p2;
        this.p1 = p1;
        this.p0 = p0;
        this.Control = SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0);
    }

    /// <summary>
    /// Gets the shuffle control byte built from the four component indices.
    /// </summary>
    public byte Control { get; }

    /// <summary>
    /// Expands each 3-byte element of <paramref name="source"/> into a shuffled 4-byte
    /// element of <paramref name="dest"/>.
    /// </summary>
    /// <param name="source">
    /// The source bytes, consumed 3 at a time. No bounds checks are performed here, so
    /// the length is expected to be a multiple of 3.
    /// </param>
    /// <param name="dest">
    /// The destination bytes, written 4 at a time. Must provide 4 bytes for every
    /// 3 source bytes; this method does not verify it.
    /// </param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
    {
        ref byte sBase = ref MemoryMarshal.GetReference(source);
        ref byte dBase = ref MemoryMarshal.GetReference(dest);

        int p3 = this.p3;
        int p2 = this.p2;
        int p1 = this.p1;
        int p0 = this.p0;

        // Scratch element: bytes 0-2 receive the source element, byte 3 is the padding.
        // The padding never changes, so it is written once outside the loop.
        Span<byte> temp = stackalloc byte[4];
        ref byte t = ref MemoryMarshal.GetReference(temp);
        Unsafe.Add(ref t, 3) = byte.MaxValue;

        for (int i = 0, j = 0; i < source.Length; i += 3, j += 4)
        {
            ref byte s = ref Unsafe.Add(ref sBase, i);

            // Copy exactly three bytes. Loading a 32-bit word from the source here would
            // read one byte beyond the end of the buffer on the final element, and OR-ing
            // the padding into the word would additionally depend on the byte order.
            Unsafe.Add(ref t, 0) = Unsafe.Add(ref s, 0);
            Unsafe.Add(ref t, 1) = Unsafe.Add(ref s, 1);
            Unsafe.Add(ref t, 2) = Unsafe.Add(ref s, 2);

            Unsafe.Add(ref dBase, j) = Unsafe.Add(ref t, p0);
            Unsafe.Add(ref dBase, j + 1) = Unsafe.Add(ref t, p1);
            Unsafe.Add(ref dBase, j + 2) = Unsafe.Add(ref t, p2);
            Unsafe.Add(ref dBase, j + 3) = Unsafe.Add(ref t, p3);
        }
    }
}

/// <summary>
/// 3-to-4 byte shuffle that keeps the three source bytes in their original order and
/// appends a fourth byte of <see cref="byte.MaxValue"/>.
/// </summary>
internal readonly struct XYZWPad3Shuffle4 : IPad3Shuffle4
{
    /// <summary>
    /// Gets the shuffle control byte for the identity ordering (3, 2, 1, 0).
    /// </summary>
    public byte Control
    {
        [MethodImpl(InliningOptions.ShortMethod)]
        get => SimdUtils.Shuffle.MmShuffle(3, 2, 1, 0);
    }

    /// <summary>
    /// Copies each 3-byte element of <paramref name="source"/> to <paramref name="dest"/>
    /// followed by a padding byte of <see cref="byte.MaxValue"/>.
    /// </summary>
    /// <param name="source">
    /// The source bytes, consumed 3 at a time. No bounds checks are performed here, so
    /// the length is expected to be a multiple of 3.
    /// </param>
    /// <param name="dest">
    /// The destination bytes, written 4 at a time. Must provide 4 bytes for every
    /// 3 source bytes; this method does not verify it.
    /// </param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
    {
        ref byte sBase = ref MemoryMarshal.GetReference(source);
        ref byte dBase = ref MemoryMarshal.GetReference(dest);

        // Offsets are tracked as integers so that no managed reference is ever formed
        // before the start of the source buffer (which "end - 4" would do for inputs
        // shorter than 4 bytes).
        int length = source.Length;
        int i = 0;
        int j = 0;

        // Word-at-a-time path: only taken while more than 4 source bytes remain, so the
        // 32-bit load stays inside the buffer.
        // NOTE(review): OR-ing 0xFF000000 places the padding in the last byte in memory
        // only on little-endian targets - confirm that is the only supported byte order.
        for (; i < length - 4; i += 3, j += 4)
        {
            uint packed = Unsafe.As<byte, uint>(ref Unsafe.Add(ref sBase, i));
            Unsafe.As<byte, uint>(ref Unsafe.Add(ref dBase, j)) = packed | 0xFF000000;
        }

        // Byte-at-a-time path for the remaining element(s).
        for (; i < length; i += 3, j += 4)
        {
            ref byte s = ref Unsafe.Add(ref sBase, i);
            ref byte d = ref Unsafe.Add(ref dBase, j);

            Unsafe.Add(ref d, 0) = Unsafe.Add(ref s, 0);
            Unsafe.Add(ref d, 1) = Unsafe.Add(ref s, 1);
            Unsafe.Add(ref d, 2) = Unsafe.Add(ref s, 2);
            Unsafe.Add(ref d, 3) = byte.MaxValue;
        }
    }
}
}
53 changes: 53 additions & 0 deletions src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.

using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

namespace SixLabors.ImageSharp
{
/// <summary>
/// Marker interface for <see cref="IComponentShuffle"/> implementations that shuffle
/// the bytes of 3-byte elements. It adds no members of its own.
/// </summary>
internal interface IShuffle3 : IComponentShuffle
{
}

/// <summary>
/// General purpose 3-byte shuffle: reorders the bytes of every 3-byte element according
/// to three component indices.
/// </summary>
internal readonly struct DefaultShuffle3 : IShuffle3
{
    private readonly byte p2;
    private readonly byte p1;
    private readonly byte p0;

    /// <summary>
    /// Initializes a new instance of the <see cref="DefaultShuffle3"/> struct.
    /// </summary>
    /// <param name="p2">Index (0-2) of the source byte written to destination byte 2.</param>
    /// <param name="p1">Index (0-2) of the source byte written to destination byte 1.</param>
    /// <param name="p0">Index (0-2) of the source byte written to destination byte 0.</param>
    public DefaultShuffle3(byte p2, byte p1, byte p0)
    {
        DebugGuard.MustBeBetweenOrEqualTo<byte>(p2, 0, 2, nameof(p2));
        DebugGuard.MustBeBetweenOrEqualTo<byte>(p1, 0, 2, nameof(p1));
        DebugGuard.MustBeBetweenOrEqualTo<byte>(p0, 0, 2, nameof(p0));

        this.p2 = p2;
        this.p1 = p1;
        this.p0 = p0;

        // The top slot of the control byte is fixed at 3; only the lower three vary.
        this.Control = SimdUtils.Shuffle.MmShuffle(3, p2, p1, p0);
    }

    /// <summary>
    /// Gets the shuffle control byte built from the three component indices.
    /// </summary>
    public byte Control { get; }

    /// <summary>
    /// Writes a shuffled copy of every 3-byte element of <paramref name="source"/> to
    /// the same position in <paramref name="dest"/>.
    /// </summary>
    /// <param name="source">The source bytes, consumed 3 at a time without bounds checks.</param>
    /// <param name="dest">The destination bytes, written 3 at a time without bounds checks.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
    {
        ref byte sourceStart = ref MemoryMarshal.GetReference(source);
        ref byte destStart = ref MemoryMarshal.GetReference(dest);

        int first = this.p0;
        int second = this.p1;
        int third = this.p2;

        int total = source.Length;
        int offset = 0;

        while (offset < total)
        {
            // Both references address the element that begins at the current offset.
            ref byte src = ref Unsafe.Add(ref sourceStart, offset);
            ref byte dst = ref Unsafe.Add(ref destStart, offset);

            Unsafe.Add(ref dst, 0) = Unsafe.Add(ref src, first);
            Unsafe.Add(ref dst, 1) = Unsafe.Add(ref src, second);
            Unsafe.Add(ref dst, 2) = Unsafe.Add(ref src, third);

            offset += 3;
        }
    }
}
}
Loading

0 comments on commit 522a91e

Please sign in to comment.