Skip to content

Commit

Permalink
Merge pull request #2120 from br3aker/dp/jpeg-encoder-color-conversion
Browse files Browse the repository at this point in the history
Jpeg encoder complete rewrite
  • Loading branch information
JimBobSquarePants authored Aug 11, 2022
2 parents 4a0a5cf + 5d4fa11 commit 13897ae
Show file tree
Hide file tree
Showing 105 changed files with 3,763 additions and 4,496 deletions.
47 changes: 47 additions & 0 deletions src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ public static class HwIntrinsics

public static ReadOnlySpan<byte> PermuteMaskSwitchInnerDWords8x32 => new byte[] { 0, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0 };

private static ReadOnlySpan<byte> MoveFirst24BytesToSeparateLanes => new byte[] { 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 7, 0, 0, 0 };

internal static ReadOnlySpan<byte> ExtractRgb => new byte[] { 0, 3, 6, 9, 1, 4, 7, 10, 2, 5, 8, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0, 3, 6, 9, 1, 4, 7, 10, 2, 5, 8, 11, 0xFF, 0xFF, 0xFF, 0xFF };

private static ReadOnlySpan<byte> ShuffleMaskPad4Nx16 => new byte[] { 0, 1, 2, 0x80, 3, 4, 5, 0x80, 6, 7, 8, 0x80, 9, 10, 11, 0x80 };

private static ReadOnlySpan<byte> ShuffleMaskSlice4Nx16 => new byte[] { 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 0x80, 0x80, 0x80, 0x80 };
Expand Down Expand Up @@ -962,6 +966,49 @@ internal static void PackFromRgbPlanesAvx2Reduce(
blueChannel = blueChannel.Slice(slice);
destination = destination.Slice(slice);
}

internal static void UnpackToRgbPlanesAvx2Reduce(
ref Span<float> redChannel,
ref Span<float> greenChannel,
ref Span<float> blueChannel,
ref ReadOnlySpan<Rgb24> source)
{
ref Vector256<byte> rgbByteSpan = ref Unsafe.As<Rgb24, Vector256<byte>>(ref MemoryMarshal.GetReference(source));
ref Vector256<float> destRRef = ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(redChannel));
ref Vector256<float> destGRef = ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(greenChannel));
ref Vector256<float> destBRef = ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(blueChannel));

Vector256<uint> extractToLanesMask = Unsafe.As<byte, Vector256<uint>>(ref MemoryMarshal.GetReference(MoveFirst24BytesToSeparateLanes));
Vector256<byte> extractRgbMask = Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(ExtractRgb));
Vector256<byte> rgb, rg, bx;
Vector256<float> r, g, b;

const int bytesPerRgbStride = 24;
int count = (int)((uint)source.Length / 8);
for (int i = 0; i < count; i++)
{
rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref rgbByteSpan, (IntPtr)(bytesPerRgbStride * i)).AsUInt32(), extractToLanesMask).AsByte();

rgb = Avx2.Shuffle(rgb, extractRgbMask);

rg = Avx2.UnpackLow(rgb, Vector256<byte>.Zero);
bx = Avx2.UnpackHigh(rgb, Vector256<byte>.Zero);

r = Avx.ConvertToVector256Single(Avx2.UnpackLow(rg, Vector256<byte>.Zero).AsInt32());
g = Avx.ConvertToVector256Single(Avx2.UnpackHigh(rg, Vector256<byte>.Zero).AsInt32());
b = Avx.ConvertToVector256Single(Avx2.UnpackLow(bx, Vector256<byte>.Zero).AsInt32());

Unsafe.Add(ref destRRef, i) = r;
Unsafe.Add(ref destGRef, i) = g;
Unsafe.Add(ref destBRef, i) = b;
}

int sliceCount = count * 8;
redChannel = redChannel.Slice(sliceCount);
greenChannel = greenChannel.Slice(sliceCount);
blueChannel = blueChannel.Slice(sliceCount);
source = source.Slice(sliceCount);
}
}
}
}
Expand Down
43 changes: 43 additions & 0 deletions src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,25 @@ internal static void PackFromRgbPlanes(
PackFromRgbPlanesRemainder(redChannel, greenChannel, blueChannel, destination);
}

[MethodImpl(InliningOptions.ShortMethod)]
internal static void UnpackToRgbPlanes(
Span<float> redChannel,
Span<float> greenChannel,
Span<float> blueChannel,
ReadOnlySpan<Rgb24> source)
{
DebugGuard.IsTrue(greenChannel.Length == redChannel.Length, nameof(greenChannel), "Channels must be of same size!");
DebugGuard.IsTrue(blueChannel.Length == redChannel.Length, nameof(blueChannel), "Channels must be of same size!");
DebugGuard.IsTrue(source.Length <= redChannel.Length, nameof(source), "'source' span should not be bigger than the destination channels!");

if (Avx2.IsSupported)
{
HwIntrinsics.UnpackToRgbPlanesAvx2Reduce(ref redChannel, ref greenChannel, ref blueChannel, ref source);
}

UnpackToRgbPlanesScalar(redChannel, greenChannel, blueChannel, source);
}

private static void PackFromRgbPlanesScalarBatchedReduce(
ref ReadOnlySpan<byte> redChannel,
ref ReadOnlySpan<byte> greenChannel,
Expand Down Expand Up @@ -200,5 +219,29 @@ private static void PackFromRgbPlanesRemainder(
d.A = 255;
}
}

private static void UnpackToRgbPlanesScalar(
Span<float> redChannel,
Span<float> greenChannel,
Span<float> blueChannel,
ReadOnlySpan<Rgb24> source)
{
DebugGuard.IsTrue(greenChannel.Length == redChannel.Length, nameof(greenChannel), "Channels must be of same size!");
DebugGuard.IsTrue(blueChannel.Length == redChannel.Length, nameof(blueChannel), "Channels must be of same size!");
DebugGuard.IsTrue(source.Length <= redChannel.Length, nameof(source), "'source' span should not be bigger than the destination channels!");

ref float r = ref MemoryMarshal.GetReference(redChannel);
ref float g = ref MemoryMarshal.GetReference(greenChannel);
ref float b = ref MemoryMarshal.GetReference(blueChannel);
ref Rgb24 rgb = ref MemoryMarshal.GetReference(source);

for (int i = 0; i < source.Length; i++)
{
ref Rgb24 src = ref Unsafe.Add(ref rgb, i);
Unsafe.Add(ref r, i) = src.R;
Unsafe.Add(ref g, i) = src.G;
Unsafe.Add(ref b, i) = src.B;
}
}
}
}
15 changes: 15 additions & 0 deletions src/ImageSharp/Formats/Jpeg/Components/Block8x8.cs
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,21 @@ public void CopyTo(Span<int> destination)
}
}

public static Block8x8 Load(ReadOnlySpan<byte> data)
{
Unsafe.SkipInit(out Block8x8 result);
result.LoadFrom(data);
return result;
}

public void LoadFrom(ReadOnlySpan<byte> source)
{
for (int i = 0; i < Size; i++)
{
this[i] = source[i];
}
}

/// <summary>
/// Load raw 16bit integers from source.
/// </summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
// <auto-generated />
namespace SixLabors.ImageSharp.Formats.Jpeg.Components
{
internal partial struct Block8x8F
internal partial struct Block8x8F
{
/// <summary>
/// Level shift by +maximum/2, clip to [0, maximum]
Expand Down
159 changes: 159 additions & 0 deletions src/ImageSharp/Formats/Jpeg/Components/Block8x8F.ScaledCopy.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
// Copyright (c) Six Labors.
// Licensed under the Six Labors Split License.

using System.Numerics;
using System.Runtime.CompilerServices;

// ReSharper disable UseObjectOrCollectionInitializer
// ReSharper disable InconsistentNaming
namespace SixLabors.ImageSharp.Formats.Jpeg.Components
{
internal partial struct Block8x8F
{
[MethodImpl(InliningOptions.ShortMethod)]
public void ScaledCopyFrom(ref float areaOrigin, int areaStride) =>
CopyFrom1x1Scale(ref Unsafe.As<float, byte>(ref areaOrigin), ref Unsafe.As<Block8x8F, byte>(ref this), areaStride);

[MethodImpl(InliningOptions.ShortMethod)]
public void ScaledCopyTo(ref float areaOrigin, int areaStride, int horizontalScale, int verticalScale)
{
if (horizontalScale == 1 && verticalScale == 1)
{
CopyTo1x1Scale(ref Unsafe.As<Block8x8F, byte>(ref this), ref Unsafe.As<float, byte>(ref areaOrigin), areaStride);
return;
}

if (horizontalScale == 2 && verticalScale == 2)
{
this.CopyTo2x2Scale(ref areaOrigin, areaStride);
return;
}

// TODO: Optimize: implement all cases with scale-specific, loopless code!
this.CopyArbitraryScale(ref areaOrigin, areaStride, horizontalScale, verticalScale);
}

private void CopyTo2x2Scale(ref float areaOrigin, int areaStride)
{
ref Vector2 destBase = ref Unsafe.As<float, Vector2>(ref areaOrigin);
int destStride = (int)((uint)areaStride / 2);

WidenCopyRowImpl2x2(ref this.V0L, ref destBase, 0, destStride);
WidenCopyRowImpl2x2(ref this.V0L, ref destBase, 1, destStride);
WidenCopyRowImpl2x2(ref this.V0L, ref destBase, 2, destStride);
WidenCopyRowImpl2x2(ref this.V0L, ref destBase, 3, destStride);
WidenCopyRowImpl2x2(ref this.V0L, ref destBase, 4, destStride);
WidenCopyRowImpl2x2(ref this.V0L, ref destBase, 5, destStride);
WidenCopyRowImpl2x2(ref this.V0L, ref destBase, 6, destStride);
WidenCopyRowImpl2x2(ref this.V0L, ref destBase, 7, destStride);

[MethodImpl(MethodImplOptions.AggressiveInlining)]
static void WidenCopyRowImpl2x2(ref Vector4 selfBase, ref Vector2 destBase, nint row, nint destStride)
{
ref Vector4 sLeft = ref Unsafe.Add(ref selfBase, 2 * row);
ref Vector4 sRight = ref Unsafe.Add(ref sLeft, 1);

nint offset = 2 * row * destStride;
ref Vector4 dTopLeft = ref Unsafe.As<Vector2, Vector4>(ref Unsafe.Add(ref destBase, offset));
ref Vector4 dBottomLeft = ref Unsafe.As<Vector2, Vector4>(ref Unsafe.Add(ref destBase, offset + destStride));

var xyLeft = new Vector4(sLeft.X);
xyLeft.Z = sLeft.Y;
xyLeft.W = sLeft.Y;

var zwLeft = new Vector4(sLeft.Z);
zwLeft.Z = sLeft.W;
zwLeft.W = sLeft.W;

var xyRight = new Vector4(sRight.X);
xyRight.Z = sRight.Y;
xyRight.W = sRight.Y;

var zwRight = new Vector4(sRight.Z);
zwRight.Z = sRight.W;
zwRight.W = sRight.W;

dTopLeft = xyLeft;
Unsafe.Add(ref dTopLeft, 1) = zwLeft;
Unsafe.Add(ref dTopLeft, 2) = xyRight;
Unsafe.Add(ref dTopLeft, 3) = zwRight;

dBottomLeft = xyLeft;
Unsafe.Add(ref dBottomLeft, 1) = zwLeft;
Unsafe.Add(ref dBottomLeft, 2) = xyRight;
Unsafe.Add(ref dBottomLeft, 3) = zwRight;
}
}

[MethodImpl(InliningOptions.ColdPath)]
private void CopyArbitraryScale(ref float areaOrigin, int areaStride, int horizontalScale, int verticalScale)
{
for (int y = 0; y < 8; y++)
{
int yy = y * verticalScale;
int y8 = y * 8;

for (int x = 0; x < 8; x++)
{
int xx = x * horizontalScale;

float value = this[y8 + x];
nint baseIdx = (yy * areaStride) + xx;

for (nint i = 0; i < verticalScale; i++, baseIdx += areaStride)
{
for (nint j = 0; j < horizontalScale; j++)
{
// area[xx + j, yy + i] = value;
Unsafe.Add(ref areaOrigin, baseIdx + j) = value;
}
}
}
}
}

private static void CopyTo1x1Scale(ref byte origin, ref byte dest, int areaStride)
{
int destStride = areaStride * sizeof(float);

CopyRowImpl(ref origin, ref dest, destStride, 0);
CopyRowImpl(ref origin, ref dest, destStride, 1);
CopyRowImpl(ref origin, ref dest, destStride, 2);
CopyRowImpl(ref origin, ref dest, destStride, 3);
CopyRowImpl(ref origin, ref dest, destStride, 4);
CopyRowImpl(ref origin, ref dest, destStride, 5);
CopyRowImpl(ref origin, ref dest, destStride, 6);
CopyRowImpl(ref origin, ref dest, destStride, 7);

[MethodImpl(MethodImplOptions.AggressiveInlining)]
static void CopyRowImpl(ref byte origin, ref byte dest, int destStride, int row)
{
origin = ref Unsafe.Add(ref origin, row * 8 * sizeof(float));
dest = ref Unsafe.Add(ref dest, row * destStride);
Unsafe.CopyBlock(ref dest, ref origin, 8 * sizeof(float));
}
}

private static void CopyFrom1x1Scale(ref byte origin, ref byte dest, int areaStride)
{
int destStride = areaStride * sizeof(float);

CopyRowImpl(ref origin, ref dest, destStride, 0);
CopyRowImpl(ref origin, ref dest, destStride, 1);
CopyRowImpl(ref origin, ref dest, destStride, 2);
CopyRowImpl(ref origin, ref dest, destStride, 3);
CopyRowImpl(ref origin, ref dest, destStride, 4);
CopyRowImpl(ref origin, ref dest, destStride, 5);
CopyRowImpl(ref origin, ref dest, destStride, 6);
CopyRowImpl(ref origin, ref dest, destStride, 7);

[MethodImpl(MethodImplOptions.AggressiveInlining)]
static void CopyRowImpl(ref byte origin, ref byte dest, int sourceStride, int row)
{
origin = ref Unsafe.Add(ref origin, row * sourceStride);
dest = ref Unsafe.Add(ref dest, row * 8 * sizeof(float));
Unsafe.CopyBlock(ref dest, ref origin, 8 * sizeof(float));
}
}
}
}
Loading

0 comments on commit 13897ae

Please sign in to comment.