Skip to content

Commit

Permalink
Merge pull request #1849 from SixLabors/bp/vectoraddavx
Browse files Browse the repository at this point in the history
Add AVX2 version of AddVector
  • Loading branch information
brianpopow authored Nov 25, 2021
2 parents e11f318 + 2d60b73 commit 1713891
Show file tree
Hide file tree
Showing 5 changed files with 166 additions and 49 deletions.
4 changes: 3 additions & 1 deletion src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,10 @@ public static int FindMatchLength(ReadOnlySpan<uint> array1, ReadOnlySpan<uint>
public static int VectorMismatch(ReadOnlySpan<uint> array1, ReadOnlySpan<uint> array2, int length)
{
int matchLen = 0;
ref uint array1Ref = ref MemoryMarshal.GetReference(array1);
ref uint array2Ref = ref MemoryMarshal.GetReference(array2);

while (matchLen < length && array1[matchLen] == array2[matchLen])
while (matchLen < length && Unsafe.Add(ref array1Ref, matchLen) == Unsafe.Add(ref array2Ref, matchLen))
{
matchLen++;
}
Expand Down
55 changes: 51 additions & 4 deletions src/ImageSharp/Formats/Webp/Lossless/Vp8LHistogram.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,16 @@

using System;
using System.Collections.Generic;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif

namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{
internal class Vp8LHistogram : IDeepCloneable
internal sealed class Vp8LHistogram : IDeepCloneable
{
private const uint NonTrivialSym = 0xffffffff;

Expand Down Expand Up @@ -505,11 +511,52 @@ private static double ExtraCost(Span<uint> population, int length)
return cost;
}

private static void AddVector(uint[] a, uint[] b, uint[] output, int size)
private static void AddVector(Span<uint> a, Span<uint> b, Span<uint> output, int count)
{
for (int i = 0; i < size; i++)
DebugGuard.MustBeGreaterThanOrEqualTo(a.Length, count, nameof(a.Length));
DebugGuard.MustBeGreaterThanOrEqualTo(b.Length, count, nameof(b.Length));
DebugGuard.MustBeGreaterThanOrEqualTo(output.Length, count, nameof(output.Length));

#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx2.IsSupported)
{
ref uint aRef = ref MemoryMarshal.GetReference(a);
ref uint bRef = ref MemoryMarshal.GetReference(b);
ref uint outputRef = ref MemoryMarshal.GetReference(output);
int i;

for (i = 0; i + 32 <= count; i += 32)
{
// Load values.
Vector256<uint> a0 = Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref aRef, i));
Vector256<uint> a1 = Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref aRef, i + 8));
Vector256<uint> a2 = Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref aRef, i + 16));
Vector256<uint> a3 = Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref aRef, i + 24));
Vector256<uint> b0 = Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref bRef, i));
Vector256<uint> b1 = Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref bRef, i + 8));
Vector256<uint> b2 = Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref bRef, i + 16));
Vector256<uint> b3 = Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref bRef, i + 24));

// Note we are adding uint32_t's as *signed* int32's (using _mm_add_epi32). But
// that's ok since the histogram values are less than 1<<28 (max picture count).
Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref outputRef, i)) = Avx2.Add(a0, b0);
Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref outputRef, i + 8)) = Avx2.Add(a1, b1);
Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref outputRef, i + 16)) = Avx2.Add(a2, b2);
Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref outputRef, i + 24)) = Avx2.Add(a3, b3);
}

for (; i < count; i++)
{
output[i] = a[i] + b[i];
}
}
else
#endif
{
output[i] = a[i] + b[i];
for (int i = 0; i < count; i++)
{
output[i] = a[i] + b[i];
}
}
}
}
Expand Down
46 changes: 2 additions & 44 deletions src/ImageSharp/Formats/Webp/Lossy/Vp8Histogram.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

namespace SixLabors.ImageSharp.Formats.Webp.Lossy
{
internal class Vp8Histogram
internal sealed class Vp8Histogram
{
private readonly int[] scratch = new int[16];

Expand Down Expand Up @@ -49,7 +49,7 @@ public void CollectHistogram(Span<byte> reference, Span<byte> pred, int startBlo
this.distribution.AsSpan().Clear();
for (j = startBlock; j < endBlock; j++)
{
this.Vp8FTransform(reference.Slice(WebpLookupTables.Vp8DspScan[j]), pred.Slice(WebpLookupTables.Vp8DspScan[j]), this.output);
Vp8Encoding.FTransform(reference.Slice(WebpLookupTables.Vp8DspScan[j]), pred.Slice(WebpLookupTables.Vp8DspScan[j]), this.output, this.scratch);

// Convert coefficients to bin.
for (int k = 0; k < 16; ++k)
Expand Down Expand Up @@ -98,48 +98,6 @@ private void SetHistogramData(int[] distribution)
this.lastNonZero = lastNonZero;
}

private void Vp8FTransform(Span<byte> src, Span<byte> reference, Span<short> output)
{
int i;
Span<int> tmp = this.scratch;
tmp.Clear();

for (i = 0; i < 4; i++)
{
int d0 = src[0] - reference[0]; // 9bit dynamic range ([-255,255])
int d1 = src[1] - reference[1];
int d2 = src[2] - reference[2];
int d3 = src[3] - reference[3];
int a0 = d0 + d3; // 10b [-510,510]
int a1 = d1 + d2;
int a2 = d1 - d2;
int a3 = d0 - d3;
tmp[0 + (i * 4)] = (a0 + a1) * 8; // 14b [-8160,8160]
tmp[1 + (i * 4)] = ((a2 * 2217) + (a3 * 5352) + 1812) >> 9; // [-7536,7542]
tmp[2 + (i * 4)] = (a0 - a1) * 8;
tmp[3 + (i * 4)] = ((a3 * 2217) - (a2 * 5352) + 937) >> 9;

// Do not change the span in the last iteration.
if (i < 3)
{
src = src.Slice(WebpConstants.Bps);
reference = reference.Slice(WebpConstants.Bps);
}
}

for (i = 0; i < 4; i++)
{
int a0 = tmp[0 + i] + tmp[12 + i]; // 15b
int a1 = tmp[4 + i] + tmp[8 + i];
int a2 = tmp[4 + i] - tmp[8 + i];
int a3 = tmp[0 + i] - tmp[12 + i];
output[0 + i] = (short)((a0 + a1 + 7) >> 4); // 12b
output[4 + i] = (short)((((a2 * 2217) + (a3 * 5352) + 12000) >> 16) + (a3 != 0 ? 1 : 0));
output[8 + i] = (short)((a0 - a1 + 7) >> 4);
output[12 + i] = (short)(((a3 * 2217) - (a2 * 5352) + 51000) >> 16);
}
}

[MethodImpl(InliningOptions.ShortMethod)]
private static int ClipMax(int v, int max) => v > max ? max : v;
}
Expand Down
1 change: 1 addition & 0 deletions tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ private static void RunPredictor13Test()
public void TransformColorInverse_Works() => RunTransformColorInverseTest();

#if SUPPORTS_RUNTIME_INTRINSICS

[Fact]
public void Predictor11_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor11Test, HwIntrinsics.AllowAll);

Expand Down
109 changes: 109 additions & 0 deletions tests/ImageSharp.Tests/Formats/WebP/Vp8LHistogramTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.

using System;
using System.Linq;
using SixLabors.ImageSharp.Formats.Webp.Lossless;
using SixLabors.ImageSharp.Tests.TestUtilities;
using Xunit;

namespace SixLabors.ImageSharp.Tests.Formats.WebP
{
public class Vp8LHistogramTests
{
private static void RunAddVectorTest()
{
// arrange
uint[] pixelData =
{
4278191104, 4278191104, 4278191104, 4278191104, 4278191104, 4278191104, 4278191104, 4294577152,
4294707200, 4294707200, 4294707200, 4294707200, 4294837248, 4294837248, 4293926912, 4294316544,
4278191104, 4278191104, 4294837248, 4294837248, 4280287232, 4280350720, 4294447104, 4294707200,
4294838272, 4278516736, 4294837248, 4294837248, 4278516736, 4294707200, 4279298048, 4294837248,
4294837248, 4294837248, 4294837248, 4280287232, 4280287232, 4292670464, 4279633408, 4294838272,
4294837248, 4278516736, 4278516736, 4278516736, 4278516736, 4278516736, 4278778880, 4278193152,
4278191104, 4280287232, 4280287232, 4280287232, 4280287232, 4293971968, 4280612864, 4292802560,
4294837760, 4278516736, 4278516736, 4294837760, 4294707712, 4278516736, 4294837248, 4278193152,
4280287232, 4278984704, 4280287232, 4278243328, 4280287232, 4278244352, 4280287232, 4280025088,
4280025088, 4294837760, 4278192128, 4294838784, 4294837760, 4294707712, 4278778880, 4278324224,
4280287232, 4280287232, 4278202368, 4279115776, 4280287232, 4278243328, 4280287232, 4280287232,
4280025088, 4280287232, 4278192128, 4294838272, 4294838272, 4294837760, 4278190592, 4278778880,
4280875008, 4280287232, 4279896576, 4281075712, 4281075712, 4280287232, 4280287232, 4280287232,
4280287232, 4280287232, 4278190592, 4294709248, 4278516736, 4278516736, 4278584832, 4278909440,
4280287232, 4280287232, 4294367744, 4294621184, 4279115776, 4280287232, 4280287232, 4280351744,
4280287232, 4280287232, 4280287232, 4278513664, 4278516736, 4278716416, 4278584832, 4280291328,
4293062144, 4280287232, 4280287232, 4280287232, 4294456320, 4280291328, 4280287232, 4280287232,
4280287232, 4280287232, 4280287232, 4280287232, 4278513152, 4278716416, 4278584832, 4280291328,
4278198272, 4278198272, 4278589952, 4278198272, 4278198272, 4280287232, 4278765568, 4280287232,
4280287232, 4280287232, 4280287232, 4294712832, 4278513152, 4278716640, 4279300608, 4278584832,
4280156672, 4279373312, 4278589952, 4279373312, 4278328832, 4278328832, 4278328832, 4279634432,
4280287232, 4280287232, 4280287232, 4280287232, 4278457344, 4280483328, 4278584832, 4278385664,
4279634432, 4279373312, 4279634432, 4280287232, 4280287232, 4280156672, 4278589952, 4278328832,
4278198272, 4280156672, 4280483328, 4294363648, 4280287232, 4278376448, 4280287232, 4278647808,
4280287232, 4280287232, 4279373312, 4280287232, 4280287232, 4280156672, 4280287232, 4278198272,
4278198272, 4280156672, 4280287232, 4280287232, 4293669888, 4278765568, 4278765568, 4280287232,
4280287232, 4280287232, 4279634432, 4279634432, 4280287232, 4280287232, 4280287232, 4280287232,
4280287232, 4280287232, 4280287232, 4280287232, 4279373312, 4279764992, 4293539328, 4279896576,
4280287232, 4280287232, 4280287232, 4279634432, 4278198272, 4279634432, 4280287232, 4280287232,
4280287232, 4280287232, 4280287232, 4280287232, 4280287232, 4279503872, 4279503872, 4280288256,
4280287232, 4280287232, 4280287232, 4280287232, 4280287232, 4280287232, 4280287232, 4280287232,
4280287232, 4280287232, 4280287232, 4280287232, 4280287232, 4280287232, 4280287232, 4280287232
};

uint[] literals =
{
198, 0, 14, 0, 46, 0, 22, 0, 36, 0, 24, 0, 12, 0, 10, 0, 10, 0, 2, 0, 2, 0, 0, 0, 0, 0, 6, 0, 0, 0,
10, 0, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 4, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 6, 0, 2, 0, 0, 0, 0, 0, 6, 0, 0, 0, 2, 0, 0, 0, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 6, 0, 2, 0, 2, 0, 2, 0, 0, 0, 8, 0, 2, 0, 38, 0, 4
};

uint[] expectedLiterals = new uint[1305];

// All remaining values are expected to be zero.
literals.AsSpan().CopyTo(expectedLiterals);

var backwardRefs = new Vp8LBackwardRefs(pixelData.Length);
for (int i = 0; i < pixelData.Length; i++)
{
backwardRefs.Add(new PixOrCopy()
{
BgraOrDistance = pixelData[i],
Len = 1,
Mode = PixOrCopyMode.Literal
});
}

var histogram0 = new Vp8LHistogram(backwardRefs, 3);
var histogram1 = new Vp8LHistogram(backwardRefs, 3);
for (int i = 0; i < 5; i++)
{
histogram0.IsUsed[i] = true;
histogram1.IsUsed[i] = true;
}

var output = new Vp8LHistogram(3);

// act
histogram0.Add(histogram1, output);

// assert
Assert.True(output.Literal.SequenceEqual(expectedLiterals));
}

[Fact]
public void AddVector_Works() => RunAddVectorTest();

#if SUPPORTS_RUNTIME_INTRINSICS
[Fact]
public void AddVector_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunAddVectorTest, HwIntrinsics.AllowAll);

[Fact]
public void AddVector_WithoutAVX2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunAddVectorTest, HwIntrinsics.DisableAVX2);
#endif
}
}

0 comments on commit 1713891

Please sign in to comment.