diff --git a/src/ImageSharp/Common/Extensions/SimdUtils.cs b/src/ImageSharp/Common/Extensions/SimdUtils.cs deleted file mode 100644 index 7b77fefcac..0000000000 --- a/src/ImageSharp/Common/Extensions/SimdUtils.cs +++ /dev/null @@ -1,232 +0,0 @@ -// Copyright (c) Six Labors and contributors. -// Licensed under the Apache License, Version 2.0. - -using System; -using System.Diagnostics; -using System.Numerics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; - -namespace SixLabors.ImageSharp -{ - /// - /// Various extension and utility methods for and utilizing SIMD capabilities - /// - internal static class SimdUtils - { - /// - /// Gets a value indicating whether the code is being executed on AVX2 CPU where both float and integer registers are of size 256 byte. - /// - public static bool IsAvx2CompatibleArchitecture => Vector.Count == 8 && Vector.Count == 8; - - internal static void GuardAvx2(string operation) - { - if (!IsAvx2CompatibleArchitecture) - { - throw new NotSupportedException($"{operation} is supported only on AVX2 CPU!"); - } - } - - /// - /// Transform all scalars in 'v' in a way that converting them to would have rounding semantics. - /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static Vector4 PseudoRound(this Vector4 v) - { - var sign = Vector4.Clamp(v, new Vector4(-1), new Vector4(1)); - - return v + (sign * 0.5f); - } - - /// - /// Rounds all values in 'v' to the nearest integer following semantics. - /// Source: - /// - /// https://github.com/g-truc/glm/blob/master/glm/simd/common.h#L110 - /// - /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static Vector FastRound(this Vector x) - { - Vector magic0 = new Vector(int.MinValue); // 0x80000000 - Vector sgn0 = Vector.AsVectorSingle(magic0); - Vector and0 = Vector.BitwiseAnd(sgn0, x); - Vector or0 = Vector.BitwiseOr(and0, new Vector(8388608.0f)); - Vector add0 = Vector.Add(x, or0); - Vector sub0 = Vector.Subtract(add0, or0); - return sub0; - } - - /// - /// Convert 'source.Length' values normalized into [0..1] from 'source' into 'dest' buffer of values. - /// The values gonna be scaled up into [0-255] and rounded. - /// Based on: - /// - /// http://lolengine.net/blog/2011/3/20/understanding-fast-float-integer-conversions - /// - /// - internal static void BulkConvertNormalizedFloatToByte(ReadOnlySpan source, Span dest) - { - GuardAvx2(nameof(BulkConvertNormalizedFloatToByte)); - - DebugGuard.IsTrue((source.Length % Vector.Count) == 0, nameof(source), "source.Length should be divisable by Vector.Count!"); - - if (source.Length == 0) - { - return; - } - - ref Vector srcBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); - ref Octet.OfByte destBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest)); - int n = source.Length / 8; - - Vector magick = new Vector(32768.0f); - Vector scale = new Vector(255f) / new Vector(256f); - - // need to copy to a temporary struct, because - // SimdUtils.Octet.OfUInt32 temp = Unsafe.As, SimdUtils.Octet.OfUInt32>(ref x) - // does not work. TODO: This might be a CoreClr bug, need to ask/report - var temp = default(Octet.OfUInt32); - ref Vector tempRef = ref Unsafe.As>(ref temp); - - for (int i = 0; i < n; i++) - { - // union { float f; uint32_t i; } u; - // u.f = 32768.0f + x * (255.0f / 256.0f); - // return (uint8_t)u.i; - Vector x = Unsafe.Add(ref srcBase, i); - x = (x * scale) + magick; - tempRef = x; - - ref Octet.OfByte d = ref Unsafe.Add(ref destBase, i); - d.LoadFrom(ref temp); - } - } - - /// - /// Same as but clamps overflown values before conversion. - /// - internal static void BulkConvertNormalizedFloatToByteClampOverflows(ReadOnlySpan source, Span dest) - { - GuardAvx2(nameof(BulkConvertNormalizedFloatToByte)); - - DebugGuard.IsTrue((source.Length % Vector.Count) == 0, nameof(source), "source.Length should be divisable by Vector.Count!"); - - if (source.Length == 0) - { - return; - } - - ref Vector srcBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); - ref Octet.OfByte destBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest)); - int n = source.Length / 8; - - Vector magick = new Vector(32768.0f); - Vector scale = new Vector(255f) / new Vector(256f); - - // need to copy to a temporary struct, because - // SimdUtils.Octet.OfUInt32 temp = Unsafe.As, SimdUtils.Octet.OfUInt32>(ref x) - // does not work. TODO: This might be a CoreClr bug, need to ask/report - var temp = default(Octet.OfUInt32); - ref Vector tempRef = ref Unsafe.As>(ref temp); - - for (int i = 0; i < n; i++) - { - // union { float f; uint32_t i; } u; - // u.f = 32768.0f + x * (255.0f / 256.0f); - // return (uint8_t)u.i; - Vector x = Unsafe.Add(ref srcBase, i); - x = Vector.Max(x, Vector.Zero); - x = Vector.Min(x, Vector.One); - - x = (x * scale) + magick; - tempRef = x; - - ref Octet.OfByte d = ref Unsafe.Add(ref destBase, i); - d.LoadFrom(ref temp); - } - } - - // TODO: Replace these with T4-d library level tuples! - internal static class Octet - { - [StructLayout(LayoutKind.Explicit, Size = 8 * sizeof(uint))] - public struct OfUInt32 - { - [FieldOffset(0 * sizeof(uint))] - public uint V0; - - [FieldOffset(1 * sizeof(uint))] - public uint V1; - - [FieldOffset(2 * sizeof(uint))] - public uint V2; - - [FieldOffset(3 * sizeof(uint))] - public uint V3; - - [FieldOffset(4 * sizeof(uint))] - public uint V4; - - [FieldOffset(5 * sizeof(uint))] - public uint V5; - - [FieldOffset(6 * sizeof(uint))] - public uint V6; - - [FieldOffset(7 * sizeof(uint))] - public uint V7; - - public override string ToString() - { - return $"[{this.V0},{this.V1},{this.V2},{this.V3},{this.V4},{this.V5},{this.V6},{this.V7}]"; - } - } - - [StructLayout(LayoutKind.Explicit, Size = 8)] - public struct OfByte - { - [FieldOffset(0)] - public byte V0; - - [FieldOffset(1)] - public byte V1; - - [FieldOffset(2)] - public byte V2; - - [FieldOffset(3)] - public byte V3; - - [FieldOffset(4)] - public byte V4; - - [FieldOffset(5)] - public byte V5; - - [FieldOffset(6)] - public byte V6; - - [FieldOffset(7)] - public byte V7; - - public override string ToString() - { - return $"[{this.V0},{this.V1},{this.V2},{this.V3},{this.V4},{this.V5},{this.V6},{this.V7}]"; - } - - public void LoadFrom(ref OfUInt32 i) - { - this.V0 = (byte)i.V0; - this.V1 = (byte)i.V1; - this.V2 = (byte)i.V2; - this.V3 = (byte)i.V3; - this.V4 = (byte)i.V4; - this.V5 = (byte)i.V5; - this.V6 = (byte)i.V6; - this.V7 = (byte)i.V7; - } - } - } - } -} \ No newline at end of file diff --git a/src/ImageSharp/Common/Helpers/ImageMaths.cs b/src/ImageSharp/Common/Helpers/ImageMaths.cs index 35769d96a7..02a2e9ee55 100644 --- a/src/ImageSharp/Common/Helpers/ImageMaths.cs +++ b/src/ImageSharp/Common/Helpers/ImageMaths.cs @@ -39,6 +39,28 @@ public static int LeastCommonMultiple(int a, int b) return (a / GreatestCommonDivisor(a, b)) * b; } + /// + /// Calculates % 4 + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static int Modulo4(int x) => x & 3; + + /// + /// Calculates % 8 + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static int Modulo8(int x) => x & 7; + + /// + /// Fast (x mod m) calculator, with the restriction that + /// should be power of 2. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static int ModuloP2(int x, int m) + { + return x & (m - 1); + } + /// /// Returns the absolute value of a 32-bit signed integer. Uses bit shifting to speed up the operation. /// @@ -46,7 +68,7 @@ public static int LeastCommonMultiple(int a, int b) /// A number that is greater than , but less than or equal to /// /// The - [MethodImpl(MethodImplOptions.AggressiveInlining)] + [MethodImpl(InliningOptions.ShortMethod)] public static int FastAbs(int x) { int y = x >> 31; @@ -58,7 +80,7 @@ public static int FastAbs(int x) /// /// A single-precision floating-point number /// The number raised to the power of 2. - [MethodImpl(MethodImplOptions.AggressiveInlining)] + [MethodImpl(InliningOptions.ShortMethod)] public static float Pow2(float x) => x * x; /// @@ -66,7 +88,7 @@ public static int FastAbs(int x) /// /// A single-precision floating-point number /// The number raised to the power of 3. - [MethodImpl(MethodImplOptions.AggressiveInlining)] + [MethodImpl(InliningOptions.ShortMethod)] public static float Pow3(float x) => x * x * x; /// @@ -77,7 +99,7 @@ public static int FastAbs(int x) /// /// The /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] + [MethodImpl(InliningOptions.ShortMethod)] public static int GetBitsNeededForColorDepth(int colors) => Math.Max(1, (int)Math.Ceiling(Math.Log(colors, 2))); /// @@ -85,7 +107,7 @@ public static int FastAbs(int x) /// /// The bit depth. /// The - [MethodImpl(MethodImplOptions.AggressiveInlining)] + [MethodImpl(InliningOptions.ShortMethod)] public static int GetColorCountForBitDepth(int bitDepth) => 1 << bitDepth; /// @@ -94,7 +116,7 @@ public static int FastAbs(int x) /// The x provided to G(x). /// The spread of the blur. /// The Gaussian G(x) - [MethodImpl(MethodImplOptions.AggressiveInlining)] + [MethodImpl(InliningOptions.ShortMethod)] public static float Gaussian(float x, float sigma) { const float Numerator = 1.0f; @@ -117,7 +139,7 @@ public static float Gaussian(float x, float sigma) /// /// The sine cardinal of . /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] + [MethodImpl(InliningOptions.ShortMethod)] public static float SinC(float f) { if (MathF.Abs(f) > Constants.Epsilon) @@ -140,7 +162,7 @@ public static float SinC(float f) /// /// The . /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] + [MethodImpl(InliningOptions.ShortMethod)] public static float GetBcValue(float x, float b, float c) { if (x < 0F) @@ -176,7 +198,7 @@ public static float GetBcValue(float x, float b, float c) /// /// The bounding . /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] + [MethodImpl(InliningOptions.ShortMethod)] public static Rectangle GetBoundingRectangle(Point topLeft, Point bottomRight) => new Rectangle(topLeft.X, topLeft.Y, bottomRight.X - topLeft.X, bottomRight.Y - topLeft.Y); /// diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.BasicIntrinsics256.cs b/src/ImageSharp/Common/Helpers/SimdUtils.BasicIntrinsics256.cs new file mode 100644 index 0000000000..0f1ce2ab6a --- /dev/null +++ b/src/ImageSharp/Common/Helpers/SimdUtils.BasicIntrinsics256.cs @@ -0,0 +1,215 @@ +// Copyright (c) Six Labors and contributors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Diagnostics; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using SixLabors.ImageSharp.Tuples; + +// ReSharper disable MemberHidesStaticFromOuterClass +namespace SixLabors.ImageSharp +{ + internal static partial class SimdUtils + { + /// + /// Implementation with 256bit / AVX2 intrinsics NOT depending on newer API-s (Vector.Widen etc.) + /// + public static class BasicIntrinsics256 + { + public static bool IsAvailable { get; } = IsAvx2CompatibleArchitecture; + + /// + /// as many elements as possible, slicing them down (keeping the remainder). + /// + [MethodImpl(InliningOptions.ShortMethod)] + internal static void BulkConvertByteToNormalizedFloatReduce( + ref ReadOnlySpan source, + ref Span dest) + { + DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!"); + + if (!IsAvailable) + { + return; + } + + int remainder = ImageMaths.Modulo8(source.Length); + int adjustedCount = source.Length - remainder; + + if (adjustedCount > 0) + { + BulkConvertByteToNormalizedFloat( + source.Slice(0, adjustedCount), + dest.Slice(0, adjustedCount)); + + source = source.Slice(adjustedCount); + dest = dest.Slice(adjustedCount); + } + } + + /// + /// as many elements as possible, slicing them down (keeping the remainder). + /// + [MethodImpl(InliningOptions.ShortMethod)] + internal static void BulkConvertNormalizedFloatToByteClampOverflowsReduce( + ref ReadOnlySpan source, + ref Span dest) + { + DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!"); + + if (!IsAvailable) + { + return; + } + + int remainder = ImageMaths.Modulo8(source.Length); + int adjustedCount = source.Length - remainder; + + if (adjustedCount > 0) + { + BulkConvertNormalizedFloatToByteClampOverflows(source.Slice(0, adjustedCount), dest.Slice(0, adjustedCount)); + + source = source.Slice(adjustedCount); + dest = dest.Slice(adjustedCount); + } + } + + /// + /// SIMD optimized implementation for . + /// Works only with span Length divisible by 8. + /// Implementation adapted from: + /// http://lolengine.net/blog/2011/3/20/understanding-fast-float-integer-conversions + /// http://stackoverflow.com/a/536278 + /// + internal static void BulkConvertByteToNormalizedFloat(ReadOnlySpan source, Span dest) + { + VerifyIsAvx2Compatible(nameof(BulkConvertByteToNormalizedFloat)); + VerifySpanInput(source, dest, 8); + + var bVec = new Vector(256.0f / 255.0f); + var magicFloat = new Vector(32768.0f); + var magicInt = new Vector(1191182336); // reinterpreded value of 32768.0f + var mask = new Vector(255); + + ref Octet.OfByte sourceBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source)); + ref Octet.OfUInt32 destBaseAsWideOctet = ref Unsafe.As(ref MemoryMarshal.GetReference(dest)); + + ref Vector destBaseAsFloat = ref Unsafe.As>(ref destBaseAsWideOctet); + + int n = dest.Length / 8; + + for (int i = 0; i < n; i++) + { + ref Octet.OfByte s = ref Unsafe.Add(ref sourceBase, i); + ref Octet.OfUInt32 d = ref Unsafe.Add(ref destBaseAsWideOctet, i); + d.LoadFrom(ref s); + } + + for (int i = 0; i < n; i++) + { + ref Vector df = ref Unsafe.Add(ref destBaseAsFloat, i); + + var vi = Vector.AsVectorUInt32(df); + vi &= mask; + vi |= magicInt; + + var vf = Vector.AsVectorSingle(vi); + vf = (vf - magicFloat) * bVec; + + df = vf; + } + } + + /// + /// Implementation of which is faster on older runtimes. + /// + internal static void BulkConvertNormalizedFloatToByteClampOverflows(ReadOnlySpan source, Span dest) + { + VerifyIsAvx2Compatible(nameof(BulkConvertNormalizedFloatToByteClampOverflows)); + VerifySpanInput(source, dest, 8); + + if (source.Length == 0) + { + return; + } + + ref Vector srcBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref Octet.OfByte destBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest)); + int n = source.Length / 8; + + Vector magick = new Vector(32768.0f); + Vector scale = new Vector(255f) / new Vector(256f); + + // need to copy to a temporary struct, because + // SimdUtils.Octet.OfUInt32 temp = Unsafe.As, SimdUtils.Octet.OfUInt32>(ref x) + // does not work. TODO: This might be a CoreClr bug, need to ask/report + var temp = default(Octet.OfUInt32); + ref Vector tempRef = ref Unsafe.As>(ref temp); + + for (int i = 0; i < n; i++) + { + // union { float f; uint32_t i; } u; + // u.f = 32768.0f + x * (255.0f / 256.0f); + // return (uint8_t)u.i; + Vector x = Unsafe.Add(ref srcBase, i); + x = Vector.Max(x, Vector.Zero); + x = Vector.Min(x, Vector.One); + + x = (x * scale) + magick; + tempRef = x; + + ref Octet.OfByte d = ref Unsafe.Add(ref destBase, i); + d.LoadFrom(ref temp); + } + } + + /// + /// Convert all values normalized into [0..1] from 'source' + /// into 'dest' buffer of . The values are scaled up into [0-255] and rounded. + /// This implementation is SIMD optimized and works only when span Length is divisible by 8. + /// Based on: + /// + /// http://lolengine.net/blog/2011/3/20/understanding-fast-float-integer-conversions + /// + /// + internal static void BulkConvertNormalizedFloatToByte(ReadOnlySpan source, Span dest) + { + VerifyIsAvx2Compatible(nameof(BulkConvertNormalizedFloatToByte)); + VerifySpanInput(source, dest, 8); + + if (source.Length == 0) + { + return; + } + + ref Vector srcBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref Octet.OfByte destBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest)); + int n = source.Length / 8; + + Vector magick = new Vector(32768.0f); + Vector scale = new Vector(255f) / new Vector(256f); + + // need to copy to a temporary struct, because + // SimdUtils.Octet.OfUInt32 temp = Unsafe.As, SimdUtils.Octet.OfUInt32>(ref x) + // does not work. TODO: This might be a CoreClr bug, need to ask/report + var temp = default(Octet.OfUInt32); + ref Vector tempRef = ref Unsafe.As>(ref temp); + + for (int i = 0; i < n; i++) + { + // union { float f; uint32_t i; } u; + // u.f = 32768.0f + x * (255.0f / 256.0f); + // return (uint8_t)u.i; + Vector x = Unsafe.Add(ref srcBase, i); + x = (x * scale) + magick; + tempRef = x; + + ref Octet.OfByte d = ref Unsafe.Add(ref destBase, i); + d.LoadFrom(ref temp); + } + } + } + } +} \ No newline at end of file diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs new file mode 100644 index 0000000000..e0d6187dca --- /dev/null +++ b/src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs @@ -0,0 +1,178 @@ +using System; +using System.Diagnostics; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +// ReSharper disable MemberHidesStaticFromOuterClass +namespace SixLabors.ImageSharp +{ + internal static partial class SimdUtils + { + /// + /// Implementation methods based on newer API-s (Vector.Widen, Vector.Narrow, Vector.ConvertTo*). + /// Only accelerated only on RyuJIT having dotnet/coreclr#10662 merged (.NET Core 2.1+ .NET 4.7.2+) + /// See: + /// https://github.com/dotnet/coreclr/pull/10662 + /// API Proposal: + /// https://github.com/dotnet/corefx/issues/15957 + /// + public static class ExtendedIntrinsics + { + public static bool IsAvailable { get; } = +#if NETCOREAPP2_1 + // TODO: Also available in .NET 4.7.2, we need to add a build target! + Vector.IsHardwareAccelerated; +#else + false; +#endif + + /// + /// as many elements as possible, slicing them down (keeping the remainder). + /// + [MethodImpl(InliningOptions.ShortMethod)] + internal static void BulkConvertByteToNormalizedFloatReduce( + ref ReadOnlySpan source, + ref Span dest) + { + DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!"); + + if (!IsAvailable) + { + return; + } + + int remainder = ImageMaths.ModuloP2(source.Length, Vector.Count); + int adjustedCount = source.Length - remainder; + + if (adjustedCount > 0) + { + BulkConvertByteToNormalizedFloat(source.Slice(0, adjustedCount), dest.Slice(0, adjustedCount)); + + source = source.Slice(adjustedCount); + dest = dest.Slice(adjustedCount); + } + } + + /// + /// as many elements as possible, slicing them down (keeping the remainder). + /// + [MethodImpl(InliningOptions.ShortMethod)] + internal static void BulkConvertNormalizedFloatToByteClampOverflowsReduce( + ref ReadOnlySpan source, + ref Span dest) + { + DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!"); + + if (!IsAvailable) + { + return; + } + + int remainder = ImageMaths.ModuloP2(source.Length, Vector.Count); + int adjustedCount = source.Length - remainder; + + if (adjustedCount > 0) + { + BulkConvertNormalizedFloatToByteClampOverflows( + source.Slice(0, adjustedCount), + dest.Slice(0, adjustedCount)); + + source = source.Slice(adjustedCount); + dest = dest.Slice(adjustedCount); + } + } + + /// + /// Implementation , which is faster on new RyuJIT runtime. + /// + internal static void BulkConvertByteToNormalizedFloat(ReadOnlySpan source, Span dest) + { + VerifySpanInput(source, dest, Vector.Count); + + int n = dest.Length / Vector.Count; + + ref Vector sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref Vector destBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + for (int i = 0; i < n; i++) + { + Vector b = Unsafe.Add(ref sourceBase, i); + + Vector.Widen(b, out Vector s0, out Vector s1); + Vector.Widen(s0, out Vector w0, out Vector w1); + Vector.Widen(s1, out Vector w2, out Vector w3); + + Vector f0 = ConvertToSingle(w0); + Vector f1 = ConvertToSingle(w1); + Vector f2 = ConvertToSingle(w2); + Vector f3 = ConvertToSingle(w3); + + ref Vector d = ref Unsafe.Add(ref destBase, i * 4); + d = f0; + Unsafe.Add(ref d, 1) = f1; + Unsafe.Add(ref d, 2) = f2; + Unsafe.Add(ref d, 3) = f3; + } + } + + /// + /// Implementation of , which is faster on new .NET runtime. + /// + internal static void BulkConvertNormalizedFloatToByteClampOverflows( + ReadOnlySpan source, + Span dest) + { + VerifySpanInput(source, dest, Vector.Count); + + int n = dest.Length / Vector.Count; + + ref Vector sourceBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref Vector destBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + for (int i = 0; i < n; i++) + { + ref Vector s = ref Unsafe.Add(ref sourceBase, i * 4); + + Vector f0 = s; + Vector f1 = Unsafe.Add(ref s, 1); + Vector f2 = Unsafe.Add(ref s, 2); + Vector f3 = Unsafe.Add(ref s, 3); + + Vector w0 = ConvertToUInt32(f0); + Vector w1 = ConvertToUInt32(f1); + Vector w2 = ConvertToUInt32(f2); + Vector w3 = ConvertToUInt32(f3); + + Vector u0 = Vector.Narrow(w0, w1); + Vector u1 = Vector.Narrow(w2, w3); + + Vector b = Vector.Narrow(u0, u1); + + Unsafe.Add(ref destBase, i) = b; + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static Vector ConvertToUInt32(Vector vf) + { + Vector maxBytes = new Vector(255f); + vf *= maxBytes; + vf += new Vector(0.5f); + vf = Vector.Min(Vector.Max(vf, Vector.Zero), maxBytes); + Vector vi = Vector.ConvertToInt32(vf); + return Vector.AsVectorUInt32(vi); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static Vector ConvertToSingle(Vector u) + { + Vector vi = Vector.AsVectorInt32(u); + Vector v = Vector.ConvertToSingle(vi); + v *= new Vector(1f / 255f); + return v; + } + } + } +} \ No newline at end of file diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.FallbackIntrinsics128.cs b/src/ImageSharp/Common/Helpers/SimdUtils.FallbackIntrinsics128.cs new file mode 100644 index 0000000000..565ea08f5d --- /dev/null +++ b/src/ImageSharp/Common/Helpers/SimdUtils.FallbackIntrinsics128.cs @@ -0,0 +1,151 @@ +// Copyright (c) Six Labors and contributors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +// ReSharper disable MemberHidesStaticFromOuterClass +namespace SixLabors.ImageSharp +{ + internal static partial class SimdUtils + { + /// + /// Fallback implementation based on (128bit). + /// For , efficient software fallback implementations are present, + /// and we hope that even mono's JIT is able to emit SIMD instructions for that type :P + /// + public static class FallbackIntrinsics128 + { + /// + /// as many elements as possible, slicing them down (keeping the remainder). + /// + [MethodImpl(InliningOptions.ShortMethod)] + internal static void BulkConvertByteToNormalizedFloatReduce( + ref ReadOnlySpan source, + ref Span dest) + { + DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!"); + + int remainder = ImageMaths.Modulo4(source.Length); + int adjustedCount = source.Length - remainder; + + if (adjustedCount > 0) + { + BulkConvertByteToNormalizedFloat( + source.Slice(0, adjustedCount), + dest.Slice(0, adjustedCount)); + + source = source.Slice(adjustedCount); + dest = dest.Slice(adjustedCount); + } + } + + /// + /// as many elements as possible, slicing them down (keeping the remainder). + /// + [MethodImpl(InliningOptions.ShortMethod)] + internal static void BulkConvertNormalizedFloatToByteClampOverflowsReduce( + ref ReadOnlySpan source, + ref Span dest) + { + DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!"); + + int remainder = ImageMaths.Modulo4(source.Length); + int adjustedCount = source.Length - remainder; + + if (adjustedCount > 0) + { + BulkConvertNormalizedFloatToByteClampOverflows( + source.Slice(0, adjustedCount), + dest.Slice(0, adjustedCount)); + + source = source.Slice(adjustedCount); + dest = dest.Slice(adjustedCount); + } + } + + /// + /// Implementation of using . + /// + [MethodImpl(InliningOptions.ColdPath)] + internal static void BulkConvertByteToNormalizedFloat(ReadOnlySpan source, Span dest) + { + VerifySpanInput(source, dest, 4); + + int count = dest.Length / 4; + if (count == 0) + { + return; + } + + ref ByteVector4 sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source)); + ref Vector4 dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest)); + + const float Scale = 1f / 255f; + Vector4 d = default; + + for (int i = 0; i < count; i++) + { + ref ByteVector4 s = ref Unsafe.Add(ref sBase, i); + d.X = s.X; + d.Y = s.Y; + d.Z = s.Z; + d.W = s.W; + d *= Scale; + Unsafe.Add(ref dBase, i) = d; + } + } + + /// + /// Implementation of using . + /// + [MethodImpl(InliningOptions.ColdPath)] + internal static void BulkConvertNormalizedFloatToByteClampOverflows( + ReadOnlySpan source, + Span dest) + { + VerifySpanInput(source, dest, 4); + + int count = source.Length / 4; + if (count == 0) + { + return; + } + + ref Vector4 sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source)); + ref ByteVector4 dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest)); + + var half = new Vector4(0.5f); + var maxBytes = new Vector4(255f); + + for (int i = 0; i < count; i++) + { + Vector4 s = Unsafe.Add(ref sBase, i); + s *= maxBytes; + s += half; + + // I'm not sure if Vector4.Clamp() is properly implemented with intrinsics. + s = Vector4.Max(Vector4.Zero, s); + s = Vector4.Min(maxBytes, s); + + ref ByteVector4 d = ref Unsafe.Add(ref dBase, i); + d.X = (byte)s.X; + d.Y = (byte)s.Y; + d.Z = (byte)s.Z; + d.W = (byte)s.W; + } + } + + [StructLayout(LayoutKind.Sequential)] + private struct ByteVector4 + { + public byte X; + public byte Y; + public byte Z; + public byte W; + } + } + } +} \ No newline at end of file diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.cs b/src/ImageSharp/Common/Helpers/SimdUtils.cs new file mode 100644 index 0000000000..737e620061 --- /dev/null +++ b/src/ImageSharp/Common/Helpers/SimdUtils.cs @@ -0,0 +1,185 @@ +// Copyright (c) Six Labors and contributors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Diagnostics; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +using SixLabors.ImageSharp.PixelFormats; +using SixLabors.ImageSharp.Tuples; + +namespace SixLabors.ImageSharp +{ + /// + /// Various extension and utility methods for and utilizing SIMD capabilities + /// + internal static partial class SimdUtils + { + /// + /// Gets a value indicating whether the code is being executed on AVX2 CPU where both float and integer registers are of size 256 byte. + /// + public static bool IsAvx2CompatibleArchitecture { get; } = + Vector.IsHardwareAccelerated && Vector.Count == 8 && Vector.Count == 8; + + /// + /// Transform all scalars in 'v' in a way that converting them to would have rounding semantics. + /// + /// The vector + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static Vector4 PseudoRound(this Vector4 v) + { + var sign = Vector4.Clamp(v, new Vector4(-1), new Vector4(1)); + + return v + (sign * 0.5f); + } + + /// + /// Rounds all values in 'v' to the nearest integer following semantics. + /// Source: + /// + /// https://github.com/g-truc/glm/blob/master/glm/simd/common.h#L110 + /// + /// + /// The vector + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static Vector FastRound(this Vector v) + { + Vector magic0 = new Vector(int.MinValue); // 0x80000000 + Vector sgn0 = Vector.AsVectorSingle(magic0); + Vector and0 = Vector.BitwiseAnd(sgn0, v); + Vector or0 = Vector.BitwiseOr(and0, new Vector(8388608.0f)); + Vector add0 = Vector.Add(v, or0); + Vector sub0 = Vector.Subtract(add0, or0); + return sub0; + } + + /// + /// Converts all input -s to -s normalized into [0..1]. + /// should be the of the same size as , + /// but there are no restrictions on the span's length. + /// + /// The source span of bytes + /// The destination span of floats + [MethodImpl(InliningOptions.ShortMethod)] + internal static void BulkConvertByteToNormalizedFloat(ReadOnlySpan source, Span dest) + { + DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!"); + +#if NETCOREAPP2_1 + ExtendedIntrinsics.BulkConvertByteToNormalizedFloatReduce(ref source, ref dest); +#else + BasicIntrinsics256.BulkConvertByteToNormalizedFloatReduce(ref source, ref dest); +#endif + FallbackIntrinsics128.BulkConvertByteToNormalizedFloatReduce(ref source, ref dest); + + // Deal with the remainder: + if (source.Length > 0) + { + ConverByteToNormalizedFloatRemainder(source, dest); + } + } + + /// + /// Convert all values normalized into [0..1] from 'source' into 'dest' buffer of . + /// The values are scaled up into [0-255] and rounded, overflows are clamped. + /// should be the of the same size as , + /// but there are no restrictions on the span's length. + /// + /// The source span of floats + /// The destination span of bytes + [MethodImpl(InliningOptions.ShortMethod)] + internal static void BulkConvertNormalizedFloatToByteClampOverflows(ReadOnlySpan source, Span dest) + { + DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!"); + +#if NETCOREAPP2_1 + ExtendedIntrinsics.BulkConvertNormalizedFloatToByteClampOverflowsReduce(ref source, ref dest); +#else + BasicIntrinsics256.BulkConvertNormalizedFloatToByteClampOverflowsReduce(ref source, ref dest); +#endif + FallbackIntrinsics128.BulkConvertNormalizedFloatToByteClampOverflowsReduce(ref source, ref dest); + + // Deal with the remainder: + if (source.Length > 0) + { + ConvertNormalizedFloatToByteRemainder(source, dest); + } + } + + [MethodImpl(InliningOptions.ColdPath)] + private static void ConverByteToNormalizedFloatRemainder(ReadOnlySpan source, Span dest) + { + ref byte sBase = ref MemoryMarshal.GetReference(source); + ref float dBase = ref MemoryMarshal.GetReference(dest); + + // There are at most 3 elements at this point, having a for loop is overkill. + // Let's minimize the no. of instructions! + switch (source.Length) + { + case 3: + Unsafe.Add(ref dBase, 2) = Unsafe.Add(ref sBase, 2) / 255f; + goto case 2; + case 2: + Unsafe.Add(ref dBase, 1) = Unsafe.Add(ref sBase, 1) / 255f; + goto case 1; + case 1: + dBase = sBase / 255f; + break; + } + } + + [MethodImpl(InliningOptions.ColdPath)] + private static void ConvertNormalizedFloatToByteRemainder(ReadOnlySpan source, Span dest) + { + ref float sBase = ref MemoryMarshal.GetReference(source); + ref byte dBase = ref MemoryMarshal.GetReference(dest); + + switch (source.Length) + { + case 3: + Unsafe.Add(ref dBase, 2) = ConvertToByte(Unsafe.Add(ref sBase, 2)); + goto case 2; + case 2: + Unsafe.Add(ref dBase, 1) = ConvertToByte(Unsafe.Add(ref sBase, 1)); + goto case 1; + case 1: + dBase = ConvertToByte(sBase); + break; + } + } + + [MethodImpl(InliningOptions.ShortMethod)] + private static byte ConvertToByte(float f) => (byte)ComparableExtensions.Clamp((f * 255f) + 0.5f, 0, 255f); + + [Conditional("DEBUG")] + private static void VerifyIsAvx2Compatible(string operation) + { + if (!IsAvx2CompatibleArchitecture) + { + throw new NotSupportedException($"{operation} is supported only on AVX2 CPU!"); + } + } + + [Conditional("DEBUG")] + private static void VerifySpanInput(ReadOnlySpan source, Span dest, int shouldBeDivisibleBy) + { + DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!"); + DebugGuard.IsTrue( + ImageMaths.ModuloP2(dest.Length, shouldBeDivisibleBy) == 0, + nameof(source), + $"length should be divisable by {shouldBeDivisibleBy}!"); + } + + [Conditional("DEBUG")] + private static void VerifySpanInput(ReadOnlySpan source, Span dest, int shouldBeDivisibleBy) + { + DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!"); + DebugGuard.IsTrue( + ImageMaths.ModuloP2(dest.Length, shouldBeDivisibleBy) == 0, + nameof(source), + $"length should be divisable by {shouldBeDivisibleBy}!"); + } + } +} \ No newline at end of file diff --git a/src/ImageSharp/Common/Tuples/Octet.cs b/src/ImageSharp/Common/Tuples/Octet.cs new file mode 100644 index 0000000000..539b74e324 --- /dev/null +++ b/src/ImageSharp/Common/Tuples/Octet.cs @@ -0,0 +1,109 @@ +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace SixLabors.ImageSharp.Tuples +{ + /// + /// Contains 8 element value tuples of various types. + /// + internal static class Octet + { + /// + /// Value tuple of -s + /// + [StructLayout(LayoutKind.Explicit, Size = 8 * sizeof(uint))] + public struct OfUInt32 + { + [FieldOffset(0 * sizeof(uint))] + public uint V0; + + [FieldOffset(1 * sizeof(uint))] + public uint V1; + + [FieldOffset(2 * sizeof(uint))] + public uint V2; + + [FieldOffset(3 * sizeof(uint))] + public uint V3; + + [FieldOffset(4 * sizeof(uint))] + public uint V4; + + [FieldOffset(5 * sizeof(uint))] + public uint V5; + + [FieldOffset(6 * sizeof(uint))] + public uint V6; + + [FieldOffset(7 * sizeof(uint))] + public uint V7; + + public override string ToString() + { + return $"{nameof(Octet)}.{nameof(OfUInt32)}({this.V0},{this.V1},{this.V2},{this.V3},{this.V4},{this.V5},{this.V6},{this.V7})"; + } + + [MethodImpl(InliningOptions.ShortMethod)] + public void LoadFrom(ref OfByte src) + { + this.V0 = src.V0; + this.V1 = src.V1; + this.V2 = src.V2; + this.V3 = src.V3; + this.V4 = src.V4; + this.V5 = src.V5; + this.V6 = src.V6; + this.V7 = src.V7; + } + } + + /// + /// Value tuple of -s + /// + [StructLayout(LayoutKind.Explicit, Size = 8)] + public struct OfByte + { + [FieldOffset(0)] + public byte V0; + + [FieldOffset(1)] + public byte V1; + + [FieldOffset(2)] + public byte V2; + + [FieldOffset(3)] + public byte V3; + + [FieldOffset(4)] + public byte V4; + + [FieldOffset(5)] + public byte V5; + + [FieldOffset(6)] + public byte V6; + + [FieldOffset(7)] + public byte V7; + + public override string ToString() + { + return $"{nameof(Octet)}.{nameof(OfByte)}({this.V0},{this.V1},{this.V2},{this.V3},{this.V4},{this.V5},{this.V6},{this.V7})"; + } + + [MethodImpl(InliningOptions.ShortMethod)] + public void LoadFrom(ref OfUInt32 src) + { + this.V0 = (byte)src.V0; + this.V1 = (byte)src.V1; + this.V2 = (byte)src.V2; + this.V3 = (byte)src.V3; + this.V4 = (byte)src.V4; + this.V5 = (byte)src.V5; + this.V6 = (byte)src.V6; + this.V7 = (byte)src.V7; + } + } + } +} \ No newline at end of file diff --git a/src/ImageSharp/Common/Tuples/Vector4Pair.cs b/src/ImageSharp/Common/Tuples/Vector4Pair.cs index 309d5e2e56..cae283d628 100644 --- a/src/ImageSharp/Common/Tuples/Vector4Pair.cs +++ b/src/ImageSharp/Common/Tuples/Vector4Pair.cs @@ -2,11 +2,12 @@ using System.Runtime.CompilerServices; using System.Runtime.InteropServices; -namespace SixLabors.ImageSharp.Common.Tuples +namespace SixLabors.ImageSharp.Tuples { /// /// Its faster to process multiple Vector4-s together, so let's pair them! /// On AVX2 this pair should be convertible to of ! + /// TODO: Investigate defining this as union with an Octet.OfSingle type. /// [StructLayout(LayoutKind.Sequential)] internal struct Vector4Pair @@ -15,8 +16,6 @@ internal struct Vector4Pair public Vector4 B; - private static readonly Vector4 Scale = new Vector4(1 / 255f); - [MethodImpl(MethodImplOptions.AggressiveInlining)] public void MultiplyInplace(float value) { @@ -52,8 +51,9 @@ internal void RoundAndDownscalePreAvx2() b = b.FastRound(); // Downscale by 1/255 - this.A *= Scale; - this.B *= Scale; + var scale = new Vector4(1 / 255f); + this.A *= scale; + this.B *= scale; } /// @@ -74,7 +74,7 @@ internal void RoundAndDownscaleAvx2() public override string ToString() { - return $"{this.A}, {this.B}"; + return $"{nameof(Vector4Pair)}({this.A}, {this.B})"; } } } \ No newline at end of file diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimd.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimd.cs index 4b2626c582..1dc72aaf5b 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimd.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimd.cs @@ -6,7 +6,7 @@ using System.Runtime.CompilerServices; using System.Runtime.InteropServices; -using SixLabors.ImageSharp.Common.Tuples; +using SixLabors.ImageSharp.Tuples; namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters { @@ -109,7 +109,7 @@ internal static void ConvertCore(in ComponentValues values, Span result // Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); - destination.Collect(ref r, ref g, ref b); + destination.Pack(ref r, ref g, ref b); } } } diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs index ab4947e65c..46644258b1 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs @@ -6,7 +6,7 @@ using System.Runtime.CompilerServices; using System.Runtime.InteropServices; -using SixLabors.ImageSharp.Common.Tuples; +using SixLabors.ImageSharp.Tuples; // ReSharper disable ImpureMethodCallOnReadonlyValueField namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters @@ -102,7 +102,7 @@ internal static void ConvertCore(in ComponentValues values, Span result // Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); - destination.Collect(ref rr, ref gg, ref bb); + destination.Pack(ref rr, ref gg, ref bb); } } } diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs index 60abb7fb2c..456636dc39 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs @@ -6,8 +6,8 @@ using System.Linq; using System.Numerics; -using SixLabors.ImageSharp.Common.Tuples; using SixLabors.ImageSharp.Memory; +using SixLabors.ImageSharp.Tuples; using SixLabors.Memory; namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters @@ -157,9 +157,9 @@ internal struct Vector4Octet public Vector4 V0, V1, V2, V3, V4, V5, V6, V7; /// - /// Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order. + /// Pack (r0,r1...r7) (g0,g1...g7) (b0,b1...b7) vector values as (r0,g0,b0,1), (r1,g1,b1,1) ... /// - public void Collect(ref Vector4Pair r, ref Vector4Pair g, ref Vector4Pair b) + public void Pack(ref Vector4Pair r, ref Vector4Pair g, ref Vector4Pair b) { this.V0.X = r.A.X; this.V0.Y = g.A.X; diff --git a/src/ImageSharp/PixelFormats/Rgba32.PixelOperations.cs b/src/ImageSharp/PixelFormats/Rgba32.PixelOperations.cs index 2629ce3f79..bb42ec7e34 100644 --- a/src/ImageSharp/PixelFormats/Rgba32.PixelOperations.cs +++ b/src/ImageSharp/PixelFormats/Rgba32.PixelOperations.cs @@ -3,7 +3,6 @@ using System; using System.Numerics; -using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using SixLabors.Memory; @@ -19,100 +18,18 @@ public partial struct Rgba32 /// internal partial class PixelOperations : PixelOperations { - /// - /// SIMD optimized bulk implementation of - /// that works only with `count` divisible by . - /// - /// The to the source colors. - /// The to the dstination vectors. - /// The number of pixels to convert. - /// - /// Implementation adapted from: - /// - /// http://stackoverflow.com/a/5362789 - /// - /// TODO: We can replace this implementation in the future using new Vector API-s: - /// - /// https://github.com/dotnet/corefx/issues/15957 - /// - /// - internal static void ToVector4SimdAligned(ReadOnlySpan sourceColors, Span destVectors, int count) - { - if (!Vector.IsHardwareAccelerated) - { - throw new InvalidOperationException( - "Rgba32.PixelOperations.ToVector4SimdAligned() should not be called when Vector.IsHardwareAccelerated == false!"); - } - - DebugGuard.IsTrue( - count % Vector.Count == 0, - nameof(count), - "Argument 'count' should divisible by Vector.Count!"); - - var bVec = new Vector(256.0f / 255.0f); - var magicFloat = new Vector(32768.0f); - var magicInt = new Vector(1191182336); // reinterpreded value of 32768.0f - var mask = new Vector(255); - - int unpackedRawCount = count * 4; - - ref uint sourceBase = ref Unsafe.As(ref MemoryMarshal.GetReference(sourceColors)); - ref UnpackedRGBA destBaseAsUnpacked = ref Unsafe.As(ref MemoryMarshal.GetReference(destVectors)); - ref Vector destBaseAsUInt = ref Unsafe.As>(ref destBaseAsUnpacked); - ref Vector destBaseAsFloat = ref Unsafe.As>(ref destBaseAsUnpacked); - - for (int i = 0; i < count; i++) - { - uint sVal = Unsafe.Add(ref sourceBase, i); - ref UnpackedRGBA dst = ref Unsafe.Add(ref destBaseAsUnpacked, i); - - // This call is the bottleneck now: - dst.Load(sVal); - } - - int numOfVectors = unpackedRawCount / Vector.Count; - - for (int i = 0; i < numOfVectors; i++) - { - Vector vi = Unsafe.Add(ref destBaseAsUInt, i); - - vi &= mask; - vi |= magicInt; - - var vf = Vector.AsVectorSingle(vi); - vf = (vf - magicFloat) * bVec; - - Unsafe.Add(ref destBaseAsFloat, i) = vf; - } - } - /// internal override void ToVector4(ReadOnlySpan sourceColors, Span destinationVectors, int count) { Guard.MustBeSizedAtLeast(sourceColors, count, nameof(sourceColors)); Guard.MustBeSizedAtLeast(destinationVectors, count, nameof(destinationVectors)); - if (count < 256 || !Vector.IsHardwareAccelerated) - { - // Doesn't worth to bother with SIMD: - base.ToVector4(sourceColors, destinationVectors, count); - return; - } - - int remainder = count % Vector.Count; - int alignedCount = count - remainder; + sourceColors = sourceColors.Slice(0, count); + destinationVectors = destinationVectors.Slice(0, count); - if (alignedCount > 0) - { - ToVector4SimdAligned(sourceColors, destinationVectors, alignedCount); - } - - if (remainder > 0) - { - sourceColors = sourceColors.Slice(alignedCount); - destinationVectors = destinationVectors.Slice(alignedCount); - base.ToVector4(sourceColors, destinationVectors, remainder); - } + SimdUtils.BulkConvertByteToNormalizedFloat( + MemoryMarshal.Cast(sourceColors), + MemoryMarshal.Cast(destinationVectors)); } /// @@ -120,29 +37,12 @@ internal override void PackFromVector4(ReadOnlySpan sourceVectors, Span { GuardSpans(sourceVectors, nameof(sourceVectors), destinationColors, nameof(destinationColors), count); - if (!SimdUtils.IsAvx2CompatibleArchitecture) - { - base.PackFromVector4(sourceVectors, destinationColors, count); - return; - } - - int remainder = count % 2; - int alignedCount = count - remainder; + sourceVectors = sourceVectors.Slice(0, count); + destinationColors = destinationColors.Slice(0, count); - if (alignedCount > 0) - { - ReadOnlySpan flatSrc = MemoryMarshal.Cast(sourceVectors.Slice(0, alignedCount)); - Span flatDest = MemoryMarshal.Cast(destinationColors); - - SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows(flatSrc, flatDest); - } - - if (remainder > 0) - { - // actually: remainder == 1 - int lastIdx = count - 1; - destinationColors[lastIdx].PackFromVector4(sourceVectors[lastIdx]); - } + SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows( + MemoryMarshal.Cast(sourceVectors), + MemoryMarshal.Cast(destinationColors)); } /// @@ -172,30 +72,6 @@ internal override void ToRgba32(ReadOnlySpan sourcePixels, Span sourcePixels.Slice(0, count).CopyTo(dest); } - - /// - /// Value type to store -s unpacked into multiple -s. - /// - [StructLayout(LayoutKind.Sequential)] - private struct UnpackedRGBA - { - private uint r; - - private uint g; - - private uint b; - - private uint a; - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void Load(uint p) - { - this.r = p; - this.g = p >> GreenShift; - this.b = p >> BlueShift; - this.a = p >> AlphaShift; - } - } } } } \ No newline at end of file diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/PackFromVector4.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/PackFromVector4.cs index a5fa59ba07..eaa52a9750 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/PackFromVector4.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/PackFromVector4.cs @@ -3,6 +3,7 @@ // ReSharper disable InconsistentNaming +using System; using System.Buffers; using System.Numerics; using System.Runtime.CompilerServices; @@ -19,11 +20,14 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk public abstract class PackFromVector4 where TPixel : struct, IPixel { - private IMemoryOwner source; + protected IMemoryOwner source; - private IMemoryOwner destination; + protected IMemoryOwner destination; - [Params(16, 128, 512)] + [Params( + 64, + 2048 + )] public int Count { get; set; } [GlobalSetup] @@ -40,7 +44,7 @@ public void Cleanup() this.source.Dispose(); } - [Benchmark(Baseline = true)] + //[Benchmark] public void PerElement() { ref Vector4 s = ref MemoryMarshal.GetReference(this.source.GetSpan()); @@ -53,13 +57,13 @@ public void PerElement() } [Benchmark] - public void CommonBulk() + public void PixelOperations_Base() { new PixelOperations().PackFromVector4(this.source.GetSpan(), this.destination.GetSpan(), this.Count); } [Benchmark] - public void OptimizedBulk() + public void PixelOperations_Specialized() { PixelOperations.Instance.PackFromVector4(this.source.GetSpan(), this.destination.GetSpan(), this.Count); } @@ -67,6 +71,58 @@ public void OptimizedBulk() public class PackFromVector4_Rgba32 : PackFromVector4 { + [Benchmark] + public void FallbackIntrinsics128() + { + Span sBytes = MemoryMarshal.Cast(this.source.GetSpan()); + Span dFloats = MemoryMarshal.Cast(this.destination.GetSpan()); + + SimdUtils.FallbackIntrinsics128.BulkConvertNormalizedFloatToByteClampOverflows(sBytes, dFloats); + } + + [Benchmark(Baseline = true)] + public void BasicIntrinsics256() + { + Span sBytes = MemoryMarshal.Cast(this.source.GetSpan()); + Span dFloats = MemoryMarshal.Cast(this.destination.GetSpan()); + + SimdUtils.BasicIntrinsics256.BulkConvertNormalizedFloatToByteClampOverflows(sBytes, dFloats); + } + + [Benchmark] + public void ExtendedIntrinsic() + { + Span sBytes = MemoryMarshal.Cast(this.source.GetSpan()); + Span dFloats = MemoryMarshal.Cast(this.destination.GetSpan()); + + SimdUtils.ExtendedIntrinsics.BulkConvertNormalizedFloatToByteClampOverflows(sBytes, dFloats); + } + // RESULTS (2018 October): + // Method | Runtime | Count | Mean | Error | StdDev | Scaled | ScaledSD | Gen 0 | Allocated | + // ---------------------------- |-------- |------ |-------------:|-------------:|------------:|-------:|---------:|-------:|----------:| + // FallbackIntrinsics128 | Clr | 64 | 340.38 ns | 22.319 ns | 1.2611 ns | 1.41 | 0.01 | - | 0 B | + // BasicIntrinsics256 | Clr | 64 | 240.79 ns | 11.421 ns | 0.6453 ns | 1.00 | 0.00 | - | 0 B | + // ExtendedIntrinsic | Clr | 64 | 199.09 ns | 124.239 ns | 7.0198 ns | 0.83 | 0.02 | - | 0 B | + // PixelOperations_Base | Clr | 64 | 647.99 ns | 24.003 ns | 1.3562 ns | 2.69 | 0.01 | 0.0067 | 24 B | + // PixelOperations_Specialized | Clr | 64 | 259.79 ns | 13.391 ns | 0.7566 ns | 1.08 | 0.00 | - | 0 B | <--- ceremonial overhead has been minimized! + // | | | | | | | | | | + // FallbackIntrinsics128 | Core | 64 | 234.64 ns | 12.320 ns | 0.6961 ns | 1.58 | 0.00 | - | 0 B | + // BasicIntrinsics256 | Core | 64 | 148.87 ns | 2.794 ns | 0.1579 ns | 1.00 | 0.00 | - | 0 B | + // ExtendedIntrinsic | Core | 64 | 94.06 ns | 10.015 ns | 0.5659 ns | 0.63 | 0.00 | - | 0 B | + // PixelOperations_Base | Core | 64 | 573.52 ns | 31.865 ns | 1.8004 ns | 3.85 | 0.01 | 0.0067 | 24 B | + // PixelOperations_Specialized | Core | 64 | 117.21 ns | 13.264 ns | 0.7494 ns | 0.79 | 0.00 | - | 0 B | + // | | | | | | | | | | + // FallbackIntrinsics128 | Clr | 2048 | 6,735.93 ns | 2,139.340 ns | 120.8767 ns | 1.71 | 0.03 | - | 0 B | + // BasicIntrinsics256 | Clr | 2048 | 3,929.29 ns | 334.027 ns | 18.8731 ns | 1.00 | 0.00 | - | 0 B | + // ExtendedIntrinsic | Clr | 2048 | 2,226.01 ns | 130.525 ns | 7.3749 ns |!! 0.57 | 0.00 | - | 0 B | <--- ExtendedIntrinsics rock! + // PixelOperations_Base | Clr | 2048 | 16,760.84 ns | 367.800 ns | 20.7814 ns | 4.27 | 0.02 | - | 24 B | <--- Extra copies using "Vector4 TPixel.ToVector4()" + // PixelOperations_Specialized | Clr | 2048 | 3,986.03 ns | 237.238 ns | 13.4044 ns | 1.01 | 0.00 | - | 0 B | <--- can't yet detect whether ExtendedIntrinsics are available :( + // | | | | | | | | | | + // FallbackIntrinsics128 | Core | 2048 | 6,644.65 ns | 2,677.090 ns | 151.2605 ns | 1.69 | 0.05 | - | 0 B | + // BasicIntrinsics256 | Core | 2048 | 3,923.70 ns | 1,971.760 ns | 111.4081 ns | 1.00 | 0.00 | - | 0 B | + // ExtendedIntrinsic | Core | 2048 | 2,092.32 ns | 375.657 ns | 21.2253 ns |!! 0.53 | 0.01 | - | 0 B | <--- ExtendedIntrinsics rock! + // PixelOperations_Base | Core | 2048 | 16,875.73 ns | 1,271.957 ns | 71.8679 ns | 4.30 | 0.10 | - | 24 B | + // PixelOperations_Specialized | Core | 2048 | 2,129.92 ns | 262.888 ns | 14.8537 ns |!! 0.54 | 0.01 | - | 0 B | <--- ExtendedIntrinsics rock! } } \ No newline at end of file diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4.cs index 50fac25139..2cbe549e4a 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4.cs @@ -6,8 +6,14 @@ using System.Buffers; using System; using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; using BenchmarkDotNet.Attributes; +using BenchmarkDotNet.Attributes.Jobs; +using BenchmarkDotNet.Configs; +using BenchmarkDotNet.Environments; +using BenchmarkDotNet.Jobs; using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.PixelFormats; @@ -17,11 +23,17 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk public abstract class ToVector4 where TPixel : struct, IPixel { - private IMemoryOwner source; + protected IMemoryOwner source; - private IMemoryOwner destination; + protected IMemoryOwner destination; - [Params(64, 300, 1024)] + [Params( + 64, + //256, + //512, + //1024, + 2048 + )] public int Count { get; set; } [GlobalSetup] @@ -38,7 +50,7 @@ public void Cleanup() this.destination.Dispose(); } - [Benchmark(Baseline = true)] + //[Benchmark] public void PerElement() { Span s = this.source.GetSpan(); @@ -46,25 +58,163 @@ public void PerElement() for (int i = 0; i < this.Count; i++) { - TPixel c = s[i]; - d[i] = c.ToVector4(); + d[i] = s[i].ToVector4(); } } [Benchmark] - public void CommonBulk() + public void PixelOperations_Base() { new PixelOperations().ToVector4(this.source.GetSpan(), this.destination.GetSpan(), this.Count); } [Benchmark] - public void OptimizedBulk() + public void PixelOperations_Specialized() { PixelOperations.Instance.ToVector4(this.source.GetSpan(), this.destination.GetSpan(), this.Count); } } + [Config(typeof(Config.ShortClr))] public class ToVector4_Rgba32 : ToVector4 { + [Benchmark] + public void FallbackIntrinsics128() + { + Span sBytes = MemoryMarshal.Cast(this.source.GetSpan()); + Span dFloats = MemoryMarshal.Cast(this.destination.GetSpan()); + + SimdUtils.FallbackIntrinsics128.BulkConvertByteToNormalizedFloat(sBytes, dFloats); + } + + [Benchmark(Baseline = true)] + public void BasicIntrinsics256() + { + Span sBytes = MemoryMarshal.Cast(this.source.GetSpan()); + Span dFloats = MemoryMarshal.Cast(this.destination.GetSpan()); + + SimdUtils.BasicIntrinsics256.BulkConvertByteToNormalizedFloat(sBytes, dFloats); + } + + [Benchmark] + public void ExtendedIntrinsics() + { + Span sBytes = MemoryMarshal.Cast(this.source.GetSpan()); + Span dFloats = MemoryMarshal.Cast(this.destination.GetSpan()); + + SimdUtils.ExtendedIntrinsics.BulkConvertByteToNormalizedFloat(sBytes, dFloats); + } + + //[Benchmark] + public void ExtendedIntrinsics_BulkConvertByteToNormalizedFloat_2Loops() + { + Span sBytes = MemoryMarshal.Cast(this.source.GetSpan()); + Span dFloats = MemoryMarshal.Cast(this.destination.GetSpan()); + + int n = dFloats.Length / Vector.Count; + + ref Vector sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference((ReadOnlySpan)sBytes)); + ref Vector destBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(dFloats)); + ref Vector destBaseU = ref Unsafe.As, Vector>(ref destBase); + + for (int i = 0; i < n; i++) + { + Vector b = Unsafe.Add(ref sourceBase, i); + + Vector.Widen(b, out Vector s0, out Vector s1); + Vector.Widen(s0, out Vector w0, out Vector w1); + Vector.Widen(s1, out Vector w2, out Vector w3); + + ref Vector d = ref Unsafe.Add(ref destBaseU, i * 4); + d = w0; + Unsafe.Add(ref d, 1) = w1; + Unsafe.Add(ref d, 2) = w2; + Unsafe.Add(ref d, 3) = w3; + } + + n = dFloats.Length / Vector.Count; + var scale = new Vector(1f / 255f); + + for (int i = 0; i < n; i++) + { + ref Vector dRef = ref Unsafe.Add(ref destBase, i); + + Vector du = Vector.AsVectorInt32(dRef); + Vector v = Vector.ConvertToSingle(du); + v *= scale; + + dRef = v; + } + } + + //[Benchmark] + public void ExtendedIntrinsics_BulkConvertByteToNormalizedFloat_ConvertInSameLoop() + { + Span sBytes = MemoryMarshal.Cast(this.source.GetSpan()); + Span dFloats = MemoryMarshal.Cast(this.destination.GetSpan()); + + int n = dFloats.Length / Vector.Count; + + ref Vector sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference((ReadOnlySpan)sBytes)); + ref Vector destBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(dFloats)); + var scale = new Vector(1f / 255f); + + for (int i = 0; i < n; i++) + { + Vector b = Unsafe.Add(ref sourceBase, i); + + Vector.Widen(b, out Vector s0, out Vector s1); + Vector.Widen(s0, out Vector w0, out Vector w1); + Vector.Widen(s1, out Vector w2, out Vector w3); + + Vector f0 = ConvertToNormalizedSingle(w0, scale); + Vector f1 = ConvertToNormalizedSingle(w1, scale); + Vector f2 = ConvertToNormalizedSingle(w2, scale); + Vector f3 = ConvertToNormalizedSingle(w3, scale); + + ref Vector d = ref Unsafe.Add(ref destBase, i * 4); + d = f0; + Unsafe.Add(ref d, 1) = f1; + Unsafe.Add(ref d, 2) = f2; + Unsafe.Add(ref d, 3) = f3; + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static Vector ConvertToNormalizedSingle(Vector u, Vector scale) + { + Vector vi = Vector.AsVectorInt32(u); + Vector v = Vector.ConvertToSingle(vi); + v *= scale; + return v; + } + + // RESULTS (2018 October): + // + // Method | Runtime | Count | Mean | Error | StdDev | Scaled | ScaledSD | Gen 0 | Allocated | + // ---------------------------- |-------- |------ |------------:|-------------:|------------:|-------:|---------:|-------:|----------:| + // FallbackIntrinsics128 | Clr | 64 | 287.62 ns | 6.026 ns | 0.3405 ns | 1.19 | 0.00 | - | 0 B | + // BasicIntrinsics256 | Clr | 64 | 240.83 ns | 10.585 ns | 0.5981 ns | 1.00 | 0.00 | - | 0 B | + // ExtendedIntrinsics | Clr | 64 | 168.28 ns | 11.478 ns | 0.6485 ns | 0.70 | 0.00 | - | 0 B | + // PixelOperations_Base | Clr | 64 | 334.08 ns | 38.048 ns | 2.1498 ns | 1.39 | 0.01 | 0.0072 | 24 B | + // PixelOperations_Specialized | Clr | 64 | 255.41 ns | 10.939 ns | 0.6181 ns | 1.06 | 0.00 | - | 0 B | <--- ceremonial overhead has been minimized! + // | | | | | | | | | | + // FallbackIntrinsics128 | Core | 64 | 183.29 ns | 8.931 ns | 0.5046 ns | 1.32 | 0.00 | - | 0 B | + // BasicIntrinsics256 | Core | 64 | 139.18 ns | 7.633 ns | 0.4313 ns | 1.00 | 0.00 | - | 0 B | + // ExtendedIntrinsics | Core | 64 | 66.29 ns | 16.366 ns | 0.9247 ns | 0.48 | 0.01 | - | 0 B | + // PixelOperations_Base | Core | 64 | 257.75 ns | 16.959 ns | 0.9582 ns | 1.85 | 0.01 | 0.0072 | 24 B | + // PixelOperations_Specialized | Core | 64 | 90.14 ns | 9.955 ns | 0.5625 ns | 0.65 | 0.00 | - | 0 B | + // | | | | | | | | | | + // FallbackIntrinsics128 | Clr | 2048 | 5,011.84 ns | 347.991 ns | 19.6621 ns | 1.22 | 0.01 | - | 0 B | + // BasicIntrinsics256 | Clr | 2048 | 4,119.35 ns | 720.153 ns | 40.6900 ns | 1.00 | 0.00 | - | 0 B | + // ExtendedIntrinsics | Clr | 2048 | 1,195.29 ns | 164.389 ns | 9.2883 ns |!! 0.29 | 0.00 | - | 0 B | <--- ExtendedIntrinsics rock! + // PixelOperations_Base | Clr | 2048 | 6,820.58 ns | 823.433 ns | 46.5255 ns | 1.66 | 0.02 | - | 24 B | + // PixelOperations_Specialized | Clr | 2048 | 4,203.53 ns | 176.714 ns | 9.9847 ns | 1.02 | 0.01 | - | 0 B | <--- can't yet detect whether ExtendedIntrinsics are available :( + // | | | | | | | | | | + // FallbackIntrinsics128 | Core | 2048 | 5,017.89 ns | 4,021.533 ns | 227.2241 ns | 1.24 | 0.05 | - | 0 B | + // BasicIntrinsics256 | Core | 2048 | 4,046.51 ns | 1,150.390 ns | 64.9992 ns | 1.00 | 0.00 | - | 0 B | + // ExtendedIntrinsics | Core | 2048 | 1,130.59 ns | 832.588 ns | 47.0427 ns |!! 0.28 | 0.01 | - | 0 B | <--- ExtendedIntrinsics rock! + // PixelOperations_Base | Core | 2048 | 6,752.68 ns | 272.820 ns | 15.4148 ns | 1.67 | 0.02 | - | 24 B | + // PixelOperations_Specialized | Core | 2048 | 1,126.13 ns | 79.192 ns | 4.4745 ns |!! 0.28 | 0.00 | - | 0 B | <--- ExtendedIntrinsics rock! } } \ No newline at end of file diff --git a/tests/ImageSharp.Benchmarks/General/Abs.cs b/tests/ImageSharp.Benchmarks/General/BasicMath/Abs.cs similarity index 88% rename from tests/ImageSharp.Benchmarks/General/Abs.cs rename to tests/ImageSharp.Benchmarks/General/BasicMath/Abs.cs index a67f3f1078..ea53959b6a 100644 --- a/tests/ImageSharp.Benchmarks/General/Abs.cs +++ b/tests/ImageSharp.Benchmarks/General/BasicMath/Abs.cs @@ -1,9 +1,9 @@ -namespace SixLabors.ImageSharp.Benchmarks.General -{ - using System; +using System; - using BenchmarkDotNet.Attributes; +using BenchmarkDotNet.Attributes; +namespace SixLabors.ImageSharp.Benchmarks.General.BasicMath +{ public class Abs { [Params(-1, 1)] diff --git a/tests/ImageSharp.Benchmarks/General/BasicMath/ClampFloat.cs b/tests/ImageSharp.Benchmarks/General/BasicMath/ClampFloat.cs new file mode 100644 index 0000000000..3b7dea0955 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/General/BasicMath/ClampFloat.cs @@ -0,0 +1,70 @@ +using System; +using System.Runtime.CompilerServices; + +using BenchmarkDotNet.Attributes; +using BenchmarkDotNet.Running; + +namespace SixLabors.ImageSharp.Benchmarks.General.BasicMath +{ + public class ClampFloat + { + private readonly float min = -1.5f; + private readonly float max = 2.5f; + private static readonly float[] Values = { -10, -5, -3, -1.5f, -0.5f, 0f, 1f, 1.5f, 2.5f, 3, 10 }; + + [Benchmark(Baseline = true)] + public float UsingMathF() + { + float acc = 0; + + for (int i = 0; i < Values.Length; i++) + { + acc += ClampUsingMathF(Values[i], this.min, this.max); + } + + return acc; + } + + [Benchmark] + public float UsingBranching() + { + float acc = 0; + + for (int i = 0; i < Values.Length; i++) + { + acc += ClampUsingBranching(Values[i], this.min, this.max); + } + + return acc; + } + + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static float ClampUsingMathF(float x, float min, float max) + { + return Math.Min(max, Math.Max(min, x)); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static float ClampUsingBranching(float x, float min, float max) + { + if (x >= max) + { + return max; + } + + if (x <= min) + { + return min; + } + + return x; + } + + // RESULTS: + // Method | Mean | Error | StdDev | Scaled | + // --------------- |---------:|----------:|----------:|-------:| + // UsingMathF | 30.37 ns | 0.3764 ns | 0.3337 ns | 1.00 | + // UsingBranching | 18.66 ns | 0.1043 ns | 0.0871 ns | 0.61 | + } +} \ No newline at end of file diff --git a/tests/ImageSharp.Benchmarks/General/Clamp.cs b/tests/ImageSharp.Benchmarks/General/BasicMath/ClampInt32IntoByte.cs similarity index 93% rename from tests/ImageSharp.Benchmarks/General/Clamp.cs rename to tests/ImageSharp.Benchmarks/General/BasicMath/ClampInt32IntoByte.cs index ef6bc3c402..6ce82ba115 100644 --- a/tests/ImageSharp.Benchmarks/General/Clamp.cs +++ b/tests/ImageSharp.Benchmarks/General/BasicMath/ClampInt32IntoByte.cs @@ -3,14 +3,14 @@ // Licensed under the Apache License, Version 2.0. // -namespace SixLabors.ImageSharp.Benchmarks.General -{ - using System; - using System.Runtime.CompilerServices; +using System; +using System.Runtime.CompilerServices; - using BenchmarkDotNet.Attributes; +using BenchmarkDotNet.Attributes; - public class Clamp +namespace SixLabors.ImageSharp.Benchmarks.General.BasicMath +{ + public class ClampInt32IntoByte { [Params(-1, 0, 255, 256)] public int Value { get; set; } diff --git a/tests/ImageSharp.Benchmarks/General/BasicMath/ModuloPowerOfTwoConstant.cs b/tests/ImageSharp.Benchmarks/General/BasicMath/ModuloPowerOfTwoConstant.cs new file mode 100644 index 0000000000..9ddfad7222 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/General/BasicMath/ModuloPowerOfTwoConstant.cs @@ -0,0 +1,23 @@ +using BenchmarkDotNet.Attributes; +using BenchmarkDotNet.Attributes.Jobs; + +namespace SixLabors.ImageSharp.Benchmarks.General.BasicMath +{ + [LongRunJob] + public class ModuloPowerOfTwoConstant + { + private readonly int value = 42; + + [Benchmark(Baseline = true)] + public int Standard() + { + return this.value % 8; + } + + [Benchmark] + public int Bitwise() + { + return ImageMaths.Modulo8(this.value); + } + } +} \ No newline at end of file diff --git a/tests/ImageSharp.Benchmarks/General/BasicMath/ModuloPowerOfTwoVariable.cs b/tests/ImageSharp.Benchmarks/General/BasicMath/ModuloPowerOfTwoVariable.cs new file mode 100644 index 0000000000..5c2fe81fa2 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/General/BasicMath/ModuloPowerOfTwoVariable.cs @@ -0,0 +1,32 @@ +using BenchmarkDotNet.Attributes; +using BenchmarkDotNet.Attributes.Jobs; + +namespace SixLabors.ImageSharp.Benchmarks.General.BasicMath +{ + [LongRunJob] + public class ModuloPowerOfTwoVariable + { + private readonly int value = 42; + + private readonly int m = 32; + + [Benchmark(Baseline = true)] + public int Standard() + { + return this.value % this.m; + } + + [Benchmark] + public int Bitwise() + { + return ImageMaths.ModuloP2(this.value, this.m); + } + + // RESULTS: + // + // Method | Mean | Error | StdDev | Median | Scaled | ScaledSD | + // --------- |----------:|----------:|----------:|----------:|-------:|---------:| + // Standard | 1.2465 ns | 0.0093 ns | 0.0455 ns | 1.2423 ns | 1.00 | 0.00 | + // Bitwise | 0.0265 ns | 0.0103 ns | 0.0515 ns | 0.0000 ns | 0.02 | 0.04 | + } +} \ No newline at end of file diff --git a/tests/ImageSharp.Benchmarks/General/Pow.cs b/tests/ImageSharp.Benchmarks/General/BasicMath/Pow.cs similarity index 93% rename from tests/ImageSharp.Benchmarks/General/Pow.cs rename to tests/ImageSharp.Benchmarks/General/BasicMath/Pow.cs index 325bd9d20e..0f256fc781 100644 --- a/tests/ImageSharp.Benchmarks/General/Pow.cs +++ b/tests/ImageSharp.Benchmarks/General/BasicMath/Pow.cs @@ -1,7 +1,8 @@ using System; + using BenchmarkDotNet.Attributes; -namespace SixLabors.ImageSharp.Benchmarks.General +namespace SixLabors.ImageSharp.Benchmarks.General.BasicMath { public class Pow { diff --git a/tests/ImageSharp.Benchmarks/General/Modulus.cs b/tests/ImageSharp.Benchmarks/General/Modulus.cs deleted file mode 100644 index e6d5ccce62..0000000000 --- a/tests/ImageSharp.Benchmarks/General/Modulus.cs +++ /dev/null @@ -1,19 +0,0 @@ -namespace SixLabors.ImageSharp.Benchmarks.General -{ - using BenchmarkDotNet.Attributes; - - public class Modulus - { - [Benchmark(Baseline = true, Description = "Standard Modulus using %")] - public int StandardModulus() - { - return 255 % 256; - } - - [Benchmark(Description = "Bitwise Modulus using &")] - public int BitwiseModulus() - { - return 255 & 255; - } - } -} diff --git a/tests/ImageSharp.Benchmarks/General/Vectorization/UInt32ToSingle.cs b/tests/ImageSharp.Benchmarks/General/Vectorization/UInt32ToSingle.cs new file mode 100644 index 0000000000..ca85a350cc --- /dev/null +++ b/tests/ImageSharp.Benchmarks/General/Vectorization/UInt32ToSingle.cs @@ -0,0 +1,113 @@ +using System.Numerics; +using System.Runtime.CompilerServices; + +using BenchmarkDotNet.Attributes; + +namespace SixLabors.ImageSharp.Benchmarks.General.Vectorization +{ + [Config(typeof(Config.ShortClr))] + public class UInt32ToSingle + { + private float[] data; + + private const int Count = 32; + + [GlobalSetup] + public void Setup() + { + this.data = new float[Count]; + } + + [Benchmark(Baseline = true)] + public void MagicMethod() + { + ref Vector b = ref Unsafe.As>(ref this.data[0]); + + int n = Count / Vector.Count; + + var bVec = new Vector(256.0f / 255.0f); + var magicFloat = new Vector(32768.0f); + var magicInt = new Vector(1191182336); // reinterpreded value of 32768.0f + var mask = new Vector(255); + + for (int i = 0; i < n; i++) + { + // union { float f; uint32_t i; } u; + // u.f = 32768.0f + x * (255.0f / 256.0f); + // return (uint8_t)u.i; + + ref Vector df = ref Unsafe.Add(ref b, i); + + var vi = Vector.AsVectorUInt32(df); + vi &= mask; + vi |= magicInt; + + var vf = Vector.AsVectorSingle(vi); + vf = (vf - magicFloat) * bVec; + + df = vf; + } + } + + [Benchmark] + public void StandardSimd() + { + int n = Count / Vector.Count; + + ref Vector bf = ref Unsafe.As>(ref this.data[0]); + ref Vector bu = ref Unsafe.As, Vector>(ref bf); + + var scale = new Vector(1f / 255f); + + for (int i = 0; i < n; i++) + { + Vector u = Unsafe.Add(ref bu, i); + Vector v = Vector.ConvertToSingle(u); + v *= scale; + Unsafe.Add(ref bf, i) = v; + } + } + + [Benchmark] + public void StandardSimdFromInt() + { + int n = Count / Vector.Count; + + ref Vector bf = ref Unsafe.As>(ref this.data[0]); + ref Vector bu = ref Unsafe.As, Vector>(ref bf); + + var scale = new Vector(1f / 255f); + + for (int i = 0; i < n; i++) + { + Vector u = Unsafe.Add(ref bu, i); + Vector v = Vector.ConvertToSingle(u); + v *= scale; + Unsafe.Add(ref bf, i) = v; + } + } + + + [Benchmark] + public void StandardSimdFromInt_RefCast() + { + int n = Count / Vector.Count; + + ref Vector bf = ref Unsafe.As>(ref this.data[0]); + ref Vector bu = ref Unsafe.As, Vector>(ref bf); + + var scale = new Vector(1f / 255f); + + for (int i = 0; i < n; i++) + { + ref Vector fRef = ref Unsafe.Add(ref bf, i); + + Vector du = Vector.AsVectorInt32(fRef); + Vector v = Vector.ConvertToSingle(du); + v *= scale; + + fRef = v; + } + } + } +} \ No newline at end of file diff --git a/tests/ImageSharp.Benchmarks/General/Vectorization/WidenBytesToUInt32.cs b/tests/ImageSharp.Benchmarks/General/Vectorization/WidenBytesToUInt32.cs new file mode 100644 index 0000000000..2bc3af4c98 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/General/Vectorization/WidenBytesToUInt32.cs @@ -0,0 +1,64 @@ +using System.Numerics; +using System.Runtime.CompilerServices; + +using BenchmarkDotNet.Attributes; + +using SixLabors.ImageSharp.Tuples; + +namespace SixLabors.ImageSharp.Benchmarks.General.Vectorization +{ + [Config(typeof(Config.ShortClr))] + public class WidenBytesToUInt32 + { + private byte[] source; + + private uint[] dest; + + private const int Count = 64; + + [GlobalSetup] + public void Setup() + { + this.source = new byte[Count]; + this.dest = new uint[Count]; + } + + [Benchmark(Baseline = true)] + public void Standard() + { + const int N = Count / 8; + + ref Octet.OfByte sBase = ref Unsafe.As(ref this.source[0]); + ref Octet.OfUInt32 dBase = ref Unsafe.As(ref this.dest[0]); + + for (int i = 0; i < N; i++) + { + Unsafe.Add(ref dBase, i).LoadFrom(ref Unsafe.Add(ref sBase, i)); + } + } + + [Benchmark] + public void Simd() + { + int n = Count / Vector.Count; + + ref Vector sBase = ref Unsafe.As>(ref this.source[0]); + ref Vector dBase = ref Unsafe.As>(ref this.dest[0]); + + for (int i = 0; i < n; i++) + { + Vector b = Unsafe.Add(ref sBase, i); + + Vector.Widen(b, out Vector s0, out Vector s1); + Vector.Widen(s0, out Vector w0, out Vector w1); + Vector.Widen(s1, out Vector w2, out Vector w3); + + ref Vector d = ref Unsafe.Add(ref dBase, i * 4); + d = w0; + Unsafe.Add(ref d, 1) = w1; + Unsafe.Add(ref d, 2) = w2; + Unsafe.Add(ref d, 3) = w3; + } + } + } +} \ No newline at end of file diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs index c6c3b68f33..c63cb3438f 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs @@ -62,7 +62,7 @@ private static Vector CreateRandomTestVector(int seed, float min, float m { float[] data = new float[Vector.Count]; - var rnd = new Random(); + var rnd = new Random(seed); for (int i = 0; i < Vector.Count; i++) { @@ -118,7 +118,7 @@ private bool SkipOnNonAvx2([CallerMemberName] string testCaseName = null) [InlineData(1, 8)] [InlineData(2, 16)] [InlineData(3, 128)] - public void BulkConvertNormalizedFloatToByte_WithRoundedData(int seed, int count) + public void BasicIntrinsics256_BulkConvertNormalizedFloatToByte_WithRoundedData(int seed, int count) { if (this.SkipOnNonAvx2()) { @@ -130,7 +130,7 @@ public void BulkConvertNormalizedFloatToByte_WithRoundedData(int seed, int count byte[] dest = new byte[count]; - SimdUtils.BulkConvertNormalizedFloatToByte(normalized, dest); + SimdUtils.BasicIntrinsics256.BulkConvertNormalizedFloatToByte(normalized, dest); byte[] expected = orig.Select(f => (byte)(f)).ToArray(); @@ -142,7 +142,7 @@ public void BulkConvertNormalizedFloatToByte_WithRoundedData(int seed, int count [InlineData(1, 8)] [InlineData(2, 16)] [InlineData(3, 128)] - public void BulkConvertNormalizedFloatToByte_WithNonRoundedData(int seed, int count) + public void BasicIntrinsics256_BulkConvertNormalizedFloatToByte_WithNonRoundedData(int seed, int count) { if (this.SkipOnNonAvx2()) { @@ -153,39 +153,147 @@ public void BulkConvertNormalizedFloatToByte_WithNonRoundedData(int seed, int co byte[] dest = new byte[count]; - SimdUtils.BulkConvertNormalizedFloatToByte(source, dest); + SimdUtils.BasicIntrinsics256.BulkConvertNormalizedFloatToByte(source, dest); byte[] expected = source.Select(f => (byte)Math.Round(f * 255f)).ToArray(); Assert.Equal(expected, dest); } - private static float Clamp255(float x) => Math.Min(255f, Math.Max(0f, x)); + public static readonly TheoryData ArraySizesDivisibleBy8 = new TheoryData { 0, 8, 16, 1024 }; + public static readonly TheoryData ArraySizesDivisibleBy4 = new TheoryData { 0, 4, 8, 28, 1020 }; + + public static readonly TheoryData ArraySizesDivisibleBy32 = new TheoryData { 0, 32, 512 }; + + public static readonly TheoryData ArbitraryArraySizes = + new TheoryData + { + 0, 1, 2, 3, 4, 7, 8, 9, 15, 16, 17, 63, 64, 255, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 520, + }; [Theory] - [InlineData(1, 0)] - [InlineData(1, 8)] - [InlineData(2, 16)] - [InlineData(3, 128)] - public void BulkConvertNormalizedFloatToByteClampOverflows(int seed, int count) + [MemberData(nameof(ArraySizesDivisibleBy4))] + public void FallbackIntrinsics128_BulkConvertByteToNormalizedFloat(int count) + { + TestImpl_BulkConvertByteToNormalizedFloat( + count, + (s, d) => SimdUtils.FallbackIntrinsics128.BulkConvertByteToNormalizedFloat(s.Span, d.Span)); + } + + [Theory] + [MemberData(nameof(ArraySizesDivisibleBy8))] + public void BasicIntrinsics256_BulkConvertByteToNormalizedFloat(int count) { if (this.SkipOnNonAvx2()) { return; } - float[] orig = new Random(seed).GenerateRandomRoundedFloatArray(count, -50, 444); - float[] normalized = orig.Select(f => f / 255f).ToArray(); + TestImpl_BulkConvertByteToNormalizedFloat( + count, + (s, d) => SimdUtils.BasicIntrinsics256.BulkConvertByteToNormalizedFloat(s.Span, d.Span)); + } + + [Theory] + [MemberData(nameof(ArraySizesDivisibleBy32))] + public void ExtendedIntrinsics_BulkConvertByteToNormalizedFloat(int count) + { + TestImpl_BulkConvertByteToNormalizedFloat( + count, + (s, d) => SimdUtils.ExtendedIntrinsics.BulkConvertByteToNormalizedFloat(s.Span, d.Span)); + } - byte[] dest = new byte[count]; + [Theory] + [MemberData(nameof(ArbitraryArraySizes))] + public void BulkConvertByteToNormalizedFloat(int count) + { + TestImpl_BulkConvertByteToNormalizedFloat( + count, + (s, d) => SimdUtils.BulkConvertByteToNormalizedFloat(s.Span, d.Span)); + } - SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows(normalized, dest); + private static void TestImpl_BulkConvertByteToNormalizedFloat( + int count, + Action, Memory> convert) + { + byte[] source = new Random(count).GenerateRandomByteArray(count); + float[] result = new float[count]; + float[] expected = source.Select(b => (float)b / 255f).ToArray(); - byte[] expected = orig.Select(f => (byte)Clamp255(f)).ToArray(); + convert(source, result); - Assert.Equal(expected, dest); + Assert.Equal(expected, result, new ApproximateFloatComparer(1e-5f)); } + [Theory] + [MemberData(nameof(ArraySizesDivisibleBy4))] + public void FallbackIntrinsics128_BulkConvertNormalizedFloatToByteClampOverflows(int count) + { + TestImpl_BulkConvertNormalizedFloatToByteClampOverflows(count, + (s, d) => SimdUtils.FallbackIntrinsics128.BulkConvertNormalizedFloatToByteClampOverflows(s.Span, d.Span) + ); + } + + [Theory] + [MemberData(nameof(ArraySizesDivisibleBy8))] + public void BasicIntrinsics256_BulkConvertNormalizedFloatToByteClampOverflows(int count) + { + if (this.SkipOnNonAvx2()) + { + return; + } + + TestImpl_BulkConvertNormalizedFloatToByteClampOverflows(count, + (s, d) => SimdUtils.BasicIntrinsics256.BulkConvertNormalizedFloatToByteClampOverflows(s.Span, d.Span) + ); + } + + [Theory] + [MemberData(nameof(ArraySizesDivisibleBy32))] + public void ExtendedIntrinsics_BulkConvertNormalizedFloatToByteClampOverflows(int count) + { + TestImpl_BulkConvertNormalizedFloatToByteClampOverflows(count, + (s, d) => SimdUtils.ExtendedIntrinsics.BulkConvertNormalizedFloatToByteClampOverflows(s.Span, d.Span) + ); + } + + [Theory] + [MemberData(nameof(ArbitraryArraySizes))] + public void BulkConvertNormalizedFloatToByteClampOverflows(int count) + { + TestImpl_BulkConvertNormalizedFloatToByteClampOverflows(count, + (s, d) => SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows(s.Span, d.Span) + ); + + // for small values, let's stress test the implementation a bit: + if (count > 0 && count < 10) + { + for (int i = 0; i < 20; i++) + { + TestImpl_BulkConvertNormalizedFloatToByteClampOverflows( + count, + (s, d) => SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows(s.Span, d.Span), + i + 42); + } + } + } + + private static void TestImpl_BulkConvertNormalizedFloatToByteClampOverflows( + int count, + Action, Memory> convert, int seed = -1) + { + seed = seed > 0 ? seed : count; + float[] source = new Random(seed).GenerateRandomFloatArray(count, -0.2f, 1.2f); + byte[] expected = source.Select(NormalizedFloatToByte).ToArray(); + byte[] actual = new byte[count]; + + convert(source, actual); + + Assert.Equal(expected, actual); + } + + private static byte NormalizedFloatToByte(float f) => (byte)Math.Min(255f, Math.Max(0f, f * 255f + 0.5f)); + [Theory] [InlineData(0)] [InlineData(7)] @@ -211,7 +319,7 @@ private void BulkConvertNormalizedFloatToByte_Step() float[] source = { 0, 7, 42, 255, 0.5f, 1.1f, 2.6f, 16f }; - var expected = source.Select(f => (byte)Math.Round(f)).ToArray(); + byte[] expected = source.Select(f => (byte)Math.Round(f)).ToArray(); source = source.Select(f => f / 255f).ToArray(); @@ -245,8 +353,6 @@ private void MagicConvert(Span source, Span dest) iiRef = x; - //Tuple8.OfUInt32 ii = Unsafe.As, Tuple8.OfUInt32>(ref x); - ref Tuple8.OfByte d = ref MemoryMarshal.Cast(dest)[0]; d.LoadFrom(ref ii); diff --git a/tests/ImageSharp.Tests/Helpers/ImageMathsTests.cs b/tests/ImageSharp.Tests/Helpers/ImageMathsTests.cs index 6c2979fe9e..75ef611a5c 100644 --- a/tests/ImageSharp.Tests/Helpers/ImageMathsTests.cs +++ b/tests/ImageSharp.Tests/Helpers/ImageMathsTests.cs @@ -9,6 +9,74 @@ namespace SixLabors.ImageSharp.Tests.Helpers public class ImageMathsTests { + [Theory] + [InlineData(0)] + [InlineData(1)] + [InlineData(2)] + [InlineData(3)] + [InlineData(4)] + [InlineData(100)] + [InlineData(123)] + [InlineData(53436353)] + public void Modulo4(int x) + { + int actual = ImageMaths.Modulo4(x); + Assert.Equal(x % 4, actual); + } + + [Theory] + [InlineData(0)] + [InlineData(1)] + [InlineData(2)] + [InlineData(6)] + [InlineData(7)] + [InlineData(8)] + [InlineData(100)] + [InlineData(123)] + [InlineData(53436353)] + [InlineData(975)] + public void Modulo8(int x) + { + int actual = ImageMaths.Modulo8(x); + Assert.Equal(x % 8, actual); + } + + [Theory] + [InlineData(0, 2)] + [InlineData(1, 2)] + [InlineData(2, 2)] + [InlineData(0, 4)] + [InlineData(3, 4)] + [InlineData(5, 4)] + [InlineData(5, 8)] + [InlineData(8, 8)] + [InlineData(8, 16)] + [InlineData(15, 16)] + [InlineData(17, 16)] + [InlineData(17, 32)] + [InlineData(31, 32)] + [InlineData(32, 32)] + [InlineData(33, 32)] + public void Modulo2P(int x, int m) + { + int actual = ImageMaths.ModuloP2(x, m); + Assert.Equal(x % m, actual); + } + + [Theory] + [InlineData(0, 0, 0, 0)] + [InlineData(0.5f, 0, 1, 0.5f)] + [InlineData(-0.5f, -0.1f, 10, -0.1f)] + [InlineData(-0.05f, -0.1f, 10, -0.05f)] + [InlineData(9.9f, -0.1f, 10, 9.9f)] + [InlineData(10f, -0.1f, 10, 10f)] + [InlineData(10.1f, -0.1f, 10, 10f)] + public void Clamp(float x, float min, float max, float expected) + { + float actual = x.Clamp(min, max); + Assert.Equal(expected, actual); + } + [Fact] public void FasAbsResultMatchesMath() { diff --git a/tests/ImageSharp.Tests/PixelFormats/PixelOperationsTests.cs b/tests/ImageSharp.Tests/PixelFormats/PixelOperationsTests.cs index 9e41fd94f3..abf764881b 100644 --- a/tests/ImageSharp.Tests/PixelFormats/PixelOperationsTests.cs +++ b/tests/ImageSharp.Tests/PixelFormats/PixelOperationsTests.cs @@ -1,93 +1,73 @@ -// Copyright (c) Six Labors and contributors. -// Licensed under the Apache License, Version 2.0. - -using System; -using System.Buffers; -using System.Numerics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -using SixLabors.ImageSharp.Memory; -using SixLabors.ImageSharp.PixelFormats; -using Xunit; -using Xunit.Abstractions; - -namespace SixLabors.ImageSharp.Tests.PixelFormats -{ - public partial class PixelOperationsTests - { - public class Rgba32 : PixelOperationsTests - { - public Rgba32(ITestOutputHelper output) - : base(output) - { - } - - // For 4.6 test runner MemberData does not work without redeclaring the public field in the derived test class: - public static new TheoryData ArraySizesData => new TheoryData { 7, 16, 1111 }; - - [Fact] - public void IsSpecialImplementation() - { - Assert.IsType(PixelOperations.Instance); - } - - [Fact] - public void ToVector4SimdAligned() - { - if (!Vector.IsHardwareAccelerated) - { - return; - } - - ImageSharp.PixelFormats.Rgba32[] source = CreatePixelTestData(64); - Vector4[] expected = CreateExpectedVector4Data(source); - - TestOperation( - source, - expected, - (s, d) => ImageSharp.PixelFormats.Rgba32.PixelOperations.ToVector4SimdAligned(s, d.GetSpan(), 64) - ); - } - - - // [Fact] // Profiling benchmark - enable manually! -#pragma warning disable xUnit1013 // Public method should be marked as test - public void Benchmark_ToVector4() -#pragma warning restore xUnit1013 // Public method should be marked as test - { - int times = 200000; - int count = 1024; - - using (IMemoryOwner source = Configuration.Default.MemoryAllocator.Allocate(count)) - using (IMemoryOwner dest = Configuration.Default.MemoryAllocator.Allocate(count)) - { - this.Measure( - times, - () => - { - PixelOperations.Instance.ToVector4(source.GetSpan(), dest.GetSpan(), count); - }); - } - } - } - - public class Argb32 : PixelOperationsTests - { - // For 4.6 test runner MemberData does not work without redeclaring the public field in the derived test class: - public Argb32(ITestOutputHelper output) - : base(output) - { - } - - public static new TheoryData ArraySizesData => new TheoryData { 7, 16, 1111 }; - } - - [Theory] - [WithBlankImages(1, 1, PixelTypes.All)] - public void GetGlobalInstance(TestImageProvider dummy) - where TPixel : struct, IPixel - { - Assert.NotNull(PixelOperations.Instance); +// Copyright (c) Six Labors and contributors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Buffers; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using SixLabors.ImageSharp.Memory; +using SixLabors.ImageSharp.PixelFormats; +using Xunit; +using Xunit.Abstractions; + +namespace SixLabors.ImageSharp.Tests.PixelFormats +{ + public partial class PixelOperationsTests + { + public class Rgba32 : PixelOperationsTests + { + public const string SkipProfilingBenchmarks = +#if true + "Profiling benchmark - enable manually!"; +#else + null; +#endif + + public Rgba32(ITestOutputHelper output) + : base(output) + { + } + + [Fact] + public void IsSpecialImplementation() + { + Assert.IsType(PixelOperations.Instance); + } + + [Fact(Skip = SkipProfilingBenchmarks)] + public void Benchmark_ToVector4() + { + int times = 200000; + int count = 1024; + + using (IMemoryOwner source = Configuration.Default.MemoryAllocator.Allocate(count)) + using (IMemoryOwner dest = Configuration.Default.MemoryAllocator.Allocate(count)) + { + this.Measure( + times, + () => + { + PixelOperations.Instance.ToVector4(source.GetSpan(), dest.GetSpan(), count); + }); + } + } + } + + public class Argb32 : PixelOperationsTests + { + public Argb32(ITestOutputHelper output) + : base(output) + { + } + } + + [Theory] + [WithBlankImages(1, 1, PixelTypes.All)] + public void GetGlobalInstance(TestImageProvider dummy) + where TPixel : struct, IPixel + { + Assert.NotNull(PixelOperations.Instance); } [Fact] @@ -99,594 +79,594 @@ public void IsOpaqueColor() Assert.False(new GraphicsOptions(true).IsOpaqueColorWithoutBlending(ImageSharp.PixelFormats.Rgba32.Transparent)); Assert.False(new GraphicsOptions(true, PixelColorBlendingMode.Lighten, 1).IsOpaqueColorWithoutBlending(ImageSharp.PixelFormats.Rgba32.Red)); Assert.False(new GraphicsOptions(true, PixelColorBlendingMode.Normal,PixelAlphaCompositionMode.DestOver, 1).IsOpaqueColorWithoutBlending(ImageSharp.PixelFormats.Rgba32.Red)); - } - } - - public abstract class PixelOperationsTests : MeasureFixture - where TPixel : struct, IPixel - { - protected PixelOperationsTests(ITestOutputHelper output) - : base(output) - { - } - - public static TheoryData ArraySizesData => new TheoryData { 7, 16, 1111 }; - - private static PixelOperations Operations => PixelOperations.Instance; - - internal static TPixel[] CreateExpectedPixelData(Vector4[] source) - { - var expected = new TPixel[source.Length]; - - for (int i = 0; i < expected.Length; i++) - { - expected[i].PackFromVector4(source[i]); - } - return expected; - } - - internal static TPixel[] CreateScaledExpectedPixelData(Vector4[] source) - { - var expected = new TPixel[source.Length]; - - for (int i = 0; i < expected.Length; i++) - { - expected[i].PackFromScaledVector4(source[i]); - } - return expected; - } - - [Theory] - [MemberData(nameof(ArraySizesData))] - public void PackFromVector4(int count) - { - Vector4[] source = CreateVector4TestData(count); - TPixel[] expected = CreateExpectedPixelData(source); - - TestOperation( - source, - expected, - (s, d) => Operations.PackFromVector4(s, d.GetSpan(), count) - ); - } - - [Theory] - [MemberData(nameof(ArraySizesData))] - public void PackFromScaledVector4(int count) - { - Vector4[] source = CreateVector4TestData(count); - TPixel[] expected = CreateScaledExpectedPixelData(source); - - TestOperation( - source, - expected, - (s, d) => Operations.PackFromScaledVector4(s, d.GetSpan(), count) - ); - } - - internal static Vector4[] CreateExpectedVector4Data(TPixel[] source) - { - var expected = new Vector4[source.Length]; - - for (int i = 0; i < expected.Length; i++) - { - expected[i] = source[i].ToVector4(); - } - return expected; - } - - internal static Vector4[] CreateExpectedScaledVector4Data(TPixel[] source) - { - var expected = new Vector4[source.Length]; - - for (int i = 0; i < expected.Length; i++) - { - expected[i] = source[i].ToScaledVector4(); - } - return expected; - } - - [Theory] - [MemberData(nameof(ArraySizesData))] - public void ToVector4(int count) - { - TPixel[] source = CreatePixelTestData(count); - Vector4[] expected = CreateExpectedVector4Data(source); - - TestOperation( - source, - expected, - (s, d) => Operations.ToVector4(s, d.GetSpan(), count) - ); - } - - [Theory] - [MemberData(nameof(ArraySizesData))] - public void ToScaledVector4(int count) - { - TPixel[] source = CreateScaledPixelTestData(count); - Vector4[] expected = CreateExpectedScaledVector4Data(source); - - TestOperation( - source, - expected, - (s, d) => Operations.ToScaledVector4(s, d.GetSpan(), count) - ); - } - - [Theory] - [MemberData(nameof(ArraySizesData))] - public void PackFromRgb24Bytes(int count) - { - byte[] source = CreateByteTestData(count * 3); - var expected = new TPixel[count]; - - for (int i = 0; i < count; i++) - { - int i3 = i * 3; - - expected[i].PackFromRgba32(new Rgba32(source[i3 + 0], source[i3 + 1], source[i3 + 2], 255)); - } - - TestOperation( - source, - expected, - (s, d) => Operations.PackFromRgb24Bytes(s, d.GetSpan(), count) - ); - } - - [Theory] - [MemberData(nameof(ArraySizesData))] - public void ToRgb24Bytes(int count) - { - TPixel[] source = CreatePixelTestData(count); - byte[] expected = new byte[count * 3]; - var rgb = default(Rgb24); - - for (int i = 0; i < count; i++) - { - int i3 = i * 3; - source[i].ToRgb24(ref rgb); - expected[i3] = rgb.R; - expected[i3 + 1] = rgb.G; - expected[i3 + 2] = rgb.B; - } - - TestOperation( - source, - expected, - (s, d) => Operations.ToRgb24Bytes(s, d.GetSpan(), count) - ); - } - - [Theory] - [MemberData(nameof(ArraySizesData))] - public void PackFromRgba32Bytes(int count) - { - byte[] source = CreateByteTestData(count * 4); - var expected = new TPixel[count]; - - for (int i = 0; i < count; i++) - { - int i4 = i * 4; - - expected[i].PackFromRgba32(new Rgba32(source[i4 + 0], source[i4 + 1], source[i4 + 2], source[i4 + 3])); - } - - TestOperation( - source, - expected, - (s, d) => Operations.PackFromRgba32Bytes(s, d.GetSpan(), count) - ); - } - - [Theory] - [MemberData(nameof(ArraySizesData))] - public void ToRgba32Bytes(int count) - { - TPixel[] source = CreatePixelTestData(count); - byte[] expected = new byte[count * 4]; - var rgba = default(Rgba32); - - for (int i = 0; i < count; i++) - { - int i4 = i * 4; - source[i].ToRgba32(ref rgba); - expected[i4] = rgba.R; - expected[i4 + 1] = rgba.G; - expected[i4 + 2] = rgba.B; - expected[i4 + 3] = rgba.A; - } - - TestOperation( - source, - expected, - (s, d) => Operations.ToRgba32Bytes(s, d.GetSpan(), count) - ); - } - - [Theory] - [MemberData(nameof(ArraySizesData))] - public void PackFromRgb48Bytes(int count) - { - byte[] source = CreateByteTestData(count * 6); - Span sourceSpan = source.AsSpan(); - var expected = new TPixel[count]; - - var rgba64 = new Rgba64(0, 0, 0, 65535); - for (int i = 0; i < count; i++) - { - int i6 = i * 6; - rgba64.Rgb = MemoryMarshal.Cast(sourceSpan.Slice(i6, 6))[0]; - expected[i].PackFromRgba64(rgba64); - } - - TestOperation( - source, - expected, - (s, d) => Operations.PackFromRgb48Bytes(s, d.GetSpan(), count) - ); - } - - [Theory] - [MemberData(nameof(ArraySizesData))] - public void ToRgb48Bytes(int count) - { - TPixel[] source = CreatePixelTestData(count); - byte[] expected = new byte[count * 6]; - Rgb48 rgb = default; - - for (int i = 0; i < count; i++) - { - int i6 = i * 6; - source[i].ToRgb48(ref rgb); - Rgba64Bytes rgb48Bytes = Unsafe.As(ref rgb); - expected[i6] = rgb48Bytes[0]; - expected[i6 + 1] = rgb48Bytes[1]; - expected[i6 + 2] = rgb48Bytes[2]; - expected[i6 + 3] = rgb48Bytes[3]; - expected[i6 + 4] = rgb48Bytes[4]; - expected[i6 + 5] = rgb48Bytes[5]; - } - - TestOperation( - source, - expected, - (s, d) => Operations.ToRgb48Bytes(s, d.GetSpan(), count) - ); - } - - [Theory] - [MemberData(nameof(ArraySizesData))] - public void PackFromRgba64Bytes(int count) - { - byte[] source = CreateByteTestData(count * 8); - Span sourceSpan = source.AsSpan(); - var expected = new TPixel[count]; - - for (int i = 0; i < count; i++) - { - int i8 = i * 8; - expected[i].PackFromRgba64(MemoryMarshal.Cast(sourceSpan.Slice(i8, 8))[0]); - } - - TestOperation( - source, - expected, - (s, d) => Operations.PackFromRgba64Bytes(s, d.GetSpan(), count) - ); - } - - [Theory] - [MemberData(nameof(ArraySizesData))] - public void ToRgba64Bytes(int count) - { - TPixel[] source = CreatePixelTestData(count); - byte[] expected = new byte[count * 8]; - Rgba64 rgba = default; - - for (int i = 0; i < count; i++) - { - int i8 = i * 8; - source[i].ToRgba64(ref rgba); - Rgba64Bytes rgba64Bytes = Unsafe.As(ref rgba); - expected[i8] = rgba64Bytes[0]; - expected[i8 + 1] = rgba64Bytes[1]; - expected[i8 + 2] = rgba64Bytes[2]; - expected[i8 + 3] = rgba64Bytes[3]; - expected[i8 + 4] = rgba64Bytes[4]; - expected[i8 + 5] = rgba64Bytes[5]; - expected[i8 + 6] = rgba64Bytes[6]; - expected[i8 + 7] = rgba64Bytes[7]; - } - - TestOperation( - source, - expected, - (s, d) => Operations.ToRgba64Bytes(s, d.GetSpan(), count) - ); - } - - [Theory] - [MemberData(nameof(ArraySizesData))] - public void PackFromBgr24Bytes(int count) - { - byte[] source = CreateByteTestData(count * 3); - var expected = new TPixel[count]; - - for (int i = 0; i < count; i++) - { - int i3 = i * 3; - - expected[i].PackFromRgba32(new Rgba32(source[i3 + 2], source[i3 + 1], source[i3 + 0], 255)); - } - - TestOperation( - source, - expected, - (s, d) => Operations.PackFromBgr24Bytes(s, d.GetSpan(), count) - ); - } - - [Theory] - [MemberData(nameof(ArraySizesData))] - public void ToBgr24Bytes(int count) - { - TPixel[] source = CreatePixelTestData(count); - byte[] expected = new byte[count * 3]; - var bgr = default(Bgr24); - - for (int i = 0; i < count; i++) - { - int i3 = i * 3; - source[i].ToBgr24(ref bgr); - expected[i3] = bgr.B; - expected[i3 + 1] = bgr.G; - expected[i3 + 2] = bgr.R; - } - - TestOperation( - source, - expected, - (s, d) => Operations.ToBgr24Bytes(s, d.GetSpan(), count) - ); - } - - [Theory] - [MemberData(nameof(ArraySizesData))] - public void PackFromBgra32Bytes(int count) - { - byte[] source = CreateByteTestData(count * 4); - var expected = new TPixel[count]; - - for (int i = 0; i < count; i++) - { - int i4 = i * 4; - - expected[i].PackFromRgba32(new Rgba32(source[i4 + 2], source[i4 + 1], source[i4 + 0], source[i4 + 3])); - } - - TestOperation( - source, - expected, - (s, d) => Operations.PackFromBgra32Bytes(s, d.GetSpan(), count) - ); - } - - [Theory] - [MemberData(nameof(ArraySizesData))] - public void ToZyxwBytes(int count) - { - TPixel[] source = CreatePixelTestData(count); - byte[] expected = new byte[count * 4]; - var bgra = default(Bgra32); - - for (int i = 0; i < count; i++) - { - int i4 = i * 4; - source[i].ToBgra32(ref bgra); - expected[i4] = bgra.B; - expected[i4 + 1] = bgra.G; - expected[i4 + 2] = bgra.R; - expected[i4 + 3] = bgra.A; - } - - TestOperation( - source, - expected, - (s, d) => Operations.ToBgra32Bytes(s, d.GetSpan(), count) - ); - } - - [Theory] - [MemberData(nameof(ArraySizesData))] - public void PackFromArgb32Bytes(int count) - { - byte[] source = CreateByteTestData(count * 4); - var expected = new TPixel[count]; - - for (int i = 0; i < count; i++) - { - int i4 = i * 4; - - expected[i].PackFromRgba32(new Rgba32(source[i4 + 1], source[i4 + 2], source[i4 + 3], source[i4 + 0])); - } - - TestOperation( - source, - expected, - (s, d) => Operations.PackFromArgb32Bytes(s, d.GetSpan(), count) - ); - } - - [Theory] - [MemberData(nameof(ArraySizesData))] - public void ToArgb32Bytes(int count) - { - TPixel[] source = CreatePixelTestData(count); - byte[] expected = new byte[count * 4]; - var argb = default(Argb32); - - for (int i = 0; i < count; i++) - { - int i4 = i * 4; - source[i].ToArgb32(ref argb); - expected[i4] = argb.A; - expected[i4 + 1] = argb.R; - expected[i4 + 2] = argb.G; - expected[i4 + 3] = argb.B; - } - - TestOperation( - source, - expected, - (s, d) => Operations.ToArgb32Bytes(s, d.GetSpan(), count) - ); - } - - private class TestBuffers : IDisposable - where TSource : struct - where TDest : struct - { - public TSource[] SourceBuffer { get; } - public IMemoryOwner ActualDestBuffer { get; } - public TDest[] ExpectedDestBuffer { get; } - - public TestBuffers(TSource[] source, TDest[] expectedDest) - { - this.SourceBuffer = source; - this.ExpectedDestBuffer = expectedDest; - this.ActualDestBuffer = Configuration.Default.MemoryAllocator.Allocate(expectedDest.Length); - } - - public void Dispose() - { - this.ActualDestBuffer.Dispose(); - } - - private const float Tolerance = 0.0001f; - - public void Verify() - { - int count = this.ExpectedDestBuffer.Length; - - if (typeof(TDest) == typeof(Vector4)) - { - - Span expected = MemoryMarshal.Cast(this.ExpectedDestBuffer.AsSpan()); - Span actual = MemoryMarshal.Cast(this.ActualDestBuffer.GetSpan()); - - for (int i = 0; i < count; i++) - { - // ReSharper disable PossibleNullReferenceException - Assert.Equal(expected[i], actual[i], new ApproximateFloatComparer(0.001f)); - // ReSharper restore PossibleNullReferenceException - } - } - else - { - Span expected = this.ExpectedDestBuffer.AsSpan(); - Span actual = this.ActualDestBuffer.GetSpan(); - for (int i = 0; i < count; i++) - { - Assert.Equal(expected[i], actual[i]); - } - } - } - } - - internal static void TestOperation( - TSource[] source, - TDest[] expected, - Action> action) - where TSource : struct - where TDest : struct - { - using (var buffers = new TestBuffers(source, expected)) - { - action(buffers.SourceBuffer, buffers.ActualDestBuffer); - buffers.Verify(); - } - } - - internal static Vector4[] CreateVector4TestData(int length) - { - var result = new Vector4[length]; - var rnd = new Random(42); // Deterministic random values - - for (int i = 0; i < result.Length; i++) - { - result[i] = GetVector(rnd); - } - return result; - } - - internal static TPixel[] CreatePixelTestData(int length) - { - var result = new TPixel[length]; - - var rnd = new Random(42); // Deterministic random values - - for (int i = 0; i < result.Length; i++) - { - Vector4 v = GetVector(rnd); - result[i].PackFromVector4(v); - } - - return result; - } - - internal static TPixel[] CreateScaledPixelTestData(int length) - { - var result = new TPixel[length]; - - var rnd = new Random(42); // Deterministic random values - - for (int i = 0; i < result.Length; i++) - { - Vector4 v = GetVector(rnd); - result[i].PackFromScaledVector4(v); - } - - return result; - } - - internal static byte[] CreateByteTestData(int length) - { - byte[] result = new byte[length]; - var rnd = new Random(42); // Deterministic random values - - for (int i = 0; i < result.Length; i++) - { - result[i] = (byte)rnd.Next(255); - } - return result; - } - - internal static Vector4 GetVector(Random rnd) - { - return new Vector4( - (float)rnd.NextDouble(), - (float)rnd.NextDouble(), - (float)rnd.NextDouble(), - (float)rnd.NextDouble() - ); - } - - [StructLayout(LayoutKind.Sequential)] - private unsafe struct Rgba64Bytes - { - public fixed byte Data[8]; - - public byte this[int idx] - { - [MethodImpl(MethodImplOptions.AggressiveInlining)] - get - { - ref byte self = ref Unsafe.As(ref this); - return Unsafe.Add(ref self, idx); - } - } - } - } + } + } + + public abstract class PixelOperationsTests : MeasureFixture + where TPixel : struct, IPixel + { + protected PixelOperationsTests(ITestOutputHelper output) + : base(output) + { + } + + public static TheoryData ArraySizesData => new TheoryData { 0, 1, 2, 7, 16, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 1111 }; + + private static PixelOperations Operations => PixelOperations.Instance; + + internal static TPixel[] CreateExpectedPixelData(Vector4[] source) + { + var expected = new TPixel[source.Length]; + + for (int i = 0; i < expected.Length; i++) + { + expected[i].PackFromVector4(source[i]); + } + return expected; + } + + internal static TPixel[] CreateScaledExpectedPixelData(Vector4[] source) + { + var expected = new TPixel[source.Length]; + + for (int i = 0; i < expected.Length; i++) + { + expected[i].PackFromScaledVector4(source[i]); + } + return expected; + } + + [Theory] + [MemberData(nameof(ArraySizesData))] + public void PackFromVector4(int count) + { + Vector4[] source = CreateVector4TestData(count); + TPixel[] expected = CreateExpectedPixelData(source); + + TestOperation( + source, + expected, + (s, d) => Operations.PackFromVector4(s, d.GetSpan(), count) + ); + } + + [Theory] + [MemberData(nameof(ArraySizesData))] + public void PackFromScaledVector4(int count) + { + Vector4[] source = CreateVector4TestData(count); + TPixel[] expected = CreateScaledExpectedPixelData(source); + + TestOperation( + source, + expected, + (s, d) => Operations.PackFromScaledVector4(s, d.GetSpan(), count) + ); + } + + internal static Vector4[] CreateExpectedVector4Data(TPixel[] source) + { + var expected = new Vector4[source.Length]; + + for (int i = 0; i < expected.Length; i++) + { + expected[i] = source[i].ToVector4(); + } + return expected; + } + + internal static Vector4[] CreateExpectedScaledVector4Data(TPixel[] source) + { + var expected = new Vector4[source.Length]; + + for (int i = 0; i < expected.Length; i++) + { + expected[i] = source[i].ToScaledVector4(); + } + return expected; + } + + [Theory] + [MemberData(nameof(ArraySizesData))] + public void ToVector4(int count) + { + TPixel[] source = CreatePixelTestData(count); + Vector4[] expected = CreateExpectedVector4Data(source); + + TestOperation( + source, + expected, + (s, d) => Operations.ToVector4(s, d.GetSpan(), count) + ); + } + + [Theory] + [MemberData(nameof(ArraySizesData))] + public void ToScaledVector4(int count) + { + TPixel[] source = CreateScaledPixelTestData(count); + Vector4[] expected = CreateExpectedScaledVector4Data(source); + + TestOperation( + source, + expected, + (s, d) => Operations.ToScaledVector4(s, d.GetSpan(), count) + ); + } + + [Theory] + [MemberData(nameof(ArraySizesData))] + public void PackFromRgb24Bytes(int count) + { + byte[] source = CreateByteTestData(count * 3); + var expected = new TPixel[count]; + + for (int i = 0; i < count; i++) + { + int i3 = i * 3; + + expected[i].PackFromRgba32(new Rgba32(source[i3 + 0], source[i3 + 1], source[i3 + 2], 255)); + } + + TestOperation( + source, + expected, + (s, d) => Operations.PackFromRgb24Bytes(s, d.GetSpan(), count) + ); + } + + [Theory] + [MemberData(nameof(ArraySizesData))] + public void ToRgb24Bytes(int count) + { + TPixel[] source = CreatePixelTestData(count); + byte[] expected = new byte[count * 3]; + var rgb = default(Rgb24); + + for (int i = 0; i < count; i++) + { + int i3 = i * 3; + source[i].ToRgb24(ref rgb); + expected[i3] = rgb.R; + expected[i3 + 1] = rgb.G; + expected[i3 + 2] = rgb.B; + } + + TestOperation( + source, + expected, + (s, d) => Operations.ToRgb24Bytes(s, d.GetSpan(), count) + ); + } + + [Theory] + [MemberData(nameof(ArraySizesData))] + public void PackFromRgba32Bytes(int count) + { + byte[] source = CreateByteTestData(count * 4); + var expected = new TPixel[count]; + + for (int i = 0; i < count; i++) + { + int i4 = i * 4; + + expected[i].PackFromRgba32(new Rgba32(source[i4 + 0], source[i4 + 1], source[i4 + 2], source[i4 + 3])); + } + + TestOperation( + source, + expected, + (s, d) => Operations.PackFromRgba32Bytes(s, d.GetSpan(), count) + ); + } + + [Theory] + [MemberData(nameof(ArraySizesData))] + public void ToRgba32Bytes(int count) + { + TPixel[] source = CreatePixelTestData(count); + byte[] expected = new byte[count * 4]; + var rgba = default(Rgba32); + + for (int i = 0; i < count; i++) + { + int i4 = i * 4; + source[i].ToRgba32(ref rgba); + expected[i4] = rgba.R; + expected[i4 + 1] = rgba.G; + expected[i4 + 2] = rgba.B; + expected[i4 + 3] = rgba.A; + } + + TestOperation( + source, + expected, + (s, d) => Operations.ToRgba32Bytes(s, d.GetSpan(), count) + ); + } + + [Theory] + [MemberData(nameof(ArraySizesData))] + public void PackFromRgb48Bytes(int count) + { + byte[] source = CreateByteTestData(count * 6); + Span sourceSpan = source.AsSpan(); + var expected = new TPixel[count]; + + var rgba64 = new Rgba64(0, 0, 0, 65535); + for (int i = 0; i < count; i++) + { + int i6 = i * 6; + rgba64.Rgb = MemoryMarshal.Cast(sourceSpan.Slice(i6, 6))[0]; + expected[i].PackFromRgba64(rgba64); + } + + TestOperation( + source, + expected, + (s, d) => Operations.PackFromRgb48Bytes(s, d.GetSpan(), count) + ); + } + + [Theory] + [MemberData(nameof(ArraySizesData))] + public void ToRgb48Bytes(int count) + { + TPixel[] source = CreatePixelTestData(count); + byte[] expected = new byte[count * 6]; + Rgb48 rgb = default; + + for (int i = 0; i < count; i++) + { + int i6 = i * 6; + source[i].ToRgb48(ref rgb); + Rgba64Bytes rgb48Bytes = Unsafe.As(ref rgb); + expected[i6] = rgb48Bytes[0]; + expected[i6 + 1] = rgb48Bytes[1]; + expected[i6 + 2] = rgb48Bytes[2]; + expected[i6 + 3] = rgb48Bytes[3]; + expected[i6 + 4] = rgb48Bytes[4]; + expected[i6 + 5] = rgb48Bytes[5]; + } + + TestOperation( + source, + expected, + (s, d) => Operations.ToRgb48Bytes(s, d.GetSpan(), count) + ); + } + + [Theory] + [MemberData(nameof(ArraySizesData))] + public void PackFromRgba64Bytes(int count) + { + byte[] source = CreateByteTestData(count * 8); + Span sourceSpan = source.AsSpan(); + var expected = new TPixel[count]; + + for (int i = 0; i < count; i++) + { + int i8 = i * 8; + expected[i].PackFromRgba64(MemoryMarshal.Cast(sourceSpan.Slice(i8, 8))[0]); + } + + TestOperation( + source, + expected, + (s, d) => Operations.PackFromRgba64Bytes(s, d.GetSpan(), count) + ); + } + + [Theory] + [MemberData(nameof(ArraySizesData))] + public void ToRgba64Bytes(int count) + { + TPixel[] source = CreatePixelTestData(count); + byte[] expected = new byte[count * 8]; + Rgba64 rgba = default; + + for (int i = 0; i < count; i++) + { + int i8 = i * 8; + source[i].ToRgba64(ref rgba); + Rgba64Bytes rgba64Bytes = Unsafe.As(ref rgba); + expected[i8] = rgba64Bytes[0]; + expected[i8 + 1] = rgba64Bytes[1]; + expected[i8 + 2] = rgba64Bytes[2]; + expected[i8 + 3] = rgba64Bytes[3]; + expected[i8 + 4] = rgba64Bytes[4]; + expected[i8 + 5] = rgba64Bytes[5]; + expected[i8 + 6] = rgba64Bytes[6]; + expected[i8 + 7] = rgba64Bytes[7]; + } + + TestOperation( + source, + expected, + (s, d) => Operations.ToRgba64Bytes(s, d.GetSpan(), count) + ); + } + + [Theory] + [MemberData(nameof(ArraySizesData))] + public void PackFromBgr24Bytes(int count) + { + byte[] source = CreateByteTestData(count * 3); + var expected = new TPixel[count]; + + for (int i = 0; i < count; i++) + { + int i3 = i * 3; + + expected[i].PackFromRgba32(new Rgba32(source[i3 + 2], source[i3 + 1], source[i3 + 0], 255)); + } + + TestOperation( + source, + expected, + (s, d) => Operations.PackFromBgr24Bytes(s, d.GetSpan(), count) + ); + } + + [Theory] + [MemberData(nameof(ArraySizesData))] + public void ToBgr24Bytes(int count) + { + TPixel[] source = CreatePixelTestData(count); + byte[] expected = new byte[count * 3]; + var bgr = default(Bgr24); + + for (int i = 0; i < count; i++) + { + int i3 = i * 3; + source[i].ToBgr24(ref bgr); + expected[i3] = bgr.B; + expected[i3 + 1] = bgr.G; + expected[i3 + 2] = bgr.R; + } + + TestOperation( + source, + expected, + (s, d) => Operations.ToBgr24Bytes(s, d.GetSpan(), count) + ); + } + + [Theory] + [MemberData(nameof(ArraySizesData))] + public void PackFromBgra32Bytes(int count) + { + byte[] source = CreateByteTestData(count * 4); + var expected = new TPixel[count]; + + for (int i = 0; i < count; i++) + { + int i4 = i * 4; + + expected[i].PackFromRgba32(new Rgba32(source[i4 + 2], source[i4 + 1], source[i4 + 0], source[i4 + 3])); + } + + TestOperation( + source, + expected, + (s, d) => Operations.PackFromBgra32Bytes(s, d.GetSpan(), count) + ); + } + + [Theory] + [MemberData(nameof(ArraySizesData))] + public void ToZyxwBytes(int count) + { + TPixel[] source = CreatePixelTestData(count); + byte[] expected = new byte[count * 4]; + var bgra = default(Bgra32); + + for (int i = 0; i < count; i++) + { + int i4 = i * 4; + source[i].ToBgra32(ref bgra); + expected[i4] = bgra.B; + expected[i4 + 1] = bgra.G; + expected[i4 + 2] = bgra.R; + expected[i4 + 3] = bgra.A; + } + + TestOperation( + source, + expected, + (s, d) => Operations.ToBgra32Bytes(s, d.GetSpan(), count) + ); + } + + [Theory] + [MemberData(nameof(ArraySizesData))] + public void PackFromArgb32Bytes(int count) + { + byte[] source = CreateByteTestData(count * 4); + var expected = new TPixel[count]; + + for (int i = 0; i < count; i++) + { + int i4 = i * 4; + + expected[i].PackFromRgba32(new Rgba32(source[i4 + 1], source[i4 + 2], source[i4 + 3], source[i4 + 0])); + } + + TestOperation( + source, + expected, + (s, d) => Operations.PackFromArgb32Bytes(s, d.GetSpan(), count) + ); + } + + [Theory] + [MemberData(nameof(ArraySizesData))] + public void ToArgb32Bytes(int count) + { + TPixel[] source = CreatePixelTestData(count); + byte[] expected = new byte[count * 4]; + var argb = default(Argb32); + + for (int i = 0; i < count; i++) + { + int i4 = i * 4; + source[i].ToArgb32(ref argb); + expected[i4] = argb.A; + expected[i4 + 1] = argb.R; + expected[i4 + 2] = argb.G; + expected[i4 + 3] = argb.B; + } + + TestOperation( + source, + expected, + (s, d) => Operations.ToArgb32Bytes(s, d.GetSpan(), count) + ); + } + + private class TestBuffers : IDisposable + where TSource : struct + where TDest : struct + { + public TSource[] SourceBuffer { get; } + public IMemoryOwner ActualDestBuffer { get; } + public TDest[] ExpectedDestBuffer { get; } + + public TestBuffers(TSource[] source, TDest[] expectedDest) + { + this.SourceBuffer = source; + this.ExpectedDestBuffer = expectedDest; + this.ActualDestBuffer = Configuration.Default.MemoryAllocator.Allocate(expectedDest.Length); + } + + public void Dispose() + { + this.ActualDestBuffer.Dispose(); + } + + private const float Tolerance = 0.0001f; + + public void Verify() + { + int count = this.ExpectedDestBuffer.Length; + + if (typeof(TDest) == typeof(Vector4)) + { + + Span expected = MemoryMarshal.Cast(this.ExpectedDestBuffer.AsSpan()); + Span actual = MemoryMarshal.Cast(this.ActualDestBuffer.GetSpan()); + + for (int i = 0; i < count; i++) + { + // ReSharper disable PossibleNullReferenceException + Assert.Equal(expected[i], actual[i], new ApproximateFloatComparer(0.001f)); + // ReSharper restore PossibleNullReferenceException + } + } + else + { + Span expected = this.ExpectedDestBuffer.AsSpan(); + Span actual = this.ActualDestBuffer.GetSpan(); + for (int i = 0; i < count; i++) + { + Assert.Equal(expected[i], actual[i]); + } + } + } + } + + internal static void TestOperation( + TSource[] source, + TDest[] expected, + Action> action) + where TSource : struct + where TDest : struct + { + using (var buffers = new TestBuffers(source, expected)) + { + action(buffers.SourceBuffer, buffers.ActualDestBuffer); + buffers.Verify(); + } + } + + internal static Vector4[] CreateVector4TestData(int length) + { + var result = new Vector4[length]; + var rnd = new Random(42); // Deterministic random values + + for (int i = 0; i < result.Length; i++) + { + result[i] = GetVector(rnd); + } + return result; + } + + internal static TPixel[] CreatePixelTestData(int length) + { + var result = new TPixel[length]; + + var rnd = new Random(42); // Deterministic random values + + for (int i = 0; i < result.Length; i++) + { + Vector4 v = GetVector(rnd); + result[i].PackFromVector4(v); + } + + return result; + } + + internal static TPixel[] CreateScaledPixelTestData(int length) + { + var result = new TPixel[length]; + + var rnd = new Random(42); // Deterministic random values + + for (int i = 0; i < result.Length; i++) + { + Vector4 v = GetVector(rnd); + result[i].PackFromScaledVector4(v); + } + + return result; + } + + internal static byte[] CreateByteTestData(int length) + { + byte[] result = new byte[length]; + var rnd = new Random(42); // Deterministic random values + + for (int i = 0; i < result.Length; i++) + { + result[i] = (byte)rnd.Next(255); + } + return result; + } + + internal static Vector4 GetVector(Random rnd) + { + return new Vector4( + (float)rnd.NextDouble(), + (float)rnd.NextDouble(), + (float)rnd.NextDouble(), + (float)rnd.NextDouble() + ); + } + + [StructLayout(LayoutKind.Sequential)] + private unsafe struct Rgba64Bytes + { + public fixed byte Data[8]; + + public byte this[int idx] + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get + { + ref byte self = ref Unsafe.As(ref this); + return Unsafe.Add(ref self, idx); + } + } + } + } } \ No newline at end of file diff --git a/tests/ImageSharp.Tests/TestUtilities/TestDataGenerator.cs b/tests/ImageSharp.Tests/TestUtilities/TestDataGenerator.cs index 0b1b89cc00..912b86e347 100644 --- a/tests/ImageSharp.Tests/TestUtilities/TestDataGenerator.cs +++ b/tests/ImageSharp.Tests/TestUtilities/TestDataGenerator.cs @@ -33,20 +33,28 @@ public static Vector4[] GenerateRandomVectorArray(this Random rnd, int length, f return values; } - public static float[] GenerateRandomRoundedFloatArray(this Random rnd, int length, int minVal, int maxValExclusive) + public static float[] GenerateRandomRoundedFloatArray(this Random rnd, int length, float minVal, float maxVal) { float[] values = new float[length]; for (int i = 0; i < length; i++) { - int val = rnd.Next(minVal, maxValExclusive); - values[i] = (float)val; + values[i] = (float) Math.Round(rnd.GetRandomFloat(minVal, maxVal)); } return values; } - private static float GetRandomFloat(Random rnd, float minVal, float maxVal) + + + public static byte[] GenerateRandomByteArray(this Random rnd, int length) + { + byte[] values = new byte[length]; + rnd.NextBytes(values); + return values; + } + + private static float GetRandomFloat(this Random rnd, float minVal, float maxVal) { return (float)rnd.NextDouble() * (maxVal - minVal) + minVal; } diff --git a/tests/ImageSharp.Tests/TestUtilities/Tests/TestEnvironmentTests.cs b/tests/ImageSharp.Tests/TestUtilities/Tests/TestEnvironmentTests.cs index 8a3e69059f..30bb16c2a0 100644 --- a/tests/ImageSharp.Tests/TestUtilities/Tests/TestEnvironmentTests.cs +++ b/tests/ImageSharp.Tests/TestUtilities/Tests/TestEnvironmentTests.cs @@ -3,6 +3,8 @@ using System; using System.IO; +using System.Reflection; +using System.Runtime.InteropServices; using SixLabors.ImageSharp.Common.Helpers; using SixLabors.ImageSharp.Formats;