SixLabors · antonfirsov · Oct 21, 2018 · Oct 14, 2018 · Oct 14, 2018 · Oct 14, 2018
diff --git a/src/ImageSharp/Common/Helpers/ImageMaths.cs b/src/ImageSharp/Common/Helpers/ImageMaths.cs
@@ -39,14 +39,39 @@ public static int LeastCommonMultiple(int a, int b)
             return (a / GreatestCommonDivisor(a, b)) * b;
         }
 
+        /// <summary>
+        /// Calculates <paramref name="x"/> % 4 using a bit mask; matches the % operator only for non-negative <paramref name="x"/>.
+        /// </summary>
+        [MethodImpl(InliningOptions.ShortMethod)]
+        public static int Modulo4(int x) => x & 3;
+
+        /// <summary>
+        /// Calculates <paramref name="x"/> % 8 using a bit mask; matches the % operator only for non-negative <paramref name="x"/>.
+        /// </summary>
+        [MethodImpl(InliningOptions.ShortMethod)]
+        public static int Modulo8(int x) => x & 7;
+
+        /// <summary>
+        /// Fast (x mod m) calculator, with the restriction that
+        /// <paramref name="m"/> must be a positive power of 2 (the result is computed by masking with m - 1).
+        /// </summary>
+        [MethodImpl(InliningOptions.ShortMethod)]
+        public static int ModuloP2(int x, int m)
+        {
+            return x & (m - 1);
+        }
+
+        [MethodImpl(InliningOptions.ShortMethod)]
+        public static float Clamp(float x, float min, float max) => Math.Min(max, Math.Max(min, x));
+
         /// <summary>
         /// Returns the absolute value of a 32-bit signed integer. Uses bit shifting to speed up the operation.
         /// </summary>
         /// <param name="x">
         /// A number that is greater than <see cref="int.MinValue"/>, but less than or equal to <see cref="int.MaxValue"/>
         /// </param>
         /// <returns>The <see cref="int"/></returns>
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        [MethodImpl(InliningOptions.ShortMethod)]
         public static int FastAbs(int x)
         {
             int y = x >> 31;
@@ -58,15 +83,15 @@ public static int FastAbs(int x)
         /// </summary>
         /// <param name="x">A single-precision floating-point number</param>
         /// <returns>The number <paramref name="x" /> raised to the power of 2.</returns>
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        [MethodImpl(InliningOptions.ShortMethod)]
         public static float Pow2(float x) => x * x;
 
         /// <summary>
         /// Returns a specified number raised to the power of 3
         /// </summary>
         /// <param name="x">A single-precision floating-point number</param>
         /// <returns>The number <paramref name="x" /> raised to the power of 3.</returns>
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        [MethodImpl(InliningOptions.ShortMethod)]
         public static float Pow3(float x) => x * x * x;
 
         /// <summary>
@@ -77,15 +102,15 @@ public static int FastAbs(int x)
         /// <returns>
         /// The <see cref="int"/>
         /// </returns>
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        [MethodImpl(InliningOptions.ShortMethod)]
         public static int GetBitsNeededForColorDepth(int colors) => Math.Max(1, (int)Math.Ceiling(Math.Log(colors, 2)));
 
         /// <summary>
         /// Returns how many colors will be created by the specified number of bits.
         /// </summary>
         /// <param name="bitDepth">The bit depth.</param>
         /// <returns>The <see cref="int"/></returns>
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        [MethodImpl(InliningOptions.ShortMethod)]
         public static int GetColorCountForBitDepth(int bitDepth) => 1 << bitDepth;
 
         /// <summary>
@@ -94,7 +119,7 @@ public static int FastAbs(int x)
         /// <param name="x">The x provided to G(x).</param>
         /// <param name="sigma">The spread of the blur.</param>
         /// <returns>The Gaussian G(x)</returns>
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        [MethodImpl(InliningOptions.ShortMethod)]
         public static float Gaussian(float x, float sigma)
         {
             const float Numerator = 1.0f;
@@ -117,7 +142,7 @@ public static float Gaussian(float x, float sigma)
         /// <returns>
         /// The sine cardinal of <paramref name="f" />.
         /// </returns>
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        [MethodImpl(InliningOptions.ShortMethod)]
         public static float SinC(float f)
         {
             if (MathF.Abs(f) > Constants.Epsilon)
@@ -140,7 +165,7 @@ public static float SinC(float f)
         /// <returns>
         /// The <see cref="float"/>.
         /// </returns>
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        [MethodImpl(InliningOptions.ShortMethod)]
         public static float GetBcValue(float x, float b, float c)
         {
             if (x < 0F)
@@ -176,7 +201,7 @@ public static float GetBcValue(float x, float b, float c)
         /// <returns>
         /// The bounding <see cref="Rectangle"/>.
         /// </returns>
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        [MethodImpl(InliningOptions.ShortMethod)]
         public static Rectangle GetBoundingRectangle(Point topLeft, Point bottomRight) => new Rectangle(topLeft.X, topLeft.Y, bottomRight.X - topLeft.X, bottomRight.Y - topLeft.Y);
 
         /// <summary>

diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.BasicIntrinsics256.cs b/src/ImageSharp/Common/Helpers/SimdUtils.BasicIntrinsics256.cs
@@ -14,35 +14,65 @@ namespace SixLabors.ImageSharp
     internal static partial class SimdUtils
     {
         /// <summary>
-        /// 256bit / AVX2 intrinsics NOT depending on newer API-s (Vector.Widen, Vector.Narrow, Vector.ConvertTo*)
+        /// Implementation with 256-bit / AVX2 intrinsics that does NOT depend on newer APIs (Vector.Widen etc.)
         /// </summary>
         public static class BasicIntrinsics256
         {
             public static bool IsAvailable { get; } = IsAvx2CompatibleArchitecture;
 
             /// <summary>
-            /// <see cref="BulkConvertByteToNormalizedFloat"/> as much elements as possible, slicing them down (keeping the remainder).
+            /// Runs <see cref="BulkConvertByteToNormalizedFloat"/> on as many elements as possible (the largest multiple of 8), then slices both spans down to the unprocessed remainder.
             /// </summary>
+            [MethodImpl(InliningOptions.ShortMethod)]
             internal static void BulkConvertByteToNormalizedFloatReduce(
                 ref ReadOnlySpan<byte> source,
                 ref Span<float> dest)
             {
                 DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same size!");
 
-                if (IsAvailable)
+                if (!IsAvailable)
                 {
-                    int remainder = source.Length % 8;
-                    int alignedCount = source.Length - remainder;
-
-                    if (alignedCount > 0)
-                    {
-                        BulkConvertByteToNormalizedFloat(
-                            source.Slice(0, alignedCount),
-                            dest.Slice(0, alignedCount));
-
-                        source = source.Slice(alignedCount);
-                        dest = dest.Slice(alignedCount);
-                    }
+                    return;
+                }
+
+                int remainder = ImageMaths.Modulo8(source.Length);
+                int adjustedCount = source.Length - remainder;
+
+                if (adjustedCount > 0)
+                {
+                    BulkConvertByteToNormalizedFloat(
+                        source.Slice(0, adjustedCount),
+                        dest.Slice(0, adjustedCount));
+
+                    source = source.Slice(adjustedCount);
+                    dest = dest.Slice(adjustedCount);
+                }
+            }
+
+            /// <summary>
+            /// Runs <see cref="BulkConvertNormalizedFloatToByteClampOverflows"/> on as many elements as possible (the largest multiple of 8), then slices both spans down to the unprocessed remainder.
+            /// </summary>
+            [MethodImpl(InliningOptions.ShortMethod)]
+            internal static void BulkConvertNormalizedFloatToByteClampOverflowsReduce(
+                ref ReadOnlySpan<float> source,
+                ref Span<byte> dest)
+            {
+                DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same size!");
+
+                if (!IsAvailable)
+                {
+                    return;
+                }
+
+                int remainder = ImageMaths.Modulo8(source.Length);
+                int adjustedCount = source.Length - remainder;
+
+                if (adjustedCount > 0)
+                {
+                    BulkConvertNormalizedFloatToByteClampOverflows(source.Slice(0, adjustedCount), dest.Slice(0, adjustedCount));
+
+                    source = source.Slice(adjustedCount);
+                    dest = dest.Slice(adjustedCount);
                 }
             }
 
@@ -57,7 +87,7 @@ internal static void BulkConvertByteToNormalizedFloat(ReadOnlySpan<byte> source,
             {
                 GuardAvx2(nameof(BulkConvertByteToNormalizedFloat));
 
-                DebugGuard.IsTrue((dest.Length % 8) == 0, nameof(source), "dest.Length should be divisable by 8!");
+                DebugGuard.IsTrue(ImageMaths.Modulo8(dest.Length) == 0, nameof(source), "dest.Length should be divisable by 8!");
 
                 var bVec = new Vector<float>(256.0f / 255.0f);
                 var magicFloat = new Vector<float>(32768.0f);
@@ -93,38 +123,14 @@ internal static void BulkConvertByteToNormalizedFloat(ReadOnlySpan<byte> source,
                 }
             }
 
-            /// <summary>
-            /// <see cref="BulkConvertNormalizedFloatToByteClampOverflows"/> as much elements as possible, slicing them down (keeping the remainder).
-            /// </summary>
-            internal static void BulkConvertNormalizedFloatToByteClampOverflowsReduce(
-                ref ReadOnlySpan<float> source,
-                ref Span<byte> dest)
-            {
-                DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same size!");
-
-                if (IsAvailable)
-                {
-                    int remainder = source.Length % Vector<byte>.Count;
-                    int alignedCount = source.Length - remainder;
-
-                    if (alignedCount > 0)
-                    {
-                        BulkConvertNormalizedFloatToByteClampOverflows(source.Slice(0, alignedCount), dest.Slice(0, alignedCount));
-
-                        source = source.Slice(alignedCount);
-                        dest = dest.Slice(alignedCount);
-                    }
-                }
-            }
-
             /// <summary>
             /// Implementation of <see cref="SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows"/> which is faster on older runtimes.
             /// </summary>
             internal static void BulkConvertNormalizedFloatToByteClampOverflows(ReadOnlySpan<float> source, Span<byte> dest)
             {
                 GuardAvx2(nameof(BulkConvertNormalizedFloatToByteClampOverflows));
 
-                DebugGuard.IsTrue((source.Length % 8) == 0, nameof(source), "source.Length should be divisible by 8!");
+                DebugGuard.IsTrue(ImageMaths.Modulo8(source.Length) == 0, nameof(source), "source.Length should be divisible by 8!");
 
                 if (source.Length == 0)
                 {
@@ -174,7 +180,10 @@ internal static void BulkConvertNormalizedFloatToByte(ReadOnlySpan<float> source
             {
                 GuardAvx2(nameof(BulkConvertNormalizedFloatToByte));
 
-                DebugGuard.IsTrue((source.Length % Vector<float>.Count) == 0, nameof(source), "source.Length should be divisable by Vector<float>.Count!");
+                DebugGuard.IsTrue(
+                    ImageMaths.Modulo8(source.Length) == 0,
+                    nameof(source),
+                    "source.Length should be divisible by 8!");
 
                 if (source.Length == 0)
                 {