From 3bc39e42c83e67b2a0d85d555884fcb998dfff38 Mon Sep 17 00:00:00 2001 From: Clinton Ingram Date: Wed, 15 Jan 2025 13:38:49 -0800 Subject: [PATCH 1/7] improve support for 512-bit Vector --- src/coreclr/jit/hwintrinsicxarch.cpp | 180 ++++--- src/coreclr/jit/importer.cpp | 2 +- .../Common/Compiler/InstructionSetSupport.cs | 27 +- .../tests/GenericVectorTests.cs | 39 ++ .../src/System/Numerics/Vector.cs | 21 +- .../System/Runtime/Intrinsics/Vector256.cs | 29 +- .../src/System/SpanHelpers.T.cs | 29 +- .../PInvoke/Generics/GenericsTest.VectorB.cs | 7 +- .../PInvoke/Generics/GenericsTest.VectorC.cs | 7 +- .../PInvoke/Generics/GenericsTest.VectorD.cs | 7 +- .../PInvoke/Generics/GenericsTest.VectorF.cs | 7 +- .../PInvoke/Generics/GenericsTest.VectorL.cs | 7 +- .../PInvoke/Generics/GenericsTest.VectorU.cs | 7 +- .../ConstantFolding/StaticReadonlySimd.cs | 8 +- .../HardwareIntrinsics/X86/X86Base/CpuId.cs | 24 +- .../JitBlue/GitHub_36614/GitHub_36614.cs | 4 +- .../JitBlue/Runtime_34587/Runtime_34587.cs | 4 +- src/tests/JIT/SIMD/BitwiseOperations.cs | 18 +- src/tests/JIT/SIMD/VectorGet.cs | 36 ++ .../SmokeTests/HardwareIntrinsics/Program.cs | 2 + .../X64Avx512_VectorT512.csproj | 44 ++ .../HardwareIntrinsics/X86/CpuId.cs | 453 ------------------ .../X86/CpuId_R2R_Avx.csproj | 2 +- .../X86/CpuId_R2R_Avx2.csproj | 2 +- .../X86/CpuId_R2R_Avx512.csproj | 2 +- .../X86/CpuId_R2R_Avx_NoAvx2.csproj | 2 +- .../X86/CpuId_R2R_Baseline.csproj | 2 +- .../X86/CpuId_R2R_Sse42.csproj | 2 +- 28 files changed, 355 insertions(+), 619 deletions(-) create mode 100644 src/tests/nativeaot/SmokeTests/HardwareIntrinsics/X64Avx512_VectorT512.csproj delete mode 100644 src/tests/readytorun/HardwareIntrinsics/X86/CpuId.cs diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index 4d0ebbe19b9f9f..9e8f195f387701 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -1553,34 +1553,6 @@ GenTree* 
Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_Vector128_AsVector: - { - assert(sig->numArgs == 1); - uint32_t vectorTByteLength = getVectorTByteLength(); - - if (vectorTByteLength == YMM_REGSIZE_BYTES) - { - // Vector is TYP_SIMD32, so we should treat this as a call to Vector128.ToVector256 - return impSpecialIntrinsic(NI_Vector128_ToVector256, clsHnd, method, sig R2RARG(&emptyEntryPoint), - simdBaseJitType, retType, simdSize, mustExpand); - } - else if (vectorTByteLength == XMM_REGSIZE_BYTES) - { - // We fold away the cast here, as it only exists to satisfy - // the type system. It is safe to do this here since the retNode type - // and the signature return type are both the same TYP_SIMD. - - retNode = impSIMDPopStack(); - SetOpLclRelatedToSIMDIntrinsic(retNode); - assert(retNode->gtType == getSIMDTypeForSize(getSIMDTypeSizeInBytes(sig->retTypeSigClass))); - } - else - { - assert(vectorTByteLength == 0); - } - break; - } - case NI_Vector128_AsVector2: case NI_Vector128_AsVector3: { @@ -1673,10 +1645,13 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, } case TYP_SIMD32: + case TYP_SIMD64: { - // Vector is TYP_SIMD32, so we should treat this as a call to Vector256.GetLower - return impSpecialIntrinsic(NI_Vector256_GetLower, clsHnd, method, sig R2RARG(&emptyEntryPoint), - simdBaseJitType, retType, simdSize, mustExpand); + // Vector is larger, so we should treat this as a call to the appropriate narrowing intrinsic + intrinsic = simdSize == YMM_REGSIZE_BYTES ? 
NI_Vector256_GetLower : NI_Vector512_GetLower128; + + return impSpecialIntrinsic(intrinsic, clsHnd, method, sig R2RARG(&emptyEntryPoint), simdBaseJitType, + retType, simdSize, mustExpand); } default: @@ -1700,13 +1675,16 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector128_AsVector: case NI_Vector256_AsVector: + case NI_Vector512_AsVector: case NI_Vector256_AsVector256: + case NI_Vector512_AsVector512: { assert(sig->numArgs == 1); uint32_t vectorTByteLength = getVectorTByteLength(); - if (vectorTByteLength == YMM_REGSIZE_BYTES) + if (vectorTByteLength == simdSize) { // We fold away the cast here, as it only exists to satisfy // the type system. It is safe to do this here since the retNode type @@ -1718,86 +1696,90 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - else if (vectorTByteLength == XMM_REGSIZE_BYTES) + else if (vectorTByteLength != 0) { - if (compOpportunisticallyDependsOn(InstructionSet_AVX)) - { - // We support Vector256 but Vector is only 16-bytes, so we should - // treat this method as a call to Vector256.GetLower or Vector128.ToVector256 + // Vector is a different size than the source/target SIMD type, so we should + // treat this as a call to the appropriate narrowing or widening intrinsic. 
+ + NamedIntrinsic convertIntrinsic = NI_Illegal; - if (intrinsic == NI_Vector256_AsVector) + switch (vectorTByteLength) + { + case XMM_REGSIZE_BYTES: { - return impSpecialIntrinsic(NI_Vector256_GetLower, clsHnd, method, sig R2RARG(&emptyEntryPoint), - simdBaseJitType, retType, simdSize, mustExpand); + switch (intrinsic) + { + case NI_Vector256_AsVector: + convertIntrinsic = NI_Vector256_GetLower; + break; + case NI_Vector512_AsVector: + convertIntrinsic = NI_Vector512_GetLower128; + break; + case NI_Vector256_AsVector256: + convertIntrinsic = NI_Vector128_ToVector256; + break; + case NI_Vector512_AsVector512: + convertIntrinsic = NI_Vector128_ToVector512; + break; + default: + unreached(); + } + break; } - else + + case YMM_REGSIZE_BYTES: { - assert(intrinsic == NI_Vector256_AsVector256); - return impSpecialIntrinsic(NI_Vector128_ToVector256, clsHnd, method, - sig R2RARG(&emptyEntryPoint), simdBaseJitType, retType, 16, - mustExpand); + switch (intrinsic) + { + case NI_Vector128_AsVector: + convertIntrinsic = NI_Vector128_ToVector256; + break; + case NI_Vector512_AsVector: + convertIntrinsic = NI_Vector512_GetLower; + break; + case NI_Vector512_AsVector512: + convertIntrinsic = NI_Vector256_ToVector512; + break; + default: + unreached(); + } + break; } - } - } - else - { - assert(vectorTByteLength == 0); - } - break; - } - - case NI_Vector512_AsVector: - case NI_Vector512_AsVector512: - { - assert(sig->numArgs == 1); - uint32_t vectorTByteLength = getVectorTByteLength(); - - if (vectorTByteLength == YMM_REGSIZE_BYTES) - { - assert(IsBaselineVector512IsaSupportedDebugOnly()); - - // We support Vector512 but Vector is only 32-bytes, so we should - // treat this method as a call to Vector512.GetLower or Vector256.ToVector512 - if (intrinsic == NI_Vector512_AsVector) - { - return impSpecialIntrinsic(NI_Vector512_GetLower, clsHnd, method, sig R2RARG(&emptyEntryPoint), - simdBaseJitType, retType, simdSize, mustExpand); - } - else - { - assert(intrinsic == 
NI_Vector512_AsVector512); - return impSpecialIntrinsic(NI_Vector256_ToVector512, clsHnd, method, sig R2RARG(&emptyEntryPoint), - simdBaseJitType, retType, 32, mustExpand); - } - break; - } - else if (vectorTByteLength == XMM_REGSIZE_BYTES) - { - if (compOpportunisticallyDependsOn(InstructionSet_AVX512F)) - { - // We support Vector512 but Vector is only 16-bytes, so we should - // treat this method as a call to Vector512.GetLower128 or Vector128.ToVector512 - - if (intrinsic == NI_Vector512_AsVector) + case ZMM_REGSIZE_BYTES: { - return impSpecialIntrinsic(NI_Vector512_GetLower128, clsHnd, method, - sig R2RARG(&emptyEntryPoint), simdBaseJitType, retType, simdSize, - mustExpand); + switch (intrinsic) + { + case NI_Vector128_AsVector: + convertIntrinsic = NI_Vector128_ToVector512; + break; + case NI_Vector256_AsVector: + convertIntrinsic = NI_Vector256_ToVector512; + break; + case NI_Vector256_AsVector256: + convertIntrinsic = NI_Vector512_GetLower; + break; + default: + unreached(); + } + break; } - else + + default: { - assert(intrinsic == NI_Vector512_AsVector512); - return impSpecialIntrinsic(NI_Vector128_ToVector512, clsHnd, method, - sig R2RARG(&emptyEntryPoint), simdBaseJitType, retType, 16, - mustExpand); + unreached(); } } + + unsigned convertSize = simdSize; + bool sizeFound = HWIntrinsicInfo::tryLookupSimdSize(convertIntrinsic, &convertSize); + assert(sizeFound); + + return impSpecialIntrinsic(convertIntrinsic, clsHnd, method, sig R2RARG(&emptyEntryPoint), + simdBaseJitType, retType, convertSize, mustExpand); } - else - { - assert(vectorTByteLength == 0); - } + + // VectorT ISA was not present. Fall back to managed. 
break; } diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 576e8be6f367ce..ea6e8030cbadc6 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -3785,7 +3785,7 @@ GenTree* Compiler::impImportStaticReadOnlyField(CORINFO_FIELD_HANDLE field, CORI int simdWidth = getSIMDTypeSizeInBytes(fieldClsHnd); if ((simdWidth > 0) && IsBaselineSimdIsaSupported()) { - assert((totalSize <= 32) && (totalSize <= MaxStructSize)); + assert((totalSize <= 64) && (totalSize <= MaxStructSize)); var_types simdType = getSIMDTypeForSize(simdWidth); bool hwAccelerated = true; diff --git a/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs b/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs index 480a604d6f7b86..e611d18c630330 100644 --- a/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs +++ b/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs @@ -121,10 +121,13 @@ public SimdVectorLength GetVectorTSimdVector() Debug.Assert(InstructionSet.X64_VectorT256 == InstructionSet.X86_VectorT256); Debug.Assert(InstructionSet.X64_VectorT512 == InstructionSet.X86_VectorT512); - // TODO-XArch: Add support for 512-bit Vector - Debug.Assert(!IsInstructionSetOptimisticallySupported(InstructionSet.X64_VectorT512)); - - if (IsInstructionSetOptimisticallySupported(InstructionSet.X64_VectorT256)) + if (IsInstructionSetOptimisticallySupported(InstructionSet.X64_VectorT512)) + { + Debug.Assert(!IsInstructionSetOptimisticallySupported(InstructionSet.X64_VectorT128)); + Debug.Assert(!IsInstructionSetOptimisticallySupported(InstructionSet.X64_VectorT256)); + return SimdVectorLength.Vector512Bit; + } + else if (IsInstructionSetOptimisticallySupported(InstructionSet.X64_VectorT256)) { Debug.Assert(!IsInstructionSetOptimisticallySupported(InstructionSet.X64_VectorT128)); return SimdVectorLength.Vector256Bit; @@ -437,15 +440,15 @@ public bool ComputeInstructionSetFlags(int maxVectorTBitWidth, Debug.Assert((maxVectorTBitWidth == 0) || 
(maxVectorTBitWidth >= 128)); supportedInstructionSets.AddInstructionSet(InstructionSet.X86_VectorT128); - if (supportedInstructionSets.HasInstructionSet(InstructionSet.X86_AVX2)) + if (supportedInstructionSets.HasInstructionSet(InstructionSet.X86_AVX512F) && (maxVectorTBitWidth >= 512)) { - if ((maxVectorTBitWidth == 0) || (maxVectorTBitWidth >= 256)) - { - supportedInstructionSets.RemoveInstructionSet(InstructionSet.X86_VectorT128); - supportedInstructionSets.AddInstructionSet(InstructionSet.X86_VectorT256); - } - - // TODO-XArch: Add support for 512-bit Vector + supportedInstructionSets.RemoveInstructionSet(InstructionSet.X86_VectorT128); + supportedInstructionSets.AddInstructionSet(InstructionSet.X86_VectorT512); + } + else if (supportedInstructionSets.HasInstructionSet(InstructionSet.X86_AVX2) && (maxVectorTBitWidth is 0 or >= 256)) + { + supportedInstructionSets.RemoveInstructionSet(InstructionSet.X86_VectorT128); + supportedInstructionSets.AddInstructionSet(InstructionSet.X86_VectorT256); } break; } diff --git a/src/libraries/System.Numerics.Vectors/tests/GenericVectorTests.cs b/src/libraries/System.Numerics.Vectors/tests/GenericVectorTests.cs index 28ed4f701d4693..2a0ac9701de02e 100644 --- a/src/libraries/System.Numerics.Vectors/tests/GenericVectorTests.cs +++ b/src/libraries/System.Numerics.Vectors/tests/GenericVectorTests.cs @@ -3114,6 +3114,19 @@ public void ConvertUInt32ToSingle() } } + [Fact] + public void ConvertUInt32ToSingleWithReflection() + { + MethodInfo method = typeof(Vector).GetMethod(nameof(Vector.ConvertToSingle), [typeof(Vector)]); + uint[] source = GenerateRandomValuesForVector(); + Vector sourceVec = new Vector(source); + Vector targetVec = (Vector)method.Invoke(null, [sourceVec]); + for (int i = 0; i < Vector.Count; i++) + { + Assert.Equal(unchecked((float)source[i]), targetVec[i]); + } + } + [Fact] public void ConvertInt64ToDouble() { @@ -3126,6 +3139,19 @@ public void ConvertInt64ToDouble() } } + [Fact] + public void 
ConvertInt64ToDoubleWithReflection() + { + MethodInfo method = typeof(Vector).GetMethod(nameof(Vector.ConvertToDouble), [typeof(Vector)]); + long[] source = GenerateRandomValuesForVector(); + Vector sourceVec = new Vector(source); + Vector targetVec = (Vector)method.Invoke(null, [sourceVec]); + for (int i = 0; i < Vector.Count; i++) + { + Assert.Equal(unchecked((double)source[i]), targetVec[i]); + } + } + [Fact] public void ConvertUInt64ToDouble() { @@ -3138,6 +3164,19 @@ public void ConvertUInt64ToDouble() } } + [Fact] + public void ConvertUInt64ToDoubleWithReflection() + { + MethodInfo method = typeof(Vector).GetMethod(nameof(Vector.ConvertToDouble), [typeof(Vector)]); + ulong[] source = GenerateRandomValuesForVector(); + Vector sourceVec = new Vector(source); + Vector targetVec = (Vector)method.Invoke(null, [sourceVec]); + for (int i = 0; i < Vector.Count; i++) + { + Assert.Equal(unchecked((double)source[i]), targetVec[i]); + } + } + [Fact] public void ConvertSingleToInt32() { diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs index 355c19d4cfe794..40e9fdc8c16510 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs @@ -328,9 +328,12 @@ public static Vector ClampNative(Vector value, Vector min, Vector [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector ConvertToDouble(Vector value) { - if (Avx2.IsSupported) + if (Vector.Count == Vector512.Count) + { + return Vector512.ConvertToDouble(value.AsVector512()).AsVector(); + } + else if (Vector.Count == Vector256.Count) { - Debug.Assert(Vector.Count == Vector256.Count); return Vector256.ConvertToDouble(value.AsVector256()).AsVector(); } else @@ -348,9 +351,12 @@ public static Vector ConvertToDouble(Vector value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector ConvertToDouble(Vector 
value) { - if (Avx2.IsSupported) + if (Vector.Count == Vector512.Count) + { + return Vector512.ConvertToDouble(value.AsVector512()).AsVector(); + } + else if (Vector.Count == Vector256.Count) { - Debug.Assert(Vector.Count == Vector256.Count); return Vector256.ConvertToDouble(value.AsVector256()).AsVector(); } else @@ -453,9 +459,12 @@ public static Vector ConvertToSingle(Vector value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector ConvertToSingle(Vector value) { - if (Avx2.IsSupported) + if (Vector.Count == Vector512.Count) + { + return Vector512.ConvertToSingle(value.AsVector512()).AsVector(); + } + else if (Vector.Count == Vector256.Count) { - Debug.Assert(Vector.Count == Vector256.Count); return Vector256.ConvertToSingle(value.AsVector256()).AsVector(); } else diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs index 6708b81a203610..ebd1bb725bebdd 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs @@ -228,12 +228,19 @@ public static Vector256 As(this Vector256 vector) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 AsVector256(this Vector value) { - Debug.Assert(Vector256.Count >= Vector.Count); ThrowHelper.ThrowForUnsupportedIntrinsicsVector256BaseType(); - Vector256 result = default; - Unsafe.WriteUnaligned(ref Unsafe.As, byte>(ref result), value); - return result; + if (Vector.Count >= Vector256.Count) + { + ref byte address = ref Unsafe.As, byte>(ref value); + return Unsafe.ReadUnaligned>(ref address); + } + else + { + Vector256 result = default; + Unsafe.WriteUnaligned(ref Unsafe.As, byte>(ref result), value); + return result; + } } /// Reinterprets a as a new . 
@@ -245,11 +252,19 @@ public static Vector256 AsVector256(this Vector value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector AsVector(this Vector256 value) { - Debug.Assert(Vector256.Count >= Vector.Count); ThrowHelper.ThrowForUnsupportedIntrinsicsVector256BaseType(); - ref byte address = ref Unsafe.As, byte>(ref value); - return Unsafe.ReadUnaligned>(ref address); + if (Vector256.Count >= Vector.Count) + { + ref byte address = ref Unsafe.As, byte>(ref value); + return Unsafe.ReadUnaligned>(ref address); + } + else + { + Vector result = default; + Unsafe.WriteUnaligned(ref Unsafe.As, byte>(ref result), value); + return result; + } } /// Computes the bitwise-and of two vectors. diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs index 0d61e3b662da7a..4c1cf529384d1d 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs @@ -65,27 +65,48 @@ public static unsafe void Fill(ref T refData, nuint numElements, T value) { vector = Vector256.Create(vec128).AsVector(); } + else if (Vector.Count == 64) + { + vector = Vector512.Create(vec128).AsVector(); + } else { - Debug.Fail("Vector isn't 128 or 256 bits in size?"); + Debug.Fail("Vector is unexpected size."); goto CannotVectorize; } } else if (sizeof(T) == 32) { + Vector256 vec256 = Unsafe.As>(ref tmp); if (Vector.Count == 32) { - vector = Unsafe.As>(ref tmp).AsVector(); + vector = vec256.AsVector(); + } + else if (Vector.Count == 64) + { + vector = Vector512.Create(vec256).AsVector(); + } + else + { + Debug.Fail("Vector is unexpected size."); + goto CannotVectorize; + } + } + else if (sizeof(T) == 64) + { + if (Vector.Count == 64) + { + vector = Unsafe.As>(ref tmp).AsVector(); } else { - Debug.Fail("Vector isn't 256 bits in size?"); + Debug.Fail("Vector is unexpected size."); goto CannotVectorize; } } else { - 
Debug.Fail("Vector is greater than 256 bits in size?"); + Debug.Fail("Vector is greater than 512 bits in size?"); goto CannotVectorize; } diff --git a/src/tests/Interop/PInvoke/Generics/GenericsTest.VectorB.cs b/src/tests/Interop/PInvoke/Generics/GenericsTest.VectorB.cs index 3c733b6cfd0fe0..c6270319831da3 100644 --- a/src/tests/Interop/PInvoke/Generics/GenericsTest.VectorB.cs +++ b/src/tests/Interop/PInvoke/Generics/GenericsTest.VectorB.cs @@ -69,7 +69,12 @@ public unsafe partial class GenericsTest [ActiveIssue("https://github.com/dotnet/runtimelab/issues/177", typeof(TestLibrary.Utilities), nameof(TestLibrary.Utilities.IsNativeAot))] public static void TestVectorB() { - if (Vector.Count == 32) + if (Vector.Count == 64) + { + // TODO: P/Invoke tests do not yet handle 512-bit Vector + return; + } + else if (Vector.Count == 32) { TestVectorB256(); } diff --git a/src/tests/Interop/PInvoke/Generics/GenericsTest.VectorC.cs b/src/tests/Interop/PInvoke/Generics/GenericsTest.VectorC.cs index 7f78d30fca6d3f..3753aad056294c 100644 --- a/src/tests/Interop/PInvoke/Generics/GenericsTest.VectorC.cs +++ b/src/tests/Interop/PInvoke/Generics/GenericsTest.VectorC.cs @@ -69,7 +69,12 @@ public unsafe partial class GenericsTest [ActiveIssue("https://github.com/dotnet/runtimelab/issues/177", typeof(TestLibrary.Utilities), nameof(TestLibrary.Utilities.IsNativeAot))] public static void TestVectorC() { - if (Vector.Count == 16) + if (Vector.Count == 32) + { + // TODO: P/Invoke tests do not yet handle 512-bit Vector + return; + } + else if (Vector.Count == 16) { TestVectorC256(); } diff --git a/src/tests/Interop/PInvoke/Generics/GenericsTest.VectorD.cs b/src/tests/Interop/PInvoke/Generics/GenericsTest.VectorD.cs index a8638a7213d531..c3ce7b1d1a3e82 100644 --- a/src/tests/Interop/PInvoke/Generics/GenericsTest.VectorD.cs +++ b/src/tests/Interop/PInvoke/Generics/GenericsTest.VectorD.cs @@ -69,7 +69,12 @@ public unsafe partial class GenericsTest 
[ActiveIssue("https://github.com/dotnet/runtimelab/issues/177", typeof(TestLibrary.Utilities), nameof(TestLibrary.Utilities.IsNativeAot))] public static void TestVectorD() { - if (Vector.Count == 4) + if (Vector.Count == 8) + { + // TODO: P/Invoke tests do not yet handle 512-bit Vector + return; + } + else if (Vector.Count == 4) { TestVectorD256(); } diff --git a/src/tests/Interop/PInvoke/Generics/GenericsTest.VectorF.cs b/src/tests/Interop/PInvoke/Generics/GenericsTest.VectorF.cs index abae129e978fc6..743d28ad715f86 100644 --- a/src/tests/Interop/PInvoke/Generics/GenericsTest.VectorF.cs +++ b/src/tests/Interop/PInvoke/Generics/GenericsTest.VectorF.cs @@ -69,7 +69,12 @@ public unsafe partial class GenericsTest [ActiveIssue("https://github.com/dotnet/runtimelab/issues/177", typeof(TestLibrary.Utilities), nameof(TestLibrary.Utilities.IsNativeAot))] public static void TestVectorF() { - if (Vector.Count == 8) + if (Vector.Count == 16) + { + // TODO: P/Invoke tests do not yet handle 512-bit Vector + return; + } + else if (Vector.Count == 8) { TestVectorF256(); } diff --git a/src/tests/Interop/PInvoke/Generics/GenericsTest.VectorL.cs b/src/tests/Interop/PInvoke/Generics/GenericsTest.VectorL.cs index df1743cc4aa0f7..56080a40ce2d7d 100644 --- a/src/tests/Interop/PInvoke/Generics/GenericsTest.VectorL.cs +++ b/src/tests/Interop/PInvoke/Generics/GenericsTest.VectorL.cs @@ -69,7 +69,12 @@ public unsafe partial class GenericsTest [ActiveIssue("https://github.com/dotnet/runtimelab/issues/177", typeof(TestLibrary.Utilities), nameof(TestLibrary.Utilities.IsNativeAot))] public static void TestVectorL() { - if (Vector.Count == 4) + if (Vector.Count == 8) + { + // TODO: P/Invoke tests do not yet handle 512-bit Vector + return; + } + else if (Vector.Count == 4) { TestVectorL256(); } diff --git a/src/tests/Interop/PInvoke/Generics/GenericsTest.VectorU.cs b/src/tests/Interop/PInvoke/Generics/GenericsTest.VectorU.cs index dbbb00991ddd55..fe7863513b3d32 100644 --- 
a/src/tests/Interop/PInvoke/Generics/GenericsTest.VectorU.cs +++ b/src/tests/Interop/PInvoke/Generics/GenericsTest.VectorU.cs @@ -69,7 +69,12 @@ public unsafe partial class GenericsTest [ActiveIssue("https://github.com/dotnet/runtimelab/issues/177", typeof(TestLibrary.Utilities), nameof(TestLibrary.Utilities.IsNativeAot))] public static void TestVectorU() { - if (Vector.Count == 8) + if (Vector.Count == 16) + { + // TODO: P/Invoke tests do not yet handle 512-bit Vector + return; + } + else if (Vector.Count == 8) { TestVectorU256(); } diff --git a/src/tests/JIT/HardwareIntrinsics/General/ConstantFolding/StaticReadonlySimd.cs b/src/tests/JIT/HardwareIntrinsics/General/ConstantFolding/StaticReadonlySimd.cs index df3b79a5077748..fff5bdbb5f8bd7 100644 --- a/src/tests/JIT/HardwareIntrinsics/General/ConstantFolding/StaticReadonlySimd.cs +++ b/src/tests/JIT/HardwareIntrinsics/General/ConstantFolding/StaticReadonlySimd.cs @@ -24,8 +24,8 @@ public static void TestEntryPoint() static readonly Vector2 v1 = new Vector2(-1.0f, 2.0f); static readonly Vector3 v2 = new Vector3(-1.0f, 2.0f, -0.0f); static readonly Vector4 v3 = new Vector4(-1.0f, 2.0f, -3.0f, 4.0f); - static readonly Vector v4 = new Vector(new long[] { 1,2,3,4 }); - static readonly Vector v5 = new Vector(new float[] { 1,2,3,4,5,6,7,8 }); + static readonly Vector v4 = Vector.CreateSequence(1L, 1); + static readonly Vector v5 = Vector.CreateSequence(1.0f, 1); static readonly Vector64 v6 = Vector64.Create(-3.14f); static readonly Vector64 v7 = Vector64.Create((long)42); static readonly Vector128 v8 = Vector128.Create((ulong)1111111111,2222222222); @@ -39,8 +39,8 @@ static void Test() AssertEquals(v1, new Vector2(-1.0f, 2.0f)); AssertEquals(v2, new Vector3(-1.0f, 2.0f, -0.0f)); AssertEquals(v3, new Vector4(-1.0f, 2.0f, -3.0f, 4.0f)); - AssertEquals(v4, new Vector(new long[] { 1,2,3,4 })); - AssertEquals(v5, new Vector(new float[] { 1,2,3,4,5,6,7,8 })); + AssertEquals(v4, Vector.CreateSequence(1L, 1)); + AssertEquals(v5, 
Vector.CreateSequence(1.0f, 1)); AssertEquals(v6, Vector64.Create(-3.14f)); AssertEquals(v7, Vector64.Create((long)42)); AssertEquals(v8, Vector128.Create((ulong)1111111111, 2222222222)); diff --git a/src/tests/JIT/HardwareIntrinsics/X86/X86Base/CpuId.cs b/src/tests/JIT/HardwareIntrinsics/X86/X86Base/CpuId.cs index d7c3e1aee3af89..e88f13dc80c7fc 100644 --- a/src/tests/JIT/HardwareIntrinsics/X86/X86Base/CpuId.cs +++ b/src/tests/JIT/HardwareIntrinsics/X86/X86Base/CpuId.cs @@ -381,25 +381,21 @@ public unsafe static void CpuId() testResult = Fail; } - if (Vector.Count == 16) + int vectorTByteLength = 16; + int maxVectorTBitWidth = (GetDotnetEnvVar("MaxVectorTBitWidth", defaultValue: 0) / 128) * 128; + + if ((maxVectorTBitWidth >= 512) && !isAvx512HierarchyDisabled) { - if (!isAvx2HierarchyDisabled) - { - Console.WriteLine($"{typeof(Vector).FullName}.Count returned 16 but the hardware returned 32"); - testResult = Fail; - } + vectorTByteLength = 64; } - else if (Vector.Count == 32) + else if ((maxVectorTBitWidth is 0 or >= 256) && !isAvx2HierarchyDisabled) { - if (isAvx2HierarchyDisabled) - { - Console.WriteLine($"{typeof(Vector).FullName}.Count returned 32 but the hardware returned 16"); - testResult = Fail; - } + vectorTByteLength = 32; } - else + + if (Vector.Count != vectorTByteLength) { - Console.WriteLine($"{typeof(Vector).FullName}.Count returned {Vector.Count} which is unexpected"); + Console.WriteLine($"{typeof(Vector).FullName}.Count returned {Vector.Count}. 
The expected value was {vectorTByteLength}."); testResult = Fail; } diff --git a/src/tests/JIT/Regression/JitBlue/GitHub_36614/GitHub_36614.cs b/src/tests/JIT/Regression/JitBlue/GitHub_36614/GitHub_36614.cs index 63807ab0f64032..1b68ed8f3eb972 100644 --- a/src/tests/JIT/Regression/JitBlue/GitHub_36614/GitHub_36614.cs +++ b/src/tests/JIT/Regression/JitBlue/GitHub_36614/GitHub_36614.cs @@ -44,11 +44,11 @@ internal void NarrowDouble() internal static double[] GenerateSource1() { - return new double[4] { 5.1, 5.2, 5.3, 5.4 }; + return new double[] { 5.1, 5.2, 5.3, 5.4, 5.5, 5.6, 5.7, 5.8 }; } internal static double[] GenerateSource2() { - return new double[4] { 6.1, 6.2, 6.3, 6.4 }; + return new double[] { 6.1, 6.2, 6.3, 6.4, 6.5, 6.6, 6.7, 6.8 }; } } } diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_34587/Runtime_34587.cs b/src/tests/JIT/Regression/JitBlue/Runtime_34587/Runtime_34587.cs index 0f80219aedc16d..27251128089a3a 100644 --- a/src/tests/JIT/Regression/JitBlue/Runtime_34587/Runtime_34587.cs +++ b/src/tests/JIT/Regression/JitBlue/Runtime_34587/Runtime_34587.cs @@ -713,7 +713,9 @@ static bool ValidateVectorT() if (Avx2IsSupported) { succeeded &= VectorIsHardwareAccelerated; - succeeded &= VectorByteCount == 32; + // MaxVectorTBitWidth env variable can be used to change Vector size. + // We can only assume it is at least 16 bytes. 
+ succeeded &= VectorByteCount >= 16; } else if (Sse2IsSupported) { diff --git a/src/tests/JIT/SIMD/BitwiseOperations.cs b/src/tests/JIT/SIMD/BitwiseOperations.cs index 5d87fdfeb3d01f..fe59c06406efc1 100644 --- a/src/tests/JIT/SIMD/BitwiseOperations.cs +++ b/src/tests/JIT/SIMD/BitwiseOperations.cs @@ -24,12 +24,22 @@ static float NextFloat(Random random) return (float)(mantissa * exponent); } + static double[] GenerateDoubleArray(int size, Random random) + { + double[] arr = new double[size]; + for (int i = 0; i < size; ++i) + { + arr[i] = NextFloat(random); + } + return arr; + } + [Fact] public static int TestDouble() { Random random = new Random(Seed); - double[] arr1 = new double[] { NextFloat(random), NextFloat(random), NextFloat(random), NextFloat(random) }; - double[] arr2 = new double[] { NextFloat(random), NextFloat(random), NextFloat(random), NextFloat(random) }; + double[] arr1 = GenerateDoubleArray(System.Numerics.Vector.Count, random); + double[] arr2 = GenerateDoubleArray(System.Numerics.Vector.Count, random); var a = new System.Numerics.Vector(arr1); var b = new System.Numerics.Vector(arr2); var xorR = a ^ b; @@ -81,8 +91,8 @@ static byte[] GenerateByteArray(int size, Random random) public static int TestBool() { Random random = new Random(Seed); - byte[] arr1 = GenerateByteArray(64, random); - byte[] arr2 = GenerateByteArray(64, random); + byte[] arr1 = GenerateByteArray(System.Numerics.Vector.Count, random); + byte[] arr2 = GenerateByteArray(System.Numerics.Vector.Count, random); var a = new System.Numerics.Vector(arr1); var b = new System.Numerics.Vector(arr2); diff --git a/src/tests/JIT/SIMD/VectorGet.cs b/src/tests/JIT/SIMD/VectorGet.cs index 51c78f292664af..6e2ae799a8cfa3 100644 --- a/src/tests/JIT/SIMD/VectorGet.cs +++ b/src/tests/JIT/SIMD/VectorGet.cs @@ -106,6 +106,41 @@ public static int VectorGet(T value, int index) if (!CheckValue(A[30], value)) returnVal = Fail; if (!CheckValue(A[31], value)) returnVal = Fail; } + if (Vector.Count >= 64) 
+ { + if (!CheckValue(A[32], value)) returnVal = Fail; + if (!CheckValue(A[33], value)) returnVal = Fail; + if (!CheckValue(A[34], value)) returnVal = Fail; + if (!CheckValue(A[35], value)) returnVal = Fail; + if (!CheckValue(A[36], value)) returnVal = Fail; + if (!CheckValue(A[37], value)) returnVal = Fail; + if (!CheckValue(A[38], value)) returnVal = Fail; + if (!CheckValue(A[39], value)) returnVal = Fail; + if (!CheckValue(A[40], value)) returnVal = Fail; + if (!CheckValue(A[41], value)) returnVal = Fail; + if (!CheckValue(A[42], value)) returnVal = Fail; + if (!CheckValue(A[43], value)) returnVal = Fail; + if (!CheckValue(A[44], value)) returnVal = Fail; + if (!CheckValue(A[45], value)) returnVal = Fail; + if (!CheckValue(A[46], value)) returnVal = Fail; + if (!CheckValue(A[47], value)) returnVal = Fail; + if (!CheckValue(A[48], value)) returnVal = Fail; + if (!CheckValue(A[49], value)) returnVal = Fail; + if (!CheckValue(A[50], value)) returnVal = Fail; + if (!CheckValue(A[51], value)) returnVal = Fail; + if (!CheckValue(A[52], value)) returnVal = Fail; + if (!CheckValue(A[53], value)) returnVal = Fail; + if (!CheckValue(A[54], value)) returnVal = Fail; + if (!CheckValue(A[55], value)) returnVal = Fail; + if (!CheckValue(A[56], value)) returnVal = Fail; + if (!CheckValue(A[57], value)) returnVal = Fail; + if (!CheckValue(A[58], value)) returnVal = Fail; + if (!CheckValue(A[59], value)) returnVal = Fail; + if (!CheckValue(A[60], value)) returnVal = Fail; + if (!CheckValue(A[61], value)) returnVal = Fail; + if (!CheckValue(A[62], value)) returnVal = Fail; + if (!CheckValue(A[63], value)) returnVal = Fail; + } return returnVal; } @@ -129,6 +164,7 @@ public static int VectorGetIndexerOutOfRange(T value, int index) case 8: check = A[8]; break; case 16: check = A[16]; break; case 32: check = A[32]; break; + case 64: check = A[64]; break; } } catch (ArgumentOutOfRangeException) diff --git a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs 
b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs index 8ec15aed2ff2fd..92e8f656f681af 100644 --- a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs +++ b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs @@ -194,6 +194,8 @@ static int Main() int byteVectorLength = 16; #elif VECTORT256_INTRINSICS int byteVectorLength = 32; +#elif VECTORT512_INTRINSICS + int byteVectorLength = 64; #else #error Who dis? #endif diff --git a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/X64Avx512_VectorT512.csproj b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/X64Avx512_VectorT512.csproj new file mode 100644 index 00000000000000..94084aebc41e6b --- /dev/null +++ b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/X64Avx512_VectorT512.csproj @@ -0,0 +1,44 @@ + + + Exe + 0 + true + + true + + true + true + $(DefineConstants);AVX512_INTRINSICS;VECTORT512_INTRINSICS + true + false + + + + + + + + + /dev/null | grep -q __AVX512; then + echo No support for AVX512, test not applicable. + exit 0 + fi +]]> + + + + + + + + + diff --git a/src/tests/readytorun/HardwareIntrinsics/X86/CpuId.cs b/src/tests/readytorun/HardwareIntrinsics/X86/CpuId.cs deleted file mode 100644 index 722edc429d613b..00000000000000 --- a/src/tests/readytorun/HardwareIntrinsics/X86/CpuId.cs +++ /dev/null @@ -1,453 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
-// - -using System; -using System.Numerics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -using System.Runtime.Intrinsics.X86; -using System.Runtime.Intrinsics; -using System.Reflection; -using Xunit; - -namespace XarchHardwareIntrinsicTest._CpuId -{ - public class Program - { - const int Pass = 100; - const int Fail = 0; - - [Fact] - public unsafe static int TestEntryPoint() - { - int testResult = Pass; - - if (!X86Base.IsSupported) - { - return testResult; - } - - (int eax, int ebx, int ecx, int edx) = X86Base.CpuId(0x00000000, 0x00000000); - - bool isAuthenticAmd = (ebx == 0x68747541) && (ecx == 0x444D4163) && (edx == 0x69746E65); - bool isGenuineIntel = (ebx == 0x756E6547) && (ecx == 0x6C65746E) && (edx == 0x49656E69); - bool isVirtualCPU = (ebx == 0x74726956) && (ecx == 0x20555043) && (edx == 0x206C6175); - - if (!isAuthenticAmd && !isGenuineIntel && !isVirtualCPU) - { - // CPUID checks are vendor specific and aren't guaranteed to match up, even across Intel/AMD - // as such, we limit ourselves to just AuthenticAMD, GenuineIntel and "Virtual CPU" right now. Any other - // vendors would need to be validated against the checks below and added to the list as necessary. - - // An example of a difference is Intel/AMD for LZCNT. While the same underlying bit is used to - // represent presence of the LZCNT instruction, AMD began using this bit around 2007 for its - // ABM instruction set, which indicates LZCNT and POPCNT. Intel introduced a separate bit for - // POPCNT and didn't actually implement LZCNT and begin using the LZCNT bit until 2013. So - // while everything happens to line up today, it doesn't always and may not always do so. 
- - Console.WriteLine($"Unrecognized CPU vendor: EBX: {ebx:X8}, ECX: {ecx:X8}, EDX: {edx:X8}"); - testResult = Fail; - } - - uint maxFunctionId = (uint)eax; - - if (maxFunctionId < 0x00000001) - { - return testResult; - } - - bool isX86BaseDisabled = !GetDotnetEnable("HWINTRINSIC"); - bool isHierarchyDisabled = isX86BaseDisabled; - - (eax, ebx, ecx, edx) = X86Base.CpuId(0x00000001, 0x00000000); - - int xarchCpuInfo = eax; - - if (IsBitIncorrect(edx, 25, typeof(Sse), Sse.IsSupported, "SSE", ref isHierarchyDisabled)) - { - testResult = Fail; - } - - if (IsBitIncorrect(edx, 26, typeof(Sse2), Sse2.IsSupported, "SSE2", ref isHierarchyDisabled)) - { - testResult = Fail; - } - - bool isSse2HierarchyDisabled = isHierarchyDisabled; - - if (IsBitIncorrect(ecx, 25, typeof(Aes), Aes.IsSupported, "AES", ref isHierarchyDisabled)) - { - testResult = Fail; - } - - isHierarchyDisabled = isSse2HierarchyDisabled; - - if (IsBitIncorrect(ecx, 1, typeof(Pclmulqdq), Pclmulqdq.IsSupported, "PCLMULQDQ", ref isHierarchyDisabled)) - { - testResult = Fail; - } - - isHierarchyDisabled = isSse2HierarchyDisabled | !GetDotnetEnable("SSE3_4"); - - if (IsBitIncorrect(ecx, 0, typeof(Sse3), Sse3.IsSupported, "SSE3", ref isHierarchyDisabled)) - { - testResult = Fail; - } - - if (IsBitIncorrect(ecx, 9, typeof(Ssse3), Ssse3.IsSupported, "SSSE3", ref isHierarchyDisabled)) - { - testResult = Fail; - } - - if (IsBitIncorrect(ecx, 19, typeof(Sse41), Sse41.IsSupported, "SSE41", ref isHierarchyDisabled)) - { - testResult = Fail; - } - - if (IsBitIncorrect(ecx, 20, typeof(Sse42), Sse42.IsSupported, "SSE42", ref isHierarchyDisabled)) - { - testResult = Fail; - } - - bool isSse42HierarchyDisabled = isHierarchyDisabled; - - if (IsBitIncorrect(ecx, 23, typeof(Popcnt), Popcnt.IsSupported, "POPCNT", ref isHierarchyDisabled)) - { - testResult = Fail; - } - - isHierarchyDisabled = isSse42HierarchyDisabled; - - if (IsBitIncorrect(ecx, 28, typeof(Avx), Avx.IsSupported, "AVX", ref isHierarchyDisabled)) - { - testResult = 
Fail; - } - - bool isAvxHierarchyDisabled = isHierarchyDisabled; - - if (IsBitIncorrect(ecx, 12, typeof(Fma), Fma.IsSupported, "FMA", ref isHierarchyDisabled)) - { - testResult = Fail; - } - - bool isFmaHierarchyDisabled = isHierarchyDisabled; - - if (maxFunctionId < 0x00000007) - { - return testResult; - } - - (eax, ebx, ecx, edx) = X86Base.CpuId(0x00000007, 0x00000000); - - isHierarchyDisabled = isAvxHierarchyDisabled; - - if (IsBitIncorrect(ebx, 5, typeof(Avx2), Avx2.IsSupported, "AVX2", ref isHierarchyDisabled)) - { - testResult = Fail; - } - - bool isAvx2HierarchyDisabled = isHierarchyDisabled; - - isHierarchyDisabled = isAvxHierarchyDisabled; - - if (IsBitIncorrect(ebx, 3, typeof(Bmi1), Bmi1.IsSupported, "BMI1", ref isHierarchyDisabled)) - { - testResult = Fail; - } - - isHierarchyDisabled = isAvxHierarchyDisabled; - - if (IsBitIncorrect(ebx, 8, typeof(Bmi2), Bmi2.IsSupported, "BMI2", ref isHierarchyDisabled)) - { - testResult = Fail; - } - - isHierarchyDisabled = isAvx2HierarchyDisabled | isFmaHierarchyDisabled | OperatingSystem.IsMacOS(); - - for (int i = 0; i < 2; i++) - { - // The runtime currently requires that all of F + BW + CD + DQ + VL be supported together or none - // are supported. 
To handle this we simple check them all twice so that if any of them are disabled - // the first time around, we'll then assert that they are all actually disabled the second time around - - if (IsBitIncorrect(ebx, 16, typeof(Avx512F), Avx512F.IsSupported, "AVX512F", ref isHierarchyDisabled)) - { - testResult = Fail; - } - - if (IsBitIncorrect(ebx, 31, typeof(Avx512F.VL), Avx512F.VL.IsSupported, "AVX512F_VL", ref isHierarchyDisabled)) - { - testResult = Fail; - } - - if (IsBitIncorrect(ebx, 30, typeof(Avx512BW), Avx512BW.IsSupported, "AVX512BW", ref isHierarchyDisabled)) - { - testResult = Fail; - } - - if (IsBitIncorrect(ebx, 30, typeof(Avx512BW.VL), Avx512BW.VL.IsSupported, "AVX512BW_VL", ref isHierarchyDisabled)) - { - testResult = Fail; - } - - if (IsBitIncorrect(ebx, 28, typeof(Avx512CD), Avx512CD.IsSupported, "AVX512CD", ref isHierarchyDisabled)) - { - testResult = Fail; - } - - if (IsBitIncorrect(ebx, 28, typeof(Avx512CD.VL), Avx512CD.VL.IsSupported, "AVX512CD_VL", ref isHierarchyDisabled)) - { - testResult = Fail; - } - - if (IsBitIncorrect(ebx, 17, typeof(Avx512DQ), Avx512DQ.IsSupported, "AVX512DQ", ref isHierarchyDisabled)) - { - testResult = Fail; - } - - if (IsBitIncorrect(ebx, 17, typeof(Avx512DQ.VL), Avx512DQ.VL.IsSupported, "AVX512DQ_VL", ref isHierarchyDisabled)) - { - testResult = Fail; - } - } - - bool isAvx512HierarchyDisabled = isHierarchyDisabled; - - int preferredVectorBitWidth = (GetDotnetEnvVar("PreferredVectorBitWidth", defaultValue: 0) / 128) * 128; - int preferredVectorByteLength = preferredVectorBitWidth / 8; - - if (preferredVectorByteLength == 0) - { - bool isVector512Throttling = false; - - if (isGenuineIntel) - { - int steppingId = xarchCpuInfo & 0b1111; - int model = (xarchCpuInfo >> 4) & 0b1111; - int familyID = (xarchCpuInfo >> 8) & 0b1111; - int extendedModelID = (xarchCpuInfo >> 16) & 0b1111; - - if (familyID == 0x06) - { - if (extendedModelID == 0x05) - { - if (model == 0x05) - { - // * Skylake (Server) - // * Cascade Lake - // 
* Cooper Lake - - isVector512Throttling = true; - } - } - else if (extendedModelID == 0x06) - { - if (model == 0x06) - { - // * Cannon Lake - - isVector512Throttling = true; - } - } - } - } - - if (isAvx512HierarchyDisabled || isVector512Throttling) - { - preferredVectorByteLength = 256 / 8; - } - else - { - preferredVectorByteLength = 512 / 8; - } - } - - if (IsBitIncorrect(ecx, 1, typeof(Avx512Vbmi), Avx512Vbmi.IsSupported, "AVX512VBMI", ref isHierarchyDisabled)) - { - testResult = Fail; - } - - if (IsBitIncorrect(ecx, 1, typeof(Avx512Vbmi.VL), Avx512Vbmi.VL.IsSupported, "AVX512VBMI_VL", ref isHierarchyDisabled)) - { - testResult = Fail; - } - - isHierarchyDisabled = isX86BaseDisabled; - - if (IsBitIncorrect(edx, 14, typeof(X86Serialize), X86Serialize.IsSupported, "SERIALIZE", ref isHierarchyDisabled)) - { - testResult = Fail; - } - - (eax, ebx, ecx, edx) = X86Base.CpuId(0x00000007, 0x00000001); - - isHierarchyDisabled = isAvx2HierarchyDisabled; - -#pragma warning disable CA2252 // No need to opt into preview feature for an internal test - if (IsBitIncorrect(eax, 4, typeof(AvxVnni), AvxVnni.IsSupported, "AVXVNNI", ref isHierarchyDisabled)) - { - testResult = Fail; - } -#pragma warning restore CA2252 - - (eax, ebx, ecx, edx) = X86Base.CpuId(unchecked((int)0x80000000), 0x00000000); - - uint maxFunctionIdEx = (uint)eax; - - if (maxFunctionIdEx < 0x00000001) - { - return testResult; - } - - (eax, ebx, ecx, edx) = X86Base.CpuId(unchecked((int)0x80000001), 0x00000000); - - isHierarchyDisabled = isX86BaseDisabled; - - if (IsBitIncorrect(ecx, 5, typeof(Lzcnt), Lzcnt.IsSupported, "LZCNT", ref isHierarchyDisabled)) - { - testResult = Fail; - } - - if (IsIncorrect(typeof(Vector64), Vector64.IsHardwareAccelerated, isHierarchyDisabled: true)) - { - testResult = Fail; - } - - if (IsIncorrect(typeof(Vector128), Vector128.IsHardwareAccelerated, isSse2HierarchyDisabled)) - { - testResult = Fail; - } - - if (IsIncorrect(typeof(Vector256), Vector256.IsHardwareAccelerated, 
isAvx2HierarchyDisabled || (preferredVectorByteLength < 32))) - { - testResult = Fail; - } - - if (IsIncorrect(typeof(Vector512), Vector512.IsHardwareAccelerated, isAvx512HierarchyDisabled || (preferredVectorByteLength < 64))) - { - testResult = Fail; - } - - if (IsIncorrect(typeof(Vector), Vector.IsHardwareAccelerated, isSse2HierarchyDisabled)) - { - testResult = Fail; - } - - if (Vector.Count == 16) - { - if (!isAvx2HierarchyDisabled) - { - Console.WriteLine($"{typeof(Vector).FullName}.Count returned 16 but the hardware returned 32"); - testResult = Fail; - } - } - else if (Vector.Count == 32) - { - if (isAvx2HierarchyDisabled) - { - Console.WriteLine($"{typeof(Vector).FullName}.Count returned 32 but the hardware returned 16"); - testResult = Fail; - } - } - else - { - Console.WriteLine($"{typeof(Vector).FullName}.Count returned {Vector.Count} which is unexpected"); - testResult = Fail; - } - - if (Vector.Count != (int)typeof(Vector).GetProperty("Count")!.GetValue(null)!) - { - Console.WriteLine($"{typeof(Vector).FullName}.Count returned a different result when called via reflection"); - testResult = Fail; - } - - return testResult; - } - - static bool IsBitIncorrect(int register, int bitNumber, Type isa, bool isSupported, string name, ref bool isHierarchyDisabled) - { - bool isSupportedByHardware = (register & (1 << bitNumber)) != 0; - isHierarchyDisabled |= (!isSupported || !GetDotnetEnable(name)); - - if (isSupported) - { - if (!isSupportedByHardware) - { - Console.WriteLine($"{isa.FullName}.IsSupported returned true but the hardware returned false"); - return true; - } - - if (isHierarchyDisabled) - { - Console.WriteLine($"{isa.FullName}.IsSupported returned true but the runtime returned false"); - return true; - } - } - else if (isSupportedByHardware) - { - if (!isHierarchyDisabled) - { - Console.WriteLine($"{isa.FullName}.IsSupported returned false but the hardware and runtime returned true"); - return true; - } - } - else - { - // The IsSupported query 
returned false and the hardware - // says its unsupported, so we're all good - } - - if (isSupported != (bool)isa.GetProperty("IsSupported")!.GetValue(null)!) - { - Console.WriteLine($"{isa.FullName}.IsSupported returned a different result when called via reflection"); - return true; - } - - return false; - } - - static bool IsIncorrect(Type isa, bool isHardwareAccelerated, bool isHierarchyDisabled) - { - if (isHardwareAccelerated) - { - if (isHierarchyDisabled) - { - Console.WriteLine($"{isa.FullName}.IsHardwareAccelerated returned true but the runtime returned false"); - return true; - } - } - else if (!isHierarchyDisabled) - { - Console.WriteLine($"{isa.FullName}.IsHardwareAccelerated returned false but the hardware and runtime returned true"); - return true; - } - - if (isHardwareAccelerated != (bool)isa.GetProperty("IsHardwareAccelerated")!.GetValue(null)!) - { - Console.WriteLine($"{isa.FullName}.IsHardwareAccelerated returned a different result when called via reflection"); - return true; - } - - return false; - } - - static bool GetDotnetEnable(string name) - { - // Hardware Intrinsic configuration knobs default to true - return GetDotnetEnvVar($"Enable{name}", defaultValue: 1) != 0; - } - - static int GetDotnetEnvVar(string name, int defaultValue) - { - string? 
stringValue = Environment.GetEnvironmentVariable($"DOTNET_{name}"); - - if ((stringValue is null) || !int.TryParse(stringValue, out int value)) - { - return defaultValue; - } - - return value; - } - } -} diff --git a/src/tests/readytorun/HardwareIntrinsics/X86/CpuId_R2R_Avx.csproj b/src/tests/readytorun/HardwareIntrinsics/X86/CpuId_R2R_Avx.csproj index 1bc209963a25b1..0357e6e60be285 100644 --- a/src/tests/readytorun/HardwareIntrinsics/X86/CpuId_R2R_Avx.csproj +++ b/src/tests/readytorun/HardwareIntrinsics/X86/CpuId_R2R_Avx.csproj @@ -17,6 +17,6 @@ - + diff --git a/src/tests/readytorun/HardwareIntrinsics/X86/CpuId_R2R_Avx2.csproj b/src/tests/readytorun/HardwareIntrinsics/X86/CpuId_R2R_Avx2.csproj index 67381af11a88a2..e3ec99a8ff24a3 100644 --- a/src/tests/readytorun/HardwareIntrinsics/X86/CpuId_R2R_Avx2.csproj +++ b/src/tests/readytorun/HardwareIntrinsics/X86/CpuId_R2R_Avx2.csproj @@ -17,6 +17,6 @@ - + diff --git a/src/tests/readytorun/HardwareIntrinsics/X86/CpuId_R2R_Avx512.csproj b/src/tests/readytorun/HardwareIntrinsics/X86/CpuId_R2R_Avx512.csproj index a28a93f0f16d61..f0d2f45a2e120f 100644 --- a/src/tests/readytorun/HardwareIntrinsics/X86/CpuId_R2R_Avx512.csproj +++ b/src/tests/readytorun/HardwareIntrinsics/X86/CpuId_R2R_Avx512.csproj @@ -17,6 +17,6 @@ - + diff --git a/src/tests/readytorun/HardwareIntrinsics/X86/CpuId_R2R_Avx_NoAvx2.csproj b/src/tests/readytorun/HardwareIntrinsics/X86/CpuId_R2R_Avx_NoAvx2.csproj index 6c6f2278d69527..893f58cc177167 100644 --- a/src/tests/readytorun/HardwareIntrinsics/X86/CpuId_R2R_Avx_NoAvx2.csproj +++ b/src/tests/readytorun/HardwareIntrinsics/X86/CpuId_R2R_Avx_NoAvx2.csproj @@ -17,6 +17,6 @@ - + diff --git a/src/tests/readytorun/HardwareIntrinsics/X86/CpuId_R2R_Baseline.csproj b/src/tests/readytorun/HardwareIntrinsics/X86/CpuId_R2R_Baseline.csproj index 419c5e21f2e62f..df43d1cbeb8286 100644 --- a/src/tests/readytorun/HardwareIntrinsics/X86/CpuId_R2R_Baseline.csproj +++ 
b/src/tests/readytorun/HardwareIntrinsics/X86/CpuId_R2R_Baseline.csproj @@ -13,6 +13,6 @@ - + diff --git a/src/tests/readytorun/HardwareIntrinsics/X86/CpuId_R2R_Sse42.csproj b/src/tests/readytorun/HardwareIntrinsics/X86/CpuId_R2R_Sse42.csproj index fca78ef4941a72..43fa033f3708f8 100644 --- a/src/tests/readytorun/HardwareIntrinsics/X86/CpuId_R2R_Sse42.csproj +++ b/src/tests/readytorun/HardwareIntrinsics/X86/CpuId_R2R_Sse42.csproj @@ -17,6 +17,6 @@ - + From eae32e94ab4919235cb1d4721117849528d32706 Mon Sep 17 00:00:00 2001 From: Clinton Ingram Date: Wed, 15 Jan 2025 16:19:17 -0800 Subject: [PATCH 2/7] fix size check --- src/coreclr/jit/hwintrinsicxarch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index 9e8f195f387701..55aa44bbca94da 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -1648,7 +1648,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case TYP_SIMD64: { // Vector is larger, so we should treat this as a call to the appropriate narrowing intrinsic - intrinsic = simdSize == TYP_SIMD32 ? NI_Vector256_GetLower : NI_Vector512_GetLower128; + intrinsic = simdSize == YMM_REGSIZE_BYTES ? 
NI_Vector256_GetLower : NI_Vector512_GetLower128; return impSpecialIntrinsic(intrinsic, clsHnd, method, sig R2RARG(&emptyEntryPoint), simdBaseJitType, retType, simdSize, mustExpand); From 699f15e74bd2ce9e345a8c1f9c21860196d09a18 Mon Sep 17 00:00:00 2001 From: Clinton Ingram Date: Thu, 16 Jan 2025 10:16:46 -0800 Subject: [PATCH 3/7] add MaxVectorTBitWidth configs to jitstress-isas-x86 pipeline --- eng/pipelines/common/templates/runtimes/run-test-job.yml | 3 +++ src/tests/Common/testenvironment.proj | 1 + 2 files changed, 4 insertions(+) diff --git a/eng/pipelines/common/templates/runtimes/run-test-job.yml b/eng/pipelines/common/templates/runtimes/run-test-job.yml index 551a502ae9ed60..29f814e3ae6fe8 100644 --- a/eng/pipelines/common/templates/runtimes/run-test-job.yml +++ b/eng/pipelines/common/templates/runtimes/run-test-job.yml @@ -386,6 +386,9 @@ jobs: - jitstress_isas_x86_nosse41 - jitstress_isas_x86_nosse42 - jitstress_isas_x86_nossse3 + - jitstress_isas_x86_vectort128 + - jitstress_isas_x86_vectort512 + - jitstress_isas_x86_noavx512_vectort128 - jitstress_isas_1_x86_noaes - jitstress_isas_1_x86_noavx - jitstress_isas_1_x86_noavx2 diff --git a/src/tests/Common/testenvironment.proj b/src/tests/Common/testenvironment.proj index 5f543464b9f051..0ef77761674105 100644 --- a/src/tests/Common/testenvironment.proj +++ b/src/tests/Common/testenvironment.proj @@ -137,6 +137,7 @@ + From 94cb2c5a25a4750c0c8029db6c1996a23ef53caf Mon Sep 17 00:00:00 2001 From: Clinton Ingram Date: Thu, 16 Jan 2025 20:46:39 -0800 Subject: [PATCH 4/7] clamp Vector size to largest accelerated fixed-sized vector --- src/coreclr/inc/clrconfigvalues.h | 3 ++ src/coreclr/vm/codeman.cpp | 50 +++++++++++++++---- src/tests/Common/testenvironment.proj | 2 +- .../X86/General/IsSupported.cs | 5 ++ .../HardwareIntrinsics/X86/X86Base/CpuId.cs | 4 +- 5 files changed, 50 insertions(+), 14 deletions(-) diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h index 
7e23d58858dd93..f010644c5caed7 100644 --- a/src/coreclr/inc/clrconfigvalues.h +++ b/src/coreclr/inc/clrconfigvalues.h @@ -686,6 +686,9 @@ RETAIL_CONFIG_DWORD_INFO(INTERNAL_GDBJitEmitDebugFrame, W("GDBJitEmitDebugFrame" #endif RETAIL_CONFIG_DWORD_INFO_EX(EXTERNAL_MaxVectorTBitWidth, W("MaxVectorTBitWidth"), 0, "The maximum decimal width, in bits, that Vector is allowed to be. A value less than 128 is treated as the system default.", CLRConfig::LookupOptions::ParseIntegerAsBase10) +#if defined(TARGET_AMD64) || defined(TARGET_X86) +RETAIL_CONFIG_DWORD_INFO_EX(EXTERNAL_PreferredVectorBitWidth, W("PreferredVectorBitWidth"), 0, "The maximum decimal width, in bits, of fixed-width vectors that may be considered hardware accelerated. A value less than 128 is treated as the system default.", CLRConfig::LookupOptions::ParseIntegerAsBase10) +#endif // defined(TARGET_AMD64) || defined(TARGET_X86) // // Hardware Intrinsic ISAs; keep in sync with jitconfigvalues.h diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index 50648840fedf94..d82fa0f4f7f102 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -1548,17 +1548,6 @@ void EEJitManager::SetCpuInfo() #if defined(TARGET_X86) || defined(TARGET_AMD64) - // Clean up mutually exclusive ISAs - if (CPUCompileFlags.IsSet(InstructionSet_VectorT512)) - { - CPUCompileFlags.Clear(InstructionSet_VectorT256); - CPUCompileFlags.Clear(InstructionSet_VectorT128); - } - else if (CPUCompileFlags.IsSet(InstructionSet_VectorT256)) - { - CPUCompileFlags.Clear(InstructionSet_VectorT128); - } - int cpuidInfo[4]; const int CPUID_EAX = 0; @@ -1625,6 +1614,45 @@ void EEJitManager::SetCpuInfo() } } } + + // JIT maps Vector to Vector128, Vector256, or Vector512 for the purposes of most intrinsic resolution. + // If JIT reports that the corresponding fixed-width vector class is not hardware accelerated, that will + // mean Vector is also reported as not accelerated, so we will limit Vector size using the same rules. 
+ // This logic must be kept in sync with Compiler::compSetProcessor/Compiler::getPreferredVectorByteLength. + + uint32_t preferredVectorBitWidth = (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_PreferredVectorBitWidth) / 128) * 128; + + if ((preferredVectorBitWidth == 0) && CPUCompileFlags.IsSet(CORJIT_FLAGS::CORJIT_FLAG_VECTOR512_THROTTLING)) + { + preferredVectorBitWidth = 256; + } + + if (preferredVectorBitWidth != 0) + { + if (CPUCompileFlags.IsSet(InstructionSet_VectorT512) && (preferredVectorBitWidth < 512)) + { + CPUCompileFlags.Clear(InstructionSet_VectorT512); + } + + if (CPUCompileFlags.IsSet(InstructionSet_VectorT256) && (preferredVectorBitWidth < 256)) + { + CPUCompileFlags.Clear(InstructionSet_VectorT256); + } + } + + // Only one VectorT ISA can be set, and we have validated that anything left in the flags is supported + // by both the hardware and the config. Remove everything less than the largest supported. + + if (CPUCompileFlags.IsSet(InstructionSet_VectorT512)) + { + CPUCompileFlags.Clear(InstructionSet_VectorT256); + CPUCompileFlags.Clear(InstructionSet_VectorT128); + } + else if (CPUCompileFlags.IsSet(InstructionSet_VectorT256)) + { + CPUCompileFlags.Clear(InstructionSet_VectorT128); + } + #endif // TARGET_X86 || TARGET_AMD64 m_CPUCompileFlags = CPUCompileFlags; diff --git a/src/tests/Common/testenvironment.proj b/src/tests/Common/testenvironment.proj index 0ef77761674105..46d0b2cb1fa5fe 100644 --- a/src/tests/Common/testenvironment.proj +++ b/src/tests/Common/testenvironment.proj @@ -136,7 +136,7 @@ - + diff --git a/src/tests/JIT/HardwareIntrinsics/X86/General/IsSupported.cs b/src/tests/JIT/HardwareIntrinsics/X86/General/IsSupported.cs index 18241d9ecfc40d..c1769def972831 100644 --- a/src/tests/JIT/HardwareIntrinsics/X86/General/IsSupported.cs +++ b/src/tests/JIT/HardwareIntrinsics/X86/General/IsSupported.cs @@ -19,6 +19,11 @@ public static void IsSupported() if (Sse.IsSupported && 
int.TryParse(Environment.GetEnvironmentVariable("DOTNET_EnableIncompleteISAClass"), out var enableIncompleteIsa) && (enableIncompleteIsa != 0)) { // X86 platforms + if (Vector.Count == 64 && !Avx512F.IsSupported) + { + result = false; + } + if (Vector.Count == 32 && !Avx2.IsSupported) { result = false; diff --git a/src/tests/JIT/HardwareIntrinsics/X86/X86Base/CpuId.cs b/src/tests/JIT/HardwareIntrinsics/X86/X86Base/CpuId.cs index e88f13dc80c7fc..aeeb847495fd5a 100644 --- a/src/tests/JIT/HardwareIntrinsics/X86/X86Base/CpuId.cs +++ b/src/tests/JIT/HardwareIntrinsics/X86/X86Base/CpuId.cs @@ -386,11 +386,11 @@ public unsafe static void CpuId() if ((maxVectorTBitWidth >= 512) && !isAvx512HierarchyDisabled) { - vectorTByteLength = 64; + vectorTByteLength = int.Min(64, preferredVectorByteLength); } else if ((maxVectorTBitWidth is 0 or >= 256) && !isAvx2HierarchyDisabled) { - vectorTByteLength = 32; + vectorTByteLength = int.Min(32, preferredVectorByteLength); } if (Vector.Count != vectorTByteLength) From 2c264365d9bccd352ad6431f61c22e4c404537a1 Mon Sep 17 00:00:00 2001 From: Clinton Ingram Date: Sat, 18 Jan 2025 17:30:26 -0800 Subject: [PATCH 5/7] move PreferredVectorBitWidth logic to VM --- src/coreclr/inc/corjitflags.h | 4 -- src/coreclr/inc/jiteeversionguid.h | 10 ++-- src/coreclr/jit/compiler.cpp | 47 +++++++++++-------- src/coreclr/jit/jitconfigvalues.h | 2 - src/coreclr/jit/jitee.h | 8 ---- .../tools/Common/InstructionSetHelpers.cs | 31 ++++++++++++ .../tools/Common/JitInterface/CorInfoImpl.cs | 3 -- .../tools/Common/JitInterface/CorInfoTypes.cs | 3 -- .../superpmi-shared/methodcontext.cpp | 5 -- src/coreclr/vm/codeman.cpp | 28 +++++++---- 10 files changed, 82 insertions(+), 59 deletions(-) diff --git a/src/coreclr/inc/corjitflags.h b/src/coreclr/inc/corjitflags.h index b7de9711f07f79..f6eb983953f95e 100644 --- a/src/coreclr/inc/corjitflags.h +++ b/src/coreclr/inc/corjitflags.h @@ -64,10 +64,6 @@ class CORJIT_FLAGS CORJIT_FLAG_SOFTFP_ABI = 30, // Enable armel calling 
convention #endif -#if defined(TARGET_X86) || defined(TARGET_AMD64) - CORJIT_FLAG_VECTOR512_THROTTLING = 31, // On x86/x64, 512-bit vector usage may incur CPU frequency throttling -#endif - }; CORJIT_FLAGS() diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h index 2db16d00df135b..81b071438a4482 100644 --- a/src/coreclr/inc/jiteeversionguid.h +++ b/src/coreclr/inc/jiteeversionguid.h @@ -43,11 +43,11 @@ typedef const GUID *LPCGUID; #define GUID_DEFINED #endif // !GUID_DEFINED -constexpr GUID JITEEVersionIdentifier = { /* cc0e7adf-e397-40b6-9d14-a7149815c991 */ - 0xcc0e7adf, - 0xe397, - 0x40b6, - {0x9d, 0x14, 0xa7, 0x14, 0x98, 0x15, 0xc9, 0x91} +constexpr GUID JITEEVersionIdentifier = { /* 78acb599-d9be-4ea1-8e93-546ec43e0487 */ + 0x78acb599, + 0xd9be, + 0x4ea1, + {0x8e, 0x93, 0x54, 0x6e, 0xc4, 0x3e, 0x04, 0x87} }; ////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 5aab07895a7b85..44987366a3ffbc 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -2213,9 +2213,34 @@ void Compiler::compSetProcessor() // don't actually exist. The JIT is in charge of adding those and ensuring // the total sum of flags is still valid. #if defined(TARGET_XARCH) - // Get the preferred vector bitwidth, rounding down to the nearest multiple of 128-bits - uint32_t preferredVectorBitWidth = (ReinterpretHexAsDecimal(JitConfig.PreferredVectorBitWidth()) / 128) * 128; - uint32_t preferredVectorByteLength = preferredVectorBitWidth / 8; + // If the VM passed in a virtual vector ISA, it was done to communicate PreferredVectorBitWidth. + // No check is done for the validity of the value, since it will be clamped to max supported by + // hardware and config when queried. We will, therefore, remove the marker ISA and allow it to + // be re-added if appropriate based on the hardware ISA evaluations below. 
+ + uint32_t preferredVectorBitWidth = 0; + if (instructionSetFlags.HasInstructionSet(InstructionSet_Vector128)) + { + instructionSetFlags.RemoveInstructionSet(InstructionSet_Vector128); + preferredVectorBitWidth = 128; + } + else if (instructionSetFlags.HasInstructionSet(InstructionSet_Vector256)) + { + instructionSetFlags.RemoveInstructionSet(InstructionSet_Vector256); + preferredVectorBitWidth = 256; + } + else if (instructionSetFlags.HasInstructionSet(InstructionSet_Vector512)) + { + instructionSetFlags.RemoveInstructionSet(InstructionSet_Vector512); + preferredVectorBitWidth = 512; + } + + opts.preferredVectorByteLength = preferredVectorBitWidth / 8; + + // Only one marker ISA should have been passed in, and it should now be cleared. + assert(!instructionSetFlags.HasInstructionSet(InstructionSet_Vector128) && + !instructionSetFlags.HasInstructionSet(InstructionSet_Vector256) && + !instructionSetFlags.HasInstructionSet(InstructionSet_Vector512)); if (instructionSetFlags.HasInstructionSet(InstructionSet_SSE)) { @@ -2247,20 +2272,6 @@ void Compiler::compSetProcessor() assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512DQ_VL)); instructionSetFlags.AddInstructionSet(InstructionSet_Vector512); - - if ((preferredVectorByteLength == 0) && jitFlags.IsSet(JitFlags::JIT_FLAG_VECTOR512_THROTTLING)) - { - // Some architectures can experience frequency throttling when - // executing 512-bit width instructions. To account for this we set the - // default preferred vector width to 256-bits in some scenarios. Power - // users can override this with `DOTNET_PreferredVectorBitWidth=512` to - // allow using such instructions where hardware support is available. 
- // - // Do not condition this based on stress mode as it makes the support - // reported inconsistent across methods and breaks expectations/functionality - - preferredVectorByteLength = 256 / 8; - } } else { @@ -2268,8 +2279,6 @@ void Compiler::compSetProcessor() assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX10v1)); } } - - opts.preferredVectorByteLength = preferredVectorByteLength; #elif defined(TARGET_ARM64) if (instructionSetFlags.HasInstructionSet(InstructionSet_AdvSimd)) { diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 7b82ac1afc22cb..5beafd1ccd6928 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -380,8 +380,6 @@ CONFIG_INTEGER(JitBypassAPXCheck, "JitBypassAPXCheck", 0) // Bypass APX CONFIG_INTEGER(JitStressEvexEncoding, "JitStressEvexEncoding", 0) #endif -RELEASE_CONFIG_INTEGER(PreferredVectorBitWidth, "PreferredVectorBitWidth", 0) // The preferred decimal width, in bits, to use for any implicit vectorization emitted. A value less than 128 is treated as the system default. - // // Hardware Intrinsic ISAs; keep in sync with clrconfigvalues.h // diff --git a/src/coreclr/jit/jitee.h b/src/coreclr/jit/jitee.h index edfbafc917d470..8c6e82ec7b9dd3 100644 --- a/src/coreclr/jit/jitee.h +++ b/src/coreclr/jit/jitee.h @@ -44,10 +44,6 @@ class JitFlags JIT_FLAG_SOFTFP_ABI = 30, // Enable armel calling convention #endif -#if defined(TARGET_XARCH) - JIT_FLAG_VECTOR512_THROTTLING = 31, // On Xarch, 512-bit vector usage may incur CPU frequency throttling -#endif - // Note: the mcs tool uses the currently unused upper flags bits when outputting SuperPMI MC file flags. // See EXTRA_JIT_FLAGS and spmidumphelper.cpp. Currently, these are bits 56 through 63. If they overlap, // something needs to change. 
@@ -147,10 +143,6 @@ class JitFlags FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_SOFTFP_ABI, JIT_FLAG_SOFTFP_ABI); #endif // TARGET_ARM -#if defined(TARGET_X86) || defined(TARGET_AMD64) - FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_VECTOR512_THROTTLING, JIT_FLAG_VECTOR512_THROTTLING); -#endif // TARGET_ARM - #undef FLAGS_EQUAL } diff --git a/src/coreclr/tools/Common/InstructionSetHelpers.cs b/src/coreclr/tools/Common/InstructionSetHelpers.cs index 14940fc74db9e5..f41b1adc7b8709 100644 --- a/src/coreclr/tools/Common/InstructionSetHelpers.cs +++ b/src/coreclr/tools/Common/InstructionSetHelpers.cs @@ -252,6 +252,37 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru optimisticInstructionSet.Remove(unsupportedInstructionSet); optimisticInstructionSet.Add(supportedInstructionSet); + if (flags.HasFlag(InstructionSetSupportFlags.Vector512Throttling)) + { + Debug.Assert(InstructionSet.X86_AVX512F == InstructionSet.X64_AVX512F); + if (supportedInstructionSet.HasInstructionSet(InstructionSet.X86_AVX512F)) + { + Debug.Assert(InstructionSet.X86_Vector256 == InstructionSet.X64_Vector256); + Debug.Assert(InstructionSet.X86_VectorT256 == InstructionSet.X64_VectorT256); + Debug.Assert(InstructionSet.X86_VectorT512 == InstructionSet.X64_VectorT512); + + // AVX-512 is supported, but we are compiling specifically for hardware that has a performance penalty for + // using 512-bit ops. We want to tell JIT not to consider Vector512 to be hardware accelerated, which we do + // by passing a PreferredVectorBitWidth value, in the form of a virtual vector ISA of the appropriate size. + // + // If we are downgrading the max accelerated vector size, we also need to downgrade Vector size. 
+ + supportedInstructionSet.AddInstructionSet(InstructionSet.X86_Vector256); + + if (supportedInstructionSet.HasInstructionSet(InstructionSet.X86_VectorT512)) + { + supportedInstructionSet.RemoveInstructionSet(InstructionSet.X86_VectorT512); + supportedInstructionSet.AddInstructionSet(InstructionSet.X86_VectorT256); + } + + if (optimisticInstructionSet.HasInstructionSet(InstructionSet.X86_VectorT512)) + { + optimisticInstructionSet.RemoveInstructionSet(InstructionSet.X86_VectorT512); + optimisticInstructionSet.AddInstructionSet(InstructionSet.X86_VectorT256); + } + } + } + return new InstructionSetSupport(supportedInstructionSet, unsupportedInstructionSet, optimisticInstructionSet, diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs b/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs index c635b416708be2..ee5091376c87d5 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs @@ -4216,9 +4216,6 @@ private uint getJitFlags(ref CORJIT_FLAGS flags, uint sizeInBytes) case TargetArchitecture.X86: Debug.Assert(InstructionSet.X86_SSE2 == InstructionSet.X64_SSE2); Debug.Assert(_compilation.InstructionSetSupport.IsInstructionSetSupported(InstructionSet.X86_SSE2)); - - if ((_compilation.InstructionSetSupport.Flags & InstructionSetSupportFlags.Vector512Throttling) != 0) - flags.Set(CorJitFlag.CORJIT_FLAG_VECTOR512_THROTTLING); break; case TargetArchitecture.ARM64: diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs b/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs index 03dff074620ac2..7ca4e5b0c1d3ad 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs @@ -1405,9 +1405,6 @@ public enum CorJitFlag : uint // ARM only CORJIT_FLAG_RELATIVE_CODE_RELOCS = 29, // JIT should generate PC-relative address computations instead of EE relocation records CORJIT_FLAG_SOFTFP_ABI = 30, // Enable armel calling 
convention - - // x86/x64 only - CORJIT_FLAG_VECTOR512_THROTTLING = 31, // On x86/x64, 512-bit vector usage may incur CPU frequency throttling } public struct CORJIT_FLAGS diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp index cb831838f21756..6c8f6a55d8ef0c 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp @@ -1203,11 +1203,6 @@ const char* CorJitFlagToString(CORJIT_FLAGS::CorJitFlag flag) return "CORJIT_FLAG_SOFTFP_ABI"; #endif // defined(TARGET_ARM) -#if defined(TARGET_X86) || defined(TARGET_AMD64) - case CORJIT_FLAGS::CorJitFlag::CORJIT_FLAG_VECTOR512_THROTTLING: - return "CORJIT_FLAG_VECTOR512_THROTTLING"; -#endif // defined(TARGET_XARCH) - default: return ""; } diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index d82fa0f4f7f102..6ac1b8feafdf7c 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -1548,6 +1548,7 @@ void EEJitManager::SetCpuInfo() #if defined(TARGET_X86) || defined(TARGET_AMD64) + bool throttleVector512 = false; int cpuidInfo[4]; const int CPUID_EAX = 0; @@ -1600,7 +1601,7 @@ void EEJitManager::SetCpuInfo() // * Cascade Lake // * Cooper Lake - CPUCompileFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_VECTOR512_THROTTLING); + throttleVector512 = true; } } else if (xarchCpuInfo.ExtendedModelId == 0x06) @@ -1609,33 +1610,40 @@ void EEJitManager::SetCpuInfo() { // * Cannon Lake - CPUCompileFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_VECTOR512_THROTTLING); + throttleVector512 = true; } } } } - // JIT maps Vector to Vector128, Vector256, or Vector512 for the purposes of most intrinsic resolution. - // If JIT reports that the corresponding fixed-width vector class is not hardware accelerated, that will - // mean Vector is also reported as not accelerated, so we will limit Vector size using the same rules. 
- // This logic must be kept in sync with Compiler::compSetProcessor/Compiler::getPreferredVectorByteLength. + // If we have a PreferredVectorBitWidth, we will pass that to JIT in the form of a virtual vector ISA of the + // appropriate size. We will also clamp the max Vector size to be no larger than PreferredVectorBitWidth, + // because JIT maps Vector to the fixed-width vector of matching size for the purposes of intrinsic + // resolution. We want to avoid a situation where e.g. Vector.IsHardwareAccelerated returns false + // because Vector512.IsHardwareAccelerated returns false due to config or automatic throttling. uint32_t preferredVectorBitWidth = (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_PreferredVectorBitWidth) / 128) * 128; - if ((preferredVectorBitWidth == 0) && CPUCompileFlags.IsSet(CORJIT_FLAGS::CORJIT_FLAG_VECTOR512_THROTTLING)) + if ((preferredVectorBitWidth == 0) && throttleVector512) { preferredVectorBitWidth = 256; } if (preferredVectorBitWidth != 0) { - if (CPUCompileFlags.IsSet(InstructionSet_VectorT512) && (preferredVectorBitWidth < 512)) + if (preferredVectorBitWidth >= 512) { + CPUCompileFlags.Set(InstructionSet_Vector512); + } + else if (preferredVectorBitWidth >= 256) + { + CPUCompileFlags.Set(InstructionSet_Vector256); CPUCompileFlags.Clear(InstructionSet_VectorT512); } - - if (CPUCompileFlags.IsSet(InstructionSet_VectorT256) && (preferredVectorBitWidth < 256)) + else { + CPUCompileFlags.Set(InstructionSet_Vector128); + CPUCompileFlags.Clear(InstructionSet_VectorT512); CPUCompileFlags.Clear(InstructionSet_VectorT256); } } From c39e40efee0d974913b13ece585b2c904f268b91 Mon Sep 17 00:00:00 2001 From: Clinton Ingram Date: Sun, 19 Jan 2025 07:46:43 -0800 Subject: [PATCH 6/7] tidying --- .../tools/Common/InstructionSetHelpers.cs | 2 +- src/coreclr/vm/codeman.cpp | 29 +++++++++---------- 2 files changed, 14 insertions(+), 17 deletions(-) diff --git a/src/coreclr/tools/Common/InstructionSetHelpers.cs 
b/src/coreclr/tools/Common/InstructionSetHelpers.cs index f41b1adc7b8709..cd232af1dd9570 100644 --- a/src/coreclr/tools/Common/InstructionSetHelpers.cs +++ b/src/coreclr/tools/Common/InstructionSetHelpers.cs @@ -235,7 +235,7 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("gfni_v512"); } } - else if (targetArchitecture == TargetArchitecture.ARM64) + else if (allowOptimistic && targetArchitecture == TargetArchitecture.ARM64) { optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("aes"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("crc"); diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index 6ac1b8feafdf7c..9911f20d3d2664 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -1629,23 +1629,20 @@ void EEJitManager::SetCpuInfo() preferredVectorBitWidth = 256; } - if (preferredVectorBitWidth != 0) + if (preferredVectorBitWidth >= 512) { - if (preferredVectorBitWidth >= 512) - { - CPUCompileFlags.Set(InstructionSet_Vector512); - } - else if (preferredVectorBitWidth >= 256) - { - CPUCompileFlags.Set(InstructionSet_Vector256); - CPUCompileFlags.Clear(InstructionSet_VectorT512); - } - else - { - CPUCompileFlags.Set(InstructionSet_Vector128); - CPUCompileFlags.Clear(InstructionSet_VectorT512); - CPUCompileFlags.Clear(InstructionSet_VectorT256); - } + CPUCompileFlags.Set(InstructionSet_Vector512); + } + else if (preferredVectorBitWidth >= 256) + { + CPUCompileFlags.Set(InstructionSet_Vector256); + CPUCompileFlags.Clear(InstructionSet_VectorT512); + } + else if (preferredVectorBitWidth != 0) + { + CPUCompileFlags.Set(InstructionSet_Vector128); + CPUCompileFlags.Clear(InstructionSet_VectorT512); + CPUCompileFlags.Clear(InstructionSet_VectorT256); } // Only one VectorT ISA can be set, and we have validated that anything left in the flags is supported From 0d438c863cd36dc9e88ed77ddb8262a11c689a81 Mon 
Sep 17 00:00:00 2001 From: Clinton Ingram Date: Sun, 19 Jan 2025 07:46:57 -0800 Subject: [PATCH 7/7] tidying2 --- .../Common/Compiler/InstructionSetSupport.cs | 12 +----------- .../tools/Common/InstructionSetHelpers.cs | 18 +++++++++--------- 2 files changed, 10 insertions(+), 20 deletions(-) diff --git a/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs b/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs index e611d18c630330..de972aced7c565 100644 --- a/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs +++ b/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs @@ -11,12 +11,6 @@ namespace ILCompiler { - [Flags] - public enum InstructionSetSupportFlags - { - Vector512Throttling = 0x1, - } - public class InstructionSetSupport { private readonly TargetArchitecture _targetArchitecture; @@ -24,21 +18,19 @@ public class InstructionSetSupport private readonly InstructionSetFlags _supportedInstructionSets; private readonly InstructionSetFlags _unsupportedInstructionSets; private readonly InstructionSetFlags _nonSpecifiableInstructionSets; - private readonly InstructionSetSupportFlags _flags; public InstructionSetSupport(InstructionSetFlags supportedInstructionSets, InstructionSetFlags unsupportedInstructionSets, TargetArchitecture architecture) : this(supportedInstructionSets, unsupportedInstructionSets, supportedInstructionSets, default(InstructionSetFlags), architecture) { } - public InstructionSetSupport(InstructionSetFlags supportedInstructionSets, InstructionSetFlags unsupportedInstructionSets, InstructionSetFlags optimisticInstructionSets, InstructionSetFlags nonSpecifiableInstructionSets, TargetArchitecture architecture, InstructionSetSupportFlags flags = 0) + public InstructionSetSupport(InstructionSetFlags supportedInstructionSets, InstructionSetFlags unsupportedInstructionSets, InstructionSetFlags optimisticInstructionSets, InstructionSetFlags nonSpecifiableInstructionSets, TargetArchitecture architecture) { 
_supportedInstructionSets = supportedInstructionSets; _unsupportedInstructionSets = unsupportedInstructionSets; _optimisticInstructionSets = optimisticInstructionSets; _targetArchitecture = architecture; _nonSpecifiableInstructionSets = nonSpecifiableInstructionSets; - _flags = flags; } public bool IsInstructionSetSupported(InstructionSet instructionSet) @@ -63,8 +55,6 @@ public bool IsInstructionSetExplicitlyUnsupported(InstructionSet instructionSet) public TargetArchitecture Architecture => _targetArchitecture; - public InstructionSetSupportFlags Flags => _flags; - public static string GetHardwareIntrinsicId(TargetArchitecture architecture, TypeDesc potentialTypeDesc) { if (!potentialTypeDesc.IsIntrinsic || !(potentialTypeDesc is MetadataType potentialType)) diff --git a/src/coreclr/tools/Common/InstructionSetHelpers.cs b/src/coreclr/tools/Common/InstructionSetHelpers.cs index cd232af1dd9570..f9e9361fb462c4 100644 --- a/src/coreclr/tools/Common/InstructionSetHelpers.cs +++ b/src/coreclr/tools/Common/InstructionSetHelpers.cs @@ -20,7 +20,6 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru string mustNotBeMessage, string invalidImplicationMessage, Logger logger, bool optimizingForSize = false) { InstructionSetSupportBuilder instructionSetSupportBuilder = new(targetArchitecture); - InstructionSetSupportFlags flags = 0; // Ready to run images are built with certain instruction set baselines if ((targetArchitecture == TargetArchitecture.X86) || (targetArchitecture == TargetArchitecture.X64)) @@ -45,6 +44,8 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru // compile both branches of IsSupported checks. 
bool allowOptimistic = !optimizingForSize; + bool throttleAvx512 = false; + if (instructionSet == "native") { // We're compiling for a specific chip @@ -92,7 +93,7 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru // * Cascade Lake // * Cooper Lake - flags |= InstructionSetSupportFlags.Vector512Throttling; + throttleAvx512 = true; } } else if (extendedModel == 0x06) @@ -101,13 +102,13 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru { // * Cannon Lake - flags |= InstructionSetSupportFlags.Vector512Throttling; + throttleAvx512 = true; } } } } - if ((flags & InstructionSetSupportFlags.Vector512Throttling) != 0 && logger.IsVerbose) + if (throttleAvx512 && logger.IsVerbose) logger.LogMessage("Vector512 is throttled"); } @@ -180,7 +181,7 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru InstructionSetSupportBuilder optimisticInstructionSetSupportBuilder = new InstructionSetSupportBuilder(instructionSetSupportBuilder); // Optimistically assume some instruction sets are present. - if (allowOptimistic && (targetArchitecture == TargetArchitecture.X86 || targetArchitecture == TargetArchitecture.X64)) + if (allowOptimistic && targetArchitecture is TargetArchitecture.X86 or TargetArchitecture.X64) { // We set these hardware features as opportunistically enabled as most of hardware in the wild supports them. 
// Note that we do not indicate support for AVX, or any other instruction set which uses the VEX encodings as @@ -235,7 +236,7 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("gfni_v512"); } } - else if (allowOptimistic && targetArchitecture == TargetArchitecture.ARM64) + else if (allowOptimistic && targetArchitecture is TargetArchitecture.ARM64) { optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("aes"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("crc"); @@ -252,7 +253,7 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru optimisticInstructionSet.Remove(unsupportedInstructionSet); optimisticInstructionSet.Add(supportedInstructionSet); - if (flags.HasFlag(InstructionSetSupportFlags.Vector512Throttling)) + if (throttleAvx512) { Debug.Assert(InstructionSet.X86_AVX512F == InstructionSet.X64_AVX512F); if (supportedInstructionSet.HasInstructionSet(InstructionSet.X86_AVX512F)) @@ -287,8 +288,7 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru unsupportedInstructionSet, optimisticInstructionSet, InstructionSetSupportBuilder.GetNonSpecifiableInstructionSetsForArch(targetArchitecture), - targetArchitecture, - flags); + targetArchitecture); } } }