diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h
index e465cd68058ef4..79964ca1289842 100644
--- a/src/coreclr/jit/codegen.h
+++ b/src/coreclr/jit/codegen.h
@@ -650,6 +650,7 @@ class CodeGen final : public CodeGenInterface
#if defined(TARGET_AMD64)
void genAmd64EmitterUnitTestsSse2();
void genAmd64EmitterUnitTestsApx();
+ void genAmd64EmitterUnitTestsAvx10v2();
#endif
#endif // defined(DEBUG)
diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp
index dec27882bb2285..d535dfb1454570 100644
--- a/src/coreclr/jit/codegencommon.cpp
+++ b/src/coreclr/jit/codegencommon.cpp
@@ -1832,15 +1832,26 @@ void CodeGen::genGenerateMachineCode()
#if defined(TARGET_X86)
if (compiler->canUseEvexEncoding())
{
- if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX10v1))
+ if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX10v2))
+ {
+ if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX10v2_V512))
+ {
+ printf("X86 with AVX10.2/512");
+ }
+ else
+ {
+ printf("X86 with AVX10.2/256");
+ }
+ }
+ else if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX10v1))
{
if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX10v1_V512))
{
- printf("X86 with AVX10/512");
+ printf("X86 with AVX10.1/512");
}
else
{
- printf("X86 with AVX10/256");
+ printf("X86 with AVX10.1/256");
}
}
else
@@ -1860,15 +1871,26 @@ void CodeGen::genGenerateMachineCode()
#elif defined(TARGET_AMD64)
if (compiler->canUseEvexEncoding())
{
- if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX10v1))
+ if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX10v2))
+ {
+ if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX10v2_V512))
+ {
+ printf("X64 with AVX10.2/512");
+ }
+ else
+ {
+ printf("X64 with AVX10.2/256");
+ }
+ }
+ else if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX10v1))
{
if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX10v1_V512))
{
- printf("X64 with AVX10/512");
+ printf("X64 with AVX10.1/512");
}
else
{
- printf("X64 with AVX10/256");
+ printf("X64 with AVX10.1/256");
}
}
else
diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp
index 53257afb010b74..945f468d86fa81 100644
--- a/src/coreclr/jit/codegenlinear.cpp
+++ b/src/coreclr/jit/codegenlinear.cpp
@@ -2712,6 +2712,10 @@ void CodeGen::genEmitterUnitTests()
{
genAmd64EmitterUnitTestsApx();
}
+ if (unitTestSectionAll || (strstr(unitTestSection, "avx10v2") != nullptr))
+ {
+ genAmd64EmitterUnitTestsAvx10v2();
+ }
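+ // Note: "avx10v2" is matched against the emitter unit-test sections string
+ // (assumed to be supplied via the JitEmitUnitTestsSections config, as for the "apx" section above).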
#elif defined(TARGET_ARM64)
if (unitTestSectionAll || (strstr(unitTestSection, "general") != nullptr))
diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp
index 750b2d9818ba73..4f9b2c4f0184b2 100644
--- a/src/coreclr/jit/codegenxarch.cpp
+++ b/src/coreclr/jit/codegenxarch.cpp
@@ -9239,6 +9239,159 @@ void CodeGen::genAmd64EmitterUnitTestsApx()
theEmitter->emitIns_S(INS_not, EA_2BYTE, 0, 0);
}
+void CodeGen::genAmd64EmitterUnitTestsAvx10v2()
+{
+ // All the AVX10.2 instructions are EVEX-encoded, and EVEX has only one size.
+ // Also, there is no specialized handling for XMM0 vs XMM9 vs XMM16.
+
+ emitter* theEmitter = GetEmitter();
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ // This test suite needs AVX10.2 enabled.
+ if (!theEmitter->emitComp->compIsaSupportedDebugOnly(InstructionSet_AVX10v2))
+ {
+ return;
+ }
+
+ // packed conversion instructions
+ theEmitter->emitIns_R_R(INS_vcvttps2dqs, EA_16BYTE, REG_XMM0, REG_XMM1); // xmm
+ theEmitter->emitIns_R_R(INS_vcvttps2dqs, EA_16BYTE, REG_XMM9, REG_XMM10); // xmm
+ theEmitter->emitIns_R_R(INS_vcvttps2dqs, EA_16BYTE, REG_XMM15, REG_XMM16); // xmm
+ theEmitter->emitIns_R_R(INS_vcvttps2dqs, EA_32BYTE, REG_XMM0, REG_XMM1); // ymm
+ theEmitter->emitIns_R_R(INS_vcvttps2dqs, EA_64BYTE, REG_XMM0, REG_XMM1); // zmm
+
+ theEmitter->emitIns_R_R(INS_vcvttps2udqs, EA_16BYTE, REG_XMM0, REG_XMM1); // xmm
+ theEmitter->emitIns_R_R(INS_vcvttps2udqs, EA_16BYTE, REG_XMM9, REG_XMM10); // xmm
+ theEmitter->emitIns_R_R(INS_vcvttps2udqs, EA_16BYTE, REG_XMM15, REG_XMM16); // xmm
+ theEmitter->emitIns_R_R(INS_vcvttps2udqs, EA_32BYTE, REG_XMM0, REG_XMM1); // ymm
+ theEmitter->emitIns_R_R(INS_vcvttps2udqs, EA_64BYTE, REG_XMM0, REG_XMM1); // zmm
+
+ theEmitter->emitIns_R_R(INS_vcvttpd2qqs, EA_16BYTE, REG_XMM0, REG_XMM1); // xmm
+ theEmitter->emitIns_R_R(INS_vcvttpd2qqs, EA_16BYTE, REG_XMM9, REG_XMM10); // xmm
+ theEmitter->emitIns_R_R(INS_vcvttpd2qqs, EA_16BYTE, REG_XMM15, REG_XMM16); // xmm
+ theEmitter->emitIns_R_R(INS_vcvttpd2qqs, EA_32BYTE, REG_XMM0, REG_XMM1); // ymm
+ theEmitter->emitIns_R_R(INS_vcvttpd2qqs, EA_64BYTE, REG_XMM0, REG_XMM1); // zmm
+
+ theEmitter->emitIns_R_R(INS_vcvttpd2uqqs, EA_16BYTE, REG_XMM0, REG_XMM1); // xmm
+ theEmitter->emitIns_R_R(INS_vcvttpd2uqqs, EA_16BYTE, REG_XMM9, REG_XMM10); // xmm
+ theEmitter->emitIns_R_R(INS_vcvttpd2uqqs, EA_16BYTE, REG_XMM15, REG_XMM16); // xmm
+ theEmitter->emitIns_R_R(INS_vcvttpd2uqqs, EA_32BYTE, REG_XMM0, REG_XMM1); // ymm
+ theEmitter->emitIns_R_R(INS_vcvttpd2uqqs, EA_64BYTE, REG_XMM0, REG_XMM1); // zmm
+
+ // scalar conversion instructions
+ theEmitter->emitIns_R_R(INS_vcvttsd2sis32, EA_4BYTE, REG_EAX, REG_XMM0);
+ theEmitter->emitIns_R_R(INS_vcvttsd2sis64, EA_8BYTE, REG_RAX, REG_XMM0);
+ theEmitter->emitIns_R_R(INS_vcvttsd2usis32, EA_4BYTE, REG_EAX, REG_XMM0);
+ theEmitter->emitIns_R_R(INS_vcvttsd2usis64, EA_8BYTE, REG_RAX, REG_XMM0);
+ theEmitter->emitIns_R_R(INS_vcvttss2sis32, EA_4BYTE, REG_EAX, REG_XMM0);
+ theEmitter->emitIns_R_R(INS_vcvttss2sis64, EA_8BYTE, REG_RAX, REG_XMM0);
+ theEmitter->emitIns_R_R(INS_vcvttss2usis32, EA_4BYTE, REG_EAX, REG_XMM0);
+ theEmitter->emitIns_R_R(INS_vcvttss2usis64, EA_8BYTE, REG_RAX, REG_XMM0);
+
+ // minmax instructions
+ theEmitter->emitIns_R_R_R_I(INS_vminmaxss, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2, 0);
+ theEmitter->emitIns_R_R_R_I(INS_vminmaxss, EA_16BYTE, REG_XMM8, REG_XMM9, REG_XMM10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_vminmaxss, EA_16BYTE, REG_XMM14, REG_XMM15, REG_XMM16, 0);
+
+ theEmitter->emitIns_R_R_R_I(INS_vminmaxsd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2, 0);
+ theEmitter->emitIns_R_R_R_I(INS_vminmaxsd, EA_16BYTE, REG_XMM9, REG_XMM10, REG_XMM11, 0);
+ theEmitter->emitIns_R_R_R_I(INS_vminmaxsd, EA_16BYTE, REG_XMM16, REG_XMM17, REG_XMM18, 0);
+
+ theEmitter->emitIns_R_R_R_I(INS_vminmaxps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2, 0);
+ theEmitter->emitIns_R_R_R_I(INS_vminmaxpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2, 0);
+ theEmitter->emitIns_R_R_R_I(INS_vminmaxps, EA_64BYTE, REG_XMM0, REG_XMM1, REG_XMM2, 0);
+ theEmitter->emitIns_R_R_R_I(INS_vminmaxpd, EA_64BYTE, REG_XMM0, REG_XMM1, REG_XMM2, 0);
+
+ // VCVT[,T]PS2I[,U]BS
+ theEmitter->emitIns_R_R(INS_vcvtps2ibs, EA_16BYTE, REG_XMM0, REG_XMM1);
+ theEmitter->emitIns_R_R(INS_vcvtps2ibs, EA_32BYTE, REG_XMM0, REG_XMM1);
+ theEmitter->emitIns_R_R(INS_vcvtps2ibs, EA_32BYTE, REG_XMM0, REG_XMM1, INS_OPTS_EVEX_er_ru);
+ theEmitter->emitIns_R_R(INS_vcvtps2ibs, EA_64BYTE, REG_XMM0, REG_XMM1);
+ theEmitter->emitIns_R_R(INS_vcvtps2ibs, EA_64BYTE, REG_XMM0, REG_XMM1, INS_OPTS_EVEX_er_ru);
+
+ theEmitter->emitIns_R_R(INS_vcvtps2iubs, EA_16BYTE, REG_XMM0, REG_XMM1);
+ theEmitter->emitIns_R_R(INS_vcvtps2iubs, EA_32BYTE, REG_XMM0, REG_XMM1);
+ theEmitter->emitIns_R_R(INS_vcvtps2iubs, EA_32BYTE, REG_XMM0, REG_XMM1, INS_OPTS_EVEX_er_rz);
+ theEmitter->emitIns_R_R(INS_vcvtps2iubs, EA_64BYTE, REG_XMM0, REG_XMM1);
+ theEmitter->emitIns_R_R(INS_vcvtps2iubs, EA_64BYTE, REG_XMM0, REG_XMM1, INS_OPTS_EVEX_er_rz);
+
+ theEmitter->emitIns_R_R(INS_vcvttps2ibs, EA_16BYTE, REG_XMM0, REG_XMM1);
+ theEmitter->emitIns_R_R(INS_vcvttps2ibs, EA_32BYTE, REG_XMM0, REG_XMM1);
+ theEmitter->emitIns_R_R(INS_vcvttps2ibs, EA_32BYTE, REG_XMM0, REG_XMM1, INS_OPTS_EVEX_eb_er_rd);
+ theEmitter->emitIns_R_R(INS_vcvttps2ibs, EA_64BYTE, REG_XMM0, REG_XMM1);
+
+ theEmitter->emitIns_R_R(INS_vcvttps2iubs, EA_16BYTE, REG_XMM0, REG_XMM1);
+ theEmitter->emitIns_R_R(INS_vcvttps2iubs, EA_32BYTE, REG_XMM0, REG_XMM1);
+ theEmitter->emitIns_R_R(INS_vcvttps2iubs, EA_32BYTE, REG_XMM0, REG_XMM1, INS_OPTS_EVEX_er_ru);
+ theEmitter->emitIns_R_R(INS_vcvttps2iubs, EA_64BYTE, REG_XMM0, REG_XMM1);
+
+ // VPDPW[SU,US,UU]D[,S]
+ theEmitter->emitIns_R_R_R(INS_vpdpwsud, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ theEmitter->emitIns_R_R_R(INS_vpdpwsud, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ theEmitter->emitIns_R_R_R(INS_vpdpwsud, EA_64BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ theEmitter->emitIns_R_R_R(INS_vpdpwsuds, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ theEmitter->emitIns_R_R_R(INS_vpdpwsuds, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ theEmitter->emitIns_R_R_R(INS_vpdpwsuds, EA_64BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+
+ theEmitter->emitIns_R_R_R(INS_vpdpwusd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ theEmitter->emitIns_R_R_R(INS_vpdpwusd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ theEmitter->emitIns_R_R_R(INS_vpdpwusd, EA_64BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ theEmitter->emitIns_R_R_R(INS_vpdpwusds, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ theEmitter->emitIns_R_R_R(INS_vpdpwusds, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ theEmitter->emitIns_R_R_R(INS_vpdpwusds, EA_64BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+
+ theEmitter->emitIns_R_R_R(INS_vpdpwuud, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ theEmitter->emitIns_R_R_R(INS_vpdpwuud, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ theEmitter->emitIns_R_R_R(INS_vpdpwuud, EA_64BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ theEmitter->emitIns_R_R_R(INS_vpdpwuuds, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ theEmitter->emitIns_R_R_R(INS_vpdpwuuds, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ theEmitter->emitIns_R_R_R(INS_vpdpwuuds, EA_64BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+
+ // VPDPB[SU,UU,SS]D[,S]
+ theEmitter->emitIns_R_R_R(INS_vpdpbssd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ theEmitter->emitIns_R_R_R(INS_vpdpbssd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ theEmitter->emitIns_R_R_R(INS_vpdpbssd, EA_64BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ theEmitter->emitIns_R_R_R(INS_vpdpbssds, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ theEmitter->emitIns_R_R_R(INS_vpdpbssds, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ theEmitter->emitIns_R_R_R(INS_vpdpbssds, EA_64BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+
+ theEmitter->emitIns_R_R_R(INS_vpdpbsud, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ theEmitter->emitIns_R_R_R(INS_vpdpbsud, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ theEmitter->emitIns_R_R_R(INS_vpdpbsud, EA_64BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ theEmitter->emitIns_R_R_R(INS_vpdpbsuds, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ theEmitter->emitIns_R_R_R(INS_vpdpbsuds, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ theEmitter->emitIns_R_R_R(INS_vpdpbsuds, EA_64BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+
+ theEmitter->emitIns_R_R_R(INS_vpdpbuud, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ theEmitter->emitIns_R_R_R(INS_vpdpbuud, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ theEmitter->emitIns_R_R_R(INS_vpdpbuud, EA_64BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ theEmitter->emitIns_R_R_R(INS_vpdpbuuds, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ theEmitter->emitIns_R_R_R(INS_vpdpbuuds, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ theEmitter->emitIns_R_R_R(INS_vpdpbuuds, EA_64BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+
+ // VMPSADBW
+ theEmitter->emitIns_R_R_R_I(INS_vmpsadbw, EA_64BYTE, REG_XMM0, REG_XMM1, REG_XMM2, 0); // zmm
+
+ // VCOMXSD
+ theEmitter->emitIns_R_R(INS_vcomxsd, EA_16BYTE, REG_XMM0, REG_XMM1);
+
+ // VCOMXSS
+ theEmitter->emitIns_R_R(INS_vcomxss, EA_16BYTE, REG_XMM0, REG_XMM1);
+
+ // VUCOMXSD
+ theEmitter->emitIns_R_R(INS_vucomxsd, EA_16BYTE, REG_XMM0, REG_XMM1);
+
+ // VUCOMXSS
+ theEmitter->emitIns_R_R(INS_vucomxss, EA_16BYTE, REG_XMM0, REG_XMM1);
+
+ // VMOVD
+ theEmitter->emitIns_R_R(INS_vmovd, EA_16BYTE, REG_XMM0, REG_XMM1);
+
+ // VMOVW
+ theEmitter->emitIns_R_R(INS_vmovw, EA_16BYTE, REG_XMM0, REG_XMM1);
+}
+
#endif // defined(DEBUG) && defined(TARGET_AMD64)
#ifdef PROFILING_SUPPORTED
diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h
index dc0f977b608622..02461633f3c547 100644
--- a/src/coreclr/jit/emit.h
+++ b/src/coreclr/jit/emit.h
@@ -1607,36 +1607,36 @@ class emitter
bool idIsBound() const
{
- assert(!IsAvx512OrPriorInstruction(_idIns));
+ assert(!IsSimdInstruction(_idIns));
return _idBound != 0;
}
void idSetIsBound()
{
- assert(!IsAvx512OrPriorInstruction(_idIns));
+ assert(!IsSimdInstruction(_idIns));
_idBound = 1;
}
#ifndef TARGET_ARMARCH
bool idIsCallRegPtr() const
{
- assert(!IsAvx512OrPriorInstruction(_idIns));
+ assert(!IsSimdInstruction(_idIns));
return _idCallRegPtr != 0;
}
void idSetIsCallRegPtr()
{
- assert(!IsAvx512OrPriorInstruction(_idIns));
+ assert(!IsSimdInstruction(_idIns));
_idCallRegPtr = 1;
}
#endif // !TARGET_ARMARCH
bool idIsTlsGD() const
{
- assert(!IsAvx512OrPriorInstruction(_idIns));
+ assert(!IsSimdInstruction(_idIns));
return _idTlsGD != 0;
}
void idSetTlsGD()
{
- assert(!IsAvx512OrPriorInstruction(_idIns));
+ assert(!IsSimdInstruction(_idIns));
_idTlsGD = 1;
}
@@ -1645,12 +1645,12 @@ class emitter
// code, it is not necessary to generate GC info for a call so labeled.
bool idIsNoGC() const
{
- assert(!IsAvx512OrPriorInstruction(_idIns));
+ assert(!IsSimdInstruction(_idIns));
return _idNoGC != 0;
}
void idSetIsNoGC(bool val)
{
- assert(!IsAvx512OrPriorInstruction(_idIns));
+ assert(!IsSimdInstruction(_idIns));
_idNoGC = val;
}
@@ -1703,7 +1703,7 @@ class emitter
unsigned idGetEvexAaaContext() const
{
- assert(IsAvx512OrPriorInstruction(_idIns));
+ assert(IsSimdInstruction(_idIns));
return _idEvexAaaContext;
}
@@ -1719,7 +1719,7 @@ class emitter
bool idIsEvexZContextSet() const
{
- assert(IsAvx512OrPriorInstruction(_idIns));
+ assert(IsSimdInstruction(_idIns));
return _idEvexZContext != 0;
}
diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp
index 77f8787a3e2457..ddec8af5e753f5 100644
--- a/src/coreclr/jit/emitxarch.cpp
+++ b/src/coreclr/jit/emitxarch.cpp
@@ -1417,7 +1417,9 @@ bool emitter::TakesRex2Prefix(const instrDesc* id) const
// - R, X, B, W - bits to express corresponding REX prefixes.Additionally, X combines with B to expand r/m to 32 SIMD
// registers
// - R' - combines with R to expand reg to 32 SIMD registers
-// - mm - lower 2 bits of m-mmmmm (5-bit) in corresponding VEX prefix
+// - mmm - Encodes the map number to which the instruction belongs
+// mm - lower 2 bits of m-mmmmm (5-bit) in the corresponding VEX prefix (for AVX10.1 and below)
+// mmm - map number to which the instruction belongs (for AVX10.2 and above)
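+// (e.g. the map-5 AVX10.2 saturating-convert instructions encode mmm = 0b101; see the 0x05 case below)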
// - vvvv (4-bits) - register specifier in 1's complement form; must be 1111 if unused
// - pp (2-bits) - opcode extension providing equivalent functionality of a SIMD size prefix
// these prefixes are treated mandatory when used with escape opcode 0Fh for
@@ -1433,6 +1435,11 @@ bool emitter::TakesRex2Prefix(const instrDesc* id) const
// - V'- bit to extend vvvv
// - aaa - specifies mask register
// Rest - reserved for future use; usage of them will result in an Undefined instruction exception.
+// - u - Bit to indicate YMM embedded rounding.
+// Set to 1 for ISAs AVX10.1 and below.
+// Must be set to 0 for AVX10.2 and above to indicate YMM embedded rounding.
+// - B' - Reserved as of now; set to 0 for future compatibility.
//
#define DEFAULT_BYTE_EVEX_PREFIX 0x62F07C0800000000ULL
@@ -1441,6 +1448,7 @@ bool emitter::TakesRex2Prefix(const instrDesc* id) const
#define LBIT_IN_BYTE_EVEX_PREFIX 0x0000002000000000ULL
#define LPRIMEBIT_IN_BYTE_EVEX_PREFIX 0x0000004000000000ULL
#define ZBIT_IN_BYTE_EVEX_PREFIX 0x0000008000000000ULL
+#define uBIT_IN_BYTE_EVEX_PREFIX 0x0000040000000000ULL
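+// (uBIT is bit 2 of the byte defaulting to 0x7C in DEFAULT_BYTE_EVEX_PREFIX; legacy EVEX keeps it set to 1,
+// while AVX10.2 clears it to request YMM embedded rounding.)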
//------------------------------------------------------------------------
// AddEvexPrefix: Add default EVEX prefix with only LL' bits set.
@@ -1482,7 +1490,13 @@ emitter::code_t emitter::AddEvexPrefix(const instrDesc* id, code_t code, emitAtt
if (!id->idHasMem())
{
- // embedded rounding case.
+ // ymm embedded rounding case.
+ if (attr == EA_32BYTE)
+ {
+ assert(emitComp->compIsaSupportedDebugOnly(InstructionSet_AVX10v2));
+ code &= ~(uBIT_IN_BYTE_EVEX_PREFIX);
+ }
+
unsigned roundingMode = id->idGetEvexbContext();
if (roundingMode == 1)
{
@@ -1740,7 +1754,7 @@ bool emitter::TakesRexWPrefix(const instrDesc* id) const
}
}
- assert(!IsAvx512OrPriorInstruction(ins));
+ assert(!IsSimdInstruction(ins));
#ifdef TARGET_AMD64
// movsx should always sign extend out to 8 bytes just because we don't track
@@ -2220,8 +2234,14 @@ emitter::code_t emitter::emitExtractEvexPrefix(instruction ins, code_t& code) co
// check for a prefix in the 11 position
BYTE sizePrefix = (code >> 16) & 0xFF;
- if ((sizePrefix != 0) && isPrefix(sizePrefix))
+ if (sizePrefix == 0)
+ {
+ // no SIMD prefix for EVEX2 (AVX10.2 and above)
+ assert(emitComp->compIsaSupportedDebugOnly(InstructionSet_AVX10v2));
+ }
+ else if (isPrefix(sizePrefix))
{
+ // EVEX1 - EVEX encoding before AVX10.2
// 'pp' bits in byte 1 of EVEX prefix allows us to encode SIMD size prefixes as two bits
//
// 00 - None (0F - packed float)
@@ -2256,37 +2276,49 @@ emitter::code_t emitter::emitExtractEvexPrefix(instruction ins, code_t& code) co
unreached();
}
}
+ }
+ else
+ {
+ unreached();
+ }
+ // Now the byte in the 22 position should be either of the below:
+ // 1. An escape byte 0F (For isa before AVX10.2)
+ // 2. A map number from 0 to 7 (For AVX10.2 and above)
+ leadingBytes = check;
+ assert(leadingBytes == 0x0F || (emitComp->compIsaSupportedDebugOnly(InstructionSet_AVX10v2) &&
+ leadingBytes >= 0x00 && leadingBytes <= 0x07));
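+ // (e.g. leadingBytes is 0x05 for the map-5 AVX10.2 saturating-convert instructions)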
- // Now the byte in the 22 position must be an escape byte 0F
- leadingBytes = check;
- assert(leadingBytes == 0x0F);
-
- // Get rid of both sizePrefix and escape byte
- code &= 0x0000FFFFLL;
+ // Get rid of both sizePrefix and escape byte
+ code &= 0x0000FFFFLL;
- // Check the byte in the 33 position to see if it is 3A or 38.
- // In such a case escape bytes must be 0x0F3A or 0x0F38
- check = code & 0xFF;
+ // Check the byte in the 33 position to see if it is 3A or 38.
+ // In such a case escape bytes must be 0x0F3A or 0x0F38
+ check = code & 0xFF;
- if ((check == 0x3A) || (check == 0x38))
- {
- leadingBytes = (leadingBytes << 8) | check;
- code &= 0x0000FF00LL;
- }
+ if ((check == 0x3A) || (check == 0x38))
+ {
+ leadingBytes = (leadingBytes << 8) | check;
+ code &= 0x0000FF00LL;
}
}
else
{
- // 2-byte opcode with the bytes ordered as 0x0011RM22
- // the byte in position 11 must be an escape byte.
+ // 2-byte opcode with the bytes ordered as 0x0011RM22. There are 2 possibilities here:
+ // 1. the byte in position 11 is an escape byte.
+ // 2. the byte in position 11 is a map number from 0 to 7.
leadingBytes = (code >> 16) & 0xFF;
- assert(leadingBytes == 0x0F || leadingBytes == 0x00);
+ assert(leadingBytes == 0x0F || (emitComp->compIsaSupportedDebugOnly(InstructionSet_AVX10v2) &&
+ leadingBytes >= 0x00 && leadingBytes <= 0x07));
code &= 0xFFFF;
}
- // If there is an escape byte it must be 0x0F or 0x0F3A or 0x0F38
- // mm bits in byte 0 of EVEX prefix allows us to encode these
- // implied leading bytes. They are identical to low two bits of VEX.mmmmm
+ // Encode the escape byte in the EVEX prefix using either of the below:
+ // 1. If there is an escape byte, it must be 0x0F, 0x0F3A, or 0x0F38.
+ // The mm bits in byte 0 of the EVEX prefix allow us to encode these
+ // implied leading bytes; they are identical to the low two bits of VEX.mmmmm.
+ // 2. If there is no escape byte but a map number from 0 to 7, the
+ // EVEX.mmm bits in byte 0 of the EVEX prefix allow us to encode these
+ // map numbers.
switch (leadingBytes)
{
@@ -2314,6 +2346,19 @@ emitter::code_t emitter::emitExtractEvexPrefix(instruction ins, code_t& code) co
break;
}
+ case 0x05:
+ {
+ assert(emitComp->compIsaSupportedDebugOnly(InstructionSet_AVX10v2));
+ evexPrefix |= (0x05 << 16);
+ break;
+ }
+
+ case 0x01:
+ case 0x02:
+ case 0x03:
+ case 0x04:
+ case 0x06:
+ case 0x07:
default:
{
assert(!"encountered unknown leading bytes");
@@ -3066,7 +3111,7 @@ bool emitter::emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id)
{
instruction ins = id->idIns();
- if (!IsAvx512OrPriorInstruction(ins))
+ if (!IsSimdInstruction(ins))
{
return false;
}
@@ -3511,7 +3556,7 @@ bool emitter::EncodedBySSE38orSSE3A(instruction ins) const
size_t insCode = 0;
- if (!IsAvx512OrPriorInstruction(ins))
+ if (!IsSimdInstruction(ins))
{
return false;
}
@@ -4081,7 +4126,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeRR(instrDesc* id, code_t code, int val
}
else
{
- assert(!IsAvx512OrPriorInstruction(ins));
+ assert(!IsSimdInstruction(ins));
}
return valSize + emitInsSizeRR(id, code);
@@ -4130,7 +4175,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeRR(instrDesc* id)
if ((code & 0xFF00) != 0)
{
- sz += IsAvx512OrPriorInstruction(ins) ? emitInsSize(id, code, includeRexPrefixSize) : 5;
+ sz += IsSimdInstruction(ins) ? emitInsSize(id, code, includeRexPrefixSize) : 5;
}
else
{
@@ -4698,7 +4743,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code, int val
}
else
{
- assert(!IsAvx512OrPriorInstruction(ins));
+ assert(!IsSimdInstruction(ins));
}
return valSize + emitInsSizeAM(id, code);
@@ -4757,7 +4802,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, code_t code, int val
}
else
{
- assert(!IsAvx512OrPriorInstruction(ins));
+ assert(!IsSimdInstruction(ins));
}
return valSize + emitInsSizeCV(id, code);
@@ -6058,7 +6103,7 @@ void emitter::emitIns_R_I(instruction ins,
emitAttr size = EA_SIZE(attr);
// Allow emitting SSE2/AVX SIMD instructions of R_I form that can specify EA_16BYTE or EA_32BYTE
- assert(size <= EA_PTRSIZE || IsAvx512OrPriorInstruction(ins));
+ assert(size <= EA_PTRSIZE || IsSimdInstruction(ins));
noway_assert(emitVerifyEncodable(ins, size, reg));
@@ -6126,7 +6171,7 @@ void emitter::emitIns_R_I(instruction ins,
if (valInByte)
{
- if (IsAvx512OrPriorInstruction(ins))
+ if (IsSimdInstruction(ins))
{
sz = 1;
isSimdInsAndValInByte = true;
@@ -6142,7 +6187,7 @@ void emitter::emitIns_R_I(instruction ins,
}
else
{
- assert(!IsAvx512OrPriorInstruction(ins));
+ assert(!IsSimdInstruction(ins));
if (reg == REG_EAX && !instrIs3opImul(ins))
{
@@ -7079,7 +7124,7 @@ void emitter::emitIns_AR(instruction ins, emitAttr attr, regNumber base, int off
void emitter::emitIns_AR_R_R(
instruction ins, emitAttr attr, regNumber op2Reg, regNumber op3Reg, regNumber base, int offs, insOpts instOptions)
{
- assert(IsAvx512OrPriorInstruction(ins));
+ assert(IsSimdInstruction(ins));
assert(IsThreeOperandAVXInstruction(ins));
instrDesc* id = emitNewInstrAmd(attr, offs);
@@ -7126,7 +7171,7 @@ void emitter::emitIns_R_A_I(
instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir, int ival, insOpts instOptions)
{
noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1));
- assert(IsAvx512OrPriorInstruction(ins));
+ assert(IsSimdInstruction(ins));
ssize_t offs = indir->Offset();
instrDesc* id = emitNewInstrAmdCns(attr, offs, ival);
@@ -7172,7 +7217,7 @@ void emitter::emitIns_R_C_I(instruction ins,
}
noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1));
- assert(IsAvx512OrPriorInstruction(ins));
+ assert(IsSimdInstruction(ins));
instrDesc* id = emitNewInstrCnsDsp(attr, ival, offs);
@@ -7206,7 +7251,7 @@ void emitter::emitIns_R_S_I(
instruction ins, emitAttr attr, regNumber reg1, int varx, int offs, int ival, insOpts instOptions)
{
noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1));
- assert(IsAvx512OrPriorInstruction(ins));
+ assert(IsSimdInstruction(ins));
instrDesc* id = emitNewInstrCns(attr, ival);
@@ -7243,7 +7288,7 @@ void emitter::emitIns_R_S_I(
void emitter::emitIns_R_R_A(
instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, GenTreeIndir* indir, insOpts instOptions)
{
- assert(IsAvx512OrPriorInstruction(ins));
+ assert(IsSimdInstruction(ins));
assert(IsThreeOperandAVXInstruction(ins));
ssize_t offs = indir->Offset();
@@ -7267,7 +7312,7 @@ void emitter::emitIns_R_R_A(
void emitter::emitIns_R_R_AR(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber base, int offs)
{
- assert(IsAvx512OrPriorInstruction(ins));
+ assert(IsSimdInstruction(ins));
assert(IsThreeOperandAVXInstruction(ins));
instrDesc* id = emitNewInstrAmd(attr, offs);
@@ -7363,7 +7408,7 @@ void emitter::emitIns_R_R_C(instruction ins,
int offs,
insOpts instOptions)
{
- assert(IsAvx512OrPriorInstruction(ins));
+ assert(IsSimdInstruction(ins));
assert(IsThreeOperandAVXInstruction(ins));
// Static always need relocs
@@ -7398,7 +7443,7 @@ void emitter::emitIns_R_R_C(instruction ins,
void emitter::emitIns_R_R_R(
instruction ins, emitAttr attr, regNumber targetReg, regNumber reg1, regNumber reg2, insOpts instOptions)
{
- assert(IsAvx512OrPriorInstruction(ins));
+ assert(IsSimdInstruction(ins));
assert(IsThreeOperandAVXInstruction(ins) || IsKInstruction(ins));
instrDesc* id = emitNewInstr(attr);
@@ -7426,7 +7471,7 @@ void emitter::emitIns_R_R_R(
void emitter::emitIns_R_R_S(
instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int varx, int offs, insOpts instOptions)
{
- assert(IsAvx512OrPriorInstruction(ins));
+ assert(IsSimdInstruction(ins));
assert(IsThreeOperandAVXInstruction(ins));
instrDesc* id = emitNewInstr(attr);
@@ -7460,7 +7505,7 @@ void emitter::emitIns_R_R_A_I(instruction ins,
insFormat fmt,
insOpts instOptions)
{
- assert(IsAvx512OrPriorInstruction(ins));
+ assert(IsSimdInstruction(ins));
assert(IsThreeOperandAVXInstruction(ins));
ssize_t offs = indir->Offset();
@@ -7485,7 +7530,7 @@ void emitter::emitIns_R_R_A_I(instruction ins,
void emitter::emitIns_R_R_AR_I(
instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber base, int offs, int ival)
{
- assert(IsAvx512OrPriorInstruction(ins));
+ assert(IsSimdInstruction(ins));
assert(IsThreeOperandAVXInstruction(ins));
instrDesc* id = emitNewInstrAmdCns(attr, offs, ival);
@@ -7514,7 +7559,7 @@ void emitter::emitIns_R_R_C_I(instruction ins,
int ival,
insOpts instOptions)
{
- assert(IsAvx512OrPriorInstruction(ins));
+ assert(IsSimdInstruction(ins));
assert(IsThreeOperandAVXInstruction(ins));
// Static always need relocs
@@ -7557,7 +7602,7 @@ void emitter::emitIns_R_R_C_I(instruction ins,
void emitter::emitIns_R_R_R_I(
instruction ins, emitAttr attr, regNumber targetReg, regNumber reg1, regNumber reg2, int ival, insOpts instOptions)
{
- assert(IsAvx512OrPriorInstruction(ins));
+ assert(IsSimdInstruction(ins));
assert(IsThreeOperandAVXInstruction(ins));
instrDesc* id = emitNewInstrCns(attr, ival);
@@ -7580,7 +7625,7 @@ void emitter::emitIns_R_R_R_I(
void emitter::emitIns_R_R_S_I(
instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int varx, int offs, int ival, insOpts instOptions)
{
- assert(IsAvx512OrPriorInstruction(ins));
+ assert(IsSimdInstruction(ins));
assert(IsThreeOperandAVXInstruction(ins));
instrDesc* id = emitNewInstrCns(attr, ival);
@@ -8285,7 +8330,7 @@ void emitter::emitIns_AR_R(instruction ins, emitAttr attr, regNumber reg, regNum
void emitter::emitIns_C_R_I(
instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs, regNumber reg, int ival)
{
- assert(IsAvx512OrPriorInstruction(ins));
+ assert(IsSimdInstruction(ins));
assert(reg != REG_NA);
// Static always need relocs
@@ -8322,7 +8367,7 @@ void emitter::emitIns_C_R_I(
//
void emitter::emitIns_S_R_I(instruction ins, emitAttr attr, int varNum, int offs, regNumber reg, int ival)
{
- assert(IsAvx512OrPriorInstruction(ins));
+ assert(IsSimdInstruction(ins));
assert(reg != REG_NA);
instrDesc* id = emitNewInstrAmdCns(attr, 0, ival);
@@ -8355,7 +8400,7 @@ void emitter::emitIns_S_R_I(instruction ins, emitAttr attr, int varNum, int offs
//
void emitter::emitIns_A_R_I(instruction ins, emitAttr attr, GenTreeIndir* indir, regNumber reg, int imm)
{
- assert(IsAvx512OrPriorInstruction(ins));
+ assert(IsSimdInstruction(ins));
assert(reg != REG_NA);
instrDesc* id = emitNewInstrAmdCns(attr, indir->Offset(), imm);
@@ -13289,7 +13334,7 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
// SSE/AVX do not need to modify opcode
if ((signed char)cval == cval && addc->cnsReloc == false && ins != INS_mov && ins != INS_test)
{
- if (id->idInsFmt() != IF_ARW_SHF && !IsAvx512OrPriorInstruction(ins))
+ if (id->idInsFmt() != IF_ARW_SHF && !IsSimdInstruction(ins))
{
code |= 2;
}
@@ -13672,7 +13717,7 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
#else
dst += emitOutputLong(dst, dsp);
#endif
- if (!IsAvx512OrPriorInstruction(ins) && id->idIsTlsGD())
+ if (!IsSimdInstruction(ins) && id->idIsTlsGD())
{
addlDelta = -4;
emitRecordRelocationWithAddlDelta((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_TLSGD,
@@ -14236,7 +14281,7 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
if ((signed char)cval == cval && addc->cnsReloc == false && ins != INS_mov && ins != INS_test)
{
if ((id->idInsFmt() != IF_SRW_SHF) && (id->idInsFmt() != IF_RRW_SRD_CNS) &&
- (id->idInsFmt() != IF_RWR_RRD_SRD_CNS) && !IsAvx512OrPriorInstruction(ins))
+ (id->idInsFmt() != IF_RWR_RRD_SRD_CNS) && !IsSimdInstruction(ins))
{
code |= 2;
}
@@ -14736,7 +14781,7 @@ BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
if ((signed char)cval == cval && addc->cnsReloc == false && ins != INS_mov && ins != INS_test)
{
// SSE/AVX do not need to modify opcode
- if (id->idInsFmt() != IF_MRW_SHF && !IsAvx512OrPriorInstruction(ins))
+ if (id->idInsFmt() != IF_MRW_SHF && !IsSimdInstruction(ins))
{
code |= 2;
}
@@ -15451,7 +15496,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
assert(!id->idHasReg3());
- if (IsAvx512OrPriorInstruction(ins))
+ if (IsSimdInstruction(ins))
{
assert((ins != INS_movd) || (isFloatReg(reg1) != isFloatReg(reg2)));
@@ -15980,7 +16025,7 @@ BYTE* emitter::emitOutputRI(BYTE* dst, instrDesc* id)
noway_assert(emitVerifyEncodable(ins, size, reg));
- if (IsAvx512OrPriorInstruction(ins))
+ if (IsSimdInstruction(ins))
{
// Handle SSE2 instructions of the form "opcode reg, immed8"
@@ -17763,7 +17808,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
case IF_RWR_ARD_CNS:
case IF_RRW_ARD_CNS:
{
- assert(IsAvx512OrPriorInstruction(ins));
+ assert(IsSimdInstruction(ins));
emitGetInsAmdCns(id, &cnsVal);
if (hasCodeMI(ins))
@@ -17801,7 +17846,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
case IF_AWR_RRD_CNS:
case IF_ARW_RRD_CNS:
{
- assert(IsAvx512OrPriorInstruction(ins));
+ assert(IsSimdInstruction(ins));
emitGetInsAmdCns(id, &cnsVal);
dst = emitOutputAM(dst, id, insCodeMR(ins), &cnsVal);
sz = emitSizeOfInsDsc_AMD(id);
@@ -17848,7 +17893,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
case IF_RWR_RRD_ARD_CNS:
case IF_RWR_RRD_ARD_RRD:
{
- assert(IsAvx512OrPriorInstruction(ins));
+ assert(IsSimdInstruction(ins));
code = insCodeRM(ins);
emitGetInsAmdCns(id, &cnsVal);
@@ -17994,7 +18039,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
case IF_SWR_RRD_CNS:
case IF_SRW_RRD_CNS:
{
- assert(IsAvx512OrPriorInstruction(ins) || (ins == INS_shld) || (ins == INS_shrd));
+ assert(IsSimdInstruction(ins) || (ins == INS_shld) || (ins == INS_shrd));
emitGetInsAmdCns(id, &cnsVal);
dst = emitOutputSV(dst, id, insCodeMR(ins), &cnsVal);
sz = emitSizeOfInsDsc_CNS(id);
@@ -18005,7 +18050,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
case IF_RWR_SRD_CNS:
case IF_RRW_SRD_CNS:
{
- assert(IsAvx512OrPriorInstruction(ins));
+ assert(IsSimdInstruction(ins));
emitGetInsCns(id, &cnsVal);
if (hasCodeMI(ins))
@@ -18234,7 +18279,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
case IF_RWR_MRD_CNS:
case IF_RRW_MRD_CNS:
{
- assert(IsAvx512OrPriorInstruction(ins));
+ assert(IsSimdInstruction(ins));
emitGetInsDcmCns(id, &cnsVal);
if (hasCodeMI(ins))
@@ -19478,6 +19523,8 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
case INS_vmovdqu8:
case INS_vmovdqu16:
case INS_vmovdqu64:
+ case INS_vmovd:
+ case INS_vmovw:
case INS_movaps:
case INS_movups:
case INS_movapd:
@@ -19619,6 +19666,10 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
case INS_vrangeps:
case INS_vrangesd:
case INS_vrangess:
+ case INS_vminmaxsd:
+ case INS_vminmaxss:
+ case INS_vminmaxpd:
+ case INS_vminmaxps:
case INS_vreducepd:
case INS_vreduceps:
case INS_vreducesd:
@@ -19633,6 +19684,17 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
break;
}
+ // TODO-XArch-AVX10.2: handle perf for AVX10.2 instructions
+ case INS_vcomxsd:
+ case INS_vcomxss:
+ case INS_vucomxsd:
+ case INS_vucomxss:
+ {
+ result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+ result.insLatency += PERFSCORE_LATENCY_4C;
+ break;
+ }
+
case INS_vpermi2b:
case INS_vpermt2b:
{
@@ -19808,8 +19870,24 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
case INS_vcvtusi2ss32:
case INS_vcvtusi2ss64:
case INS_vcvttsd2usi32:
+ case INS_vcvttsd2usis32:
case INS_vcvttsd2usi64:
+ case INS_vcvttsd2usis64:
case INS_vcvttss2usi32:
+ case INS_vcvttss2usis32:
+ case INS_vcvttsd2sis32:
+ case INS_vcvttsd2sis64:
+ case INS_vcvttss2sis32:
+ case INS_vcvttss2sis64:
+ case INS_vcvttss2usis64:
+ case INS_vcvttps2dqs:
+ case INS_vcvttps2udqs:
+ case INS_vcvttpd2qqs:
+ case INS_vcvttpd2uqqs:
+ case INS_vcvttps2ibs:
+ case INS_vcvttps2iubs:
+ case INS_vcvtps2ibs:
+ case INS_vcvtps2iubs:
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
result.insLatency += PERFSCORE_LATENCY_7C;
break;
@@ -20246,6 +20324,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
}
case INS_mpsadbw:
+ case INS_vmpsadbw:
result.insThroughput = PERFSCORE_THROUGHPUT_2C;
result.insLatency += PERFSCORE_LATENCY_4C;
break;
@@ -20259,9 +20338,21 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
case INS_pmaddwd:
case INS_pmaddubsw:
case INS_vpdpbusd:
- case INS_vpdpwssd:
case INS_vpdpbusds:
+ case INS_vpdpbssd:
+ case INS_vpdpbssds:
+ case INS_vpdpbsud:
+ case INS_vpdpbsuds:
+ case INS_vpdpbuud:
+ case INS_vpdpbuuds:
case INS_vpdpwssds:
+ case INS_vpdpwssd:
+ case INS_vpdpwsud:
+ case INS_vpdpwsuds:
+ case INS_vpdpwusd:
+ case INS_vpdpwusds:
+ case INS_vpdpwuud:
+ case INS_vpdpwuuds:
case INS_gf2p8affineinvqb:
case INS_gf2p8affineqb:
case INS_gf2p8mulb:
diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index 94293bdf4e6217..29897aae4ac7ea 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -27789,6 +27789,21 @@ bool GenTreeHWIntrinsic::OperIsEmbRoundingEnabled() const
case NI_AVX10v1_MultiplyScalar:
case NI_AVX10v1_SubtractScalar:
case NI_AVX10v1_SqrtScalar:
+ case NI_AVX10v2_Add:
+ case NI_AVX10v2_ConvertToVector128Int32:
+ case NI_AVX10v2_ConvertToVector128Single:
+ case NI_AVX10v2_ConvertToVector128UInt32:
+ case NI_AVX10v2_ConvertToVector256Double:
+ case NI_AVX10v2_ConvertToVector256Int32:
+ case NI_AVX10v2_ConvertToVector256Int64:
+ case NI_AVX10v2_ConvertToVector256Single:
+ case NI_AVX10v2_ConvertToVector256UInt32:
+ case NI_AVX10v2_ConvertToVector256UInt64:
+ case NI_AVX10v2_Divide:
+ case NI_AVX10v2_Multiply:
+ case NI_AVX10v2_Scale:
+ case NI_AVX10v2_Sqrt:
+ case NI_AVX10v2_Subtract:
{
return true;
}
@@ -27857,6 +27872,10 @@ bool GenTreeHWIntrinsic::OperIsEmbRoundingEnabled() const
case NI_AVX10v1_V512_ConvertToVector512Double:
case NI_AVX10v1_V512_ConvertToVector512Int64:
case NI_AVX10v1_V512_ConvertToVector512UInt64:
+ case NI_AVX10v2_ConvertToSByteWithSaturationAndZeroExtendToInt32:
+ case NI_AVX10v2_ConvertToByteWithSaturationAndZeroExtendToInt32:
+ case NI_AVX10v2_V512_ConvertToSByteWithSaturationAndZeroExtendToInt32:
+ case NI_AVX10v2_V512_ConvertToByteWithSaturationAndZeroExtendToInt32:
{
return numArgs == 2;
}
@@ -28232,6 +28251,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty
case NI_AVX_Add:
case NI_AVX2_Add:
case NI_AVX512F_Add:
+ case NI_AVX10v2_Add:
case NI_AVX512BW_Add:
#elif defined(TARGET_ARM64)
case NI_AdvSimd_Add:
@@ -28268,6 +28288,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty
case NI_SSE2_Divide:
case NI_AVX_Divide:
case NI_AVX512F_Divide:
+ case NI_AVX10v2_Divide:
#elif defined(TARGET_ARM64)
case NI_AdvSimd_Arm64_Divide:
#endif
@@ -28320,6 +28341,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty
#if defined(TARGET_XARCH)
case NI_SSE2_Multiply:
case NI_AVX512F_Multiply:
+ case NI_AVX10v2_Multiply:
{
if (varTypeIsFloating(simdBaseType))
{
@@ -28485,6 +28507,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty
case NI_AVX2_Subtract:
case NI_AVX512F_Subtract:
case NI_AVX512BW_Subtract:
+ case NI_AVX10v2_Subtract:
#elif defined(TARGET_ARM64)
case NI_AdvSimd_Subtract:
case NI_AdvSimd_Arm64_Subtract:
diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp
index 553888bb996aa6..a00d57962d757b 100644
--- a/src/coreclr/jit/hwintrinsic.cpp
+++ b/src/coreclr/jit/hwintrinsic.cpp
@@ -807,8 +807,8 @@ static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = {
{ NI_Illegal, NI_Illegal }, // VectorT256
{ NI_Illegal, NI_Illegal }, // VectorT512
{ NI_Illegal, NI_Illegal }, // APX
- { NI_Illegal, NI_Illegal }, // AVX10v2
- { NI_Illegal, NI_Illegal }, // AVX10v2_V512
+ { FIRST_NI_AVX10v2, LAST_NI_AVX10v2 }, // AVX10v2
+ { FIRST_NI_AVX10v2_V512, LAST_NI_AVX10v2_V512 }, // AVX10v2_V512
{ FIRST_NI_GFNI, LAST_NI_GFNI },
{ FIRST_NI_GFNI_V256, LAST_NI_GFNI_V256 },
{ FIRST_NI_GFNI_V512, LAST_NI_GFNI_V512 },
diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h
index b76781500c56fa..8a629436d25947 100644
--- a/src/coreclr/jit/hwintrinsiclistxarch.h
+++ b/src/coreclr/jit/hwintrinsiclistxarch.h
@@ -1432,6 +1432,56 @@ HARDWARE_INTRINSIC(AVX10v1_X64, ConvertToUInt64,
HARDWARE_INTRINSIC(AVX10v1_X64, ConvertToUInt64WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usi64, INS_vcvttsd2usi64}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
#define LAST_NI_AVX10v1_X64 NI_AVX10v1_X64_ConvertToUInt64WithTruncation
+// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+// ISA Function name SIMD size NumArg Instructions Category Flags
+// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
+// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+// AVX10v2 Intrinsics
+#define FIRST_NI_AVX10v2 NI_AVX10v2_Add
+HARDWARE_INTRINSIC(AVX10v2, Add, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addps, INS_addpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative |HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible)
+HARDWARE_INTRINSIC(AVX10v2, ConvertToByteWithSaturationAndZeroExtendToInt32, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2iubs, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible)
+HARDWARE_INTRINSIC(AVX10v2, ConvertToByteWithTruncatedSaturationAndZeroExtendToInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2iubs, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
+HARDWARE_INTRINSIC(AVX10v2, ConvertToSByteWithSaturationAndZeroExtendToInt32, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2ibs, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible)
+HARDWARE_INTRINSIC(AVX10v2, ConvertToSByteWithTruncatedSaturationAndZeroExtendToInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2ibs, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
+HARDWARE_INTRINSIC(AVX10v2, ConvertToVector128Int32, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2dq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible)
+HARDWARE_INTRINSIC(AVX10v2, ConvertToVector128Single, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtqq2ps, INS_vcvtuqq2ps, INS_invalid, INS_cvtpd2ps}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible)
+HARDWARE_INTRINSIC(AVX10v2, ConvertToVector128UInt32, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtpd2udq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible)
+HARDWARE_INTRINSIC(AVX10v2, ConvertToVector256Double, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtqq2pd, INS_vcvtuqq2pd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible)
+HARDWARE_INTRINSIC(AVX10v2, ConvertToVector256Int32, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible)
+HARDWARE_INTRINSIC(AVX10v2, ConvertToVector256Int64, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2qq, INS_vcvtpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible)
+HARDWARE_INTRINSIC(AVX10v2, ConvertToVector256Single, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2ps, INS_vcvtudq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible)
+HARDWARE_INTRINSIC(AVX10v2, ConvertToVector256UInt32, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2udq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible)
+HARDWARE_INTRINSIC(AVX10v2, ConvertToVector256UInt64, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2uqq, INS_vcvtpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible)
+HARDWARE_INTRINSIC(AVX10v2, ConvertToVectorInt32WithTruncationSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2dqs, INS_vcvttpd2dqs}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
+HARDWARE_INTRINSIC(AVX10v2, ConvertToVectorInt64WithTruncationSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qqs, INS_vcvttpd2qqs}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
+HARDWARE_INTRINSIC(AVX10v2, ConvertToVectorUInt32WithTruncationSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udqs, INS_vcvttpd2udqs}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
+HARDWARE_INTRINSIC(AVX10v2, ConvertToVectorUInt64WithTruncationSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqqs, INS_vcvttpd2uqqs}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
+HARDWARE_INTRINSIC(AVX10v2, Divide, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divps, INS_divpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible)
+HARDWARE_INTRINSIC(AVX10v2, MinMax, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vminmaxps, INS_vminmaxpd}, HW_Category_IMM, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
+HARDWARE_INTRINSIC(AVX10v2, MinMaxScalar, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vminmaxss, INS_vminmaxsd}, HW_Category_IMM, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
+HARDWARE_INTRINSIC(AVX10v2, Multiply, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulps, INS_mulpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative |HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible)
+HARDWARE_INTRINSIC(AVX10v2, Scale, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vscalefps, INS_vscalefpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible)
+HARDWARE_INTRINSIC(AVX10v2, Sqrt, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtps, INS_sqrtpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible)
+HARDWARE_INTRINSIC(AVX10v2, Subtract, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subps, INS_subpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible)
+#define LAST_NI_AVX10v2 NI_AVX10v2_Subtract
+
+// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+// ISA Function name SIMD size NumArg Instructions Category Flags
+// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
+// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+// AVX10v2_V512 Intrinsics
+#define FIRST_NI_AVX10v2_V512 NI_AVX10v2_V512_ConvertToByteWithSaturationAndZeroExtendToInt32
+HARDWARE_INTRINSIC(AVX10v2_V512, ConvertToByteWithSaturationAndZeroExtendToInt32, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2iubs, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible)
+HARDWARE_INTRINSIC(AVX10v2_V512, ConvertToByteWithTruncatedSaturationAndZeroExtendToInt32, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2iubs, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
+HARDWARE_INTRINSIC(AVX10v2_V512, ConvertToSByteWithSaturationAndZeroExtendToInt32, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2ibs, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible)
+HARDWARE_INTRINSIC(AVX10v2_V512, ConvertToSByteWithTruncatedSaturationAndZeroExtendToInt32, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2ibs, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
+HARDWARE_INTRINSIC(AVX10v2_V512, ConvertToVectorInt32WithTruncationSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2dqs, INS_vcvttpd2dqs}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
+HARDWARE_INTRINSIC(AVX10v2_V512, ConvertToVectorInt64WithTruncationSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qqs, INS_vcvttpd2qqs}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
+HARDWARE_INTRINSIC(AVX10v2_V512, ConvertToVectorUInt32WithTruncationSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udqs, INS_vcvttpd2udqs}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
+HARDWARE_INTRINSIC(AVX10v2_V512, ConvertToVectorUInt64WithTruncationSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqqs, INS_vcvttpd2uqqs}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
+HARDWARE_INTRINSIC(AVX10v2_V512, MinMax, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vminmaxps, INS_vminmaxpd}, HW_Category_IMM, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
+HARDWARE_INTRINSIC(AVX10v2_V512, MultipleSumAbsoluteDifferences, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vmpsadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible)
+#define LAST_NI_AVX10v2_V512 NI_AVX10v2_V512_MultipleSumAbsoluteDifferences
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// ISA Function name SIMD size NumArg Instructions Category Flags
diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp
index b8c8d43f787e0b..ddc13bf346295d 100644
--- a/src/coreclr/jit/hwintrinsicxarch.cpp
+++ b/src/coreclr/jit/hwintrinsicxarch.cpp
@@ -50,6 +50,10 @@ static CORINFO_InstructionSet X64VersionOfIsa(CORINFO_InstructionSet isa)
return InstructionSet_AVX10v1_X64;
case InstructionSet_AVX10v1_V512:
return InstructionSet_AVX10v1_V512_X64;
+ case InstructionSet_AVX10v2:
+ return InstructionSet_AVX10v2_X64;
+ case InstructionSet_AVX10v2_V512:
+ return InstructionSet_AVX10v2_V512_X64;
case InstructionSet_AVXVNNI:
return InstructionSet_AVXVNNI_X64;
case InstructionSet_AES:
@@ -139,6 +143,10 @@ static CORINFO_InstructionSet V512VersionOfIsa(CORINFO_InstructionSet isa)
return InstructionSet_AVX10v1_V512;
case InstructionSet_AVX10v1_X64:
return InstructionSet_AVX10v1_V512_X64;
+ case InstructionSet_AVX10v2:
+ return InstructionSet_AVX10v2_V512;
+ case InstructionSet_AVX10v2_X64:
+ return InstructionSet_AVX10v2_V512_X64;
case InstructionSet_GFNI:
return InstructionSet_GFNI_V512;
case InstructionSet_PCLMULQDQ:
@@ -172,9 +180,16 @@ static CORINFO_InstructionSet lookupInstructionSet(const char* className)
{
return InstructionSet_AVX;
}
- else if (strcmp(className + 3, "10v1") == 0)
+ else if (strncmp(className + 3, "10v", 3) == 0)
{
- return InstructionSet_AVX10v1;
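+ // className + 6 is the character after "10v", e.g. "Avx10v2" -> "2"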
+ if (strcmp(className + 6, "1") == 0)
+ {
+ return InstructionSet_AVX10v1;
+ }
+ else if (strcmp(className + 6, "2") == 0)
+ {
+ return InstructionSet_AVX10v2;
+ }
}
else if (strcmp(className + 3, "2") == 0)
{
@@ -412,6 +427,9 @@ int HWIntrinsicInfo::lookupImmUpperBound(NamedIntrinsic id)
case NI_AVX_CompareScalar:
case NI_AVX512F_Compare:
case NI_EVEX_CompareMask:
+ case NI_AVX10v2_MinMaxScalar:
+ case NI_AVX10v2_MinMax:
+ case NI_AVX10v2_V512_MinMax:
{
assert(!HWIntrinsicInfo::HasFullRangeImm(id));
return 31; // enum FloatComparisonMode has 32 values
@@ -910,6 +928,10 @@ bool HWIntrinsicInfo::isFullyImplementedIsa(CORINFO_InstructionSet isa)
case InstructionSet_AVX10v1_X64:
case InstructionSet_AVX10v1_V512:
case InstructionSet_AVX10v1_V512_X64:
+ case InstructionSet_AVX10v2:
+ case InstructionSet_AVX10v2_X64:
+ case InstructionSet_AVX10v2_V512:
+ case InstructionSet_AVX10v2_V512_X64:
case InstructionSet_EVEX:
case InstructionSet_GFNI:
case InstructionSet_GFNI_X64:
diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h
index 5ec40ea333973c..4f00ca62627966 100644
--- a/src/coreclr/jit/instr.h
+++ b/src/coreclr/jit/instr.h
@@ -85,7 +85,7 @@ enum instruction : uint32_t
};
//------------------------------------------------------------------------
-// IsAvx512OrPriorInstruction: Is this an Avx512 or Avx or Sse or K (opmask) instruction.
+// IsSimdInstruction: Is this an SSE, AVX, AVX512, AVX10.2, or K (opmask) instruction.
// Technically, K instructions would be considered under the VEX encoding umbrella, but due to
// the instruction table encoding had to be pulled out with the rest of the `INST5` definitions.
//
@@ -95,10 +95,10 @@ enum instruction : uint32_t
// Returns:
// `true` if it is a sse or avx or avx512 instruction.
//
-inline bool IsAvx512OrPriorInstruction(instruction ins)
+inline bool IsSimdInstruction(instruction ins)
{
#if defined(TARGET_XARCH)
- return (ins >= INS_FIRST_SSE_INSTRUCTION) && (ins <= INS_LAST_AVX512_INSTRUCTION);
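+ // The AVX10.2 instructions are listed contiguously in instrsxarch.h, so extending the upper bound covers them.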
+ return (ins >= INS_FIRST_SSE_INSTRUCTION) && (ins <= INS_LAST_AVX10v2_INSTRUCTION);
#else
return false;
#endif // TARGET_XARCH
diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h
index 3c6285405a0747..24be0ef3527b6a 100644
--- a/src/coreclr/jit/instrsxarch.h
+++ b/src/coreclr/jit/instrsxarch.h
@@ -172,6 +172,10 @@ INSTMUL(imul_15, "imul", IUM_RD, BAD_CODE, 0x4400003868,
#define PCKDBL(c) PACK3(0x66, 0x0f, c)
#define PCKFLT(c) PACK2(0x0f, c)
#define PCKMVB(c) PACK3(0x0F, 0x38, c)
+#define PCKDBLMAP(m, c) PACK3(0x66, m, c)
+#define PCKFLTMAP(m, c) PACK2(m, c)
+#define SSEDBLMAP(m, c) PACK3(0xf2, m, c)
+#define SSEFLTMAP(m, c) PACK3(0xf3, m, c)
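+// e.g. PCKDBLMAP(0x05, 0x6D) encodes 66-prefixed opcode 0x6D in map 5 (vcvttps2qqs below); PCKFLTMAP is the no-SIMD-prefix form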
// These macros encode extra byte that is implicit in the macro.
#define PACK4(byte1,byte2,byte3,byte4) (((byte1) << 16) | ((byte2) << 24) | (byte3) | ((byte4) << 8))
@@ -181,6 +185,7 @@ INSTMUL(imul_15, "imul", IUM_RD, BAD_CODE, 0x4400003868,
#define SSE38(c) PSSE38(0x66, c)
#define SSE3A(c) PSSE3A(0x66, c)
+#define AVX3A(c) PSSE3A(0xf3, c)
// VEX* encodes the implied leading opcode bytes in c1:
// 1: implied 0f, 2: implied 0f 38, 3: implied 0f 3a
@@ -884,6 +889,54 @@ INST3(vpmultishiftqb, "pmultishiftqb", IUM_WR, BAD_CODE, BAD_
INST3(LAST_AVX512_INSTRUCTION, "LAST_AVX512_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None)
+INST3(FIRST_AVX10v2_INSTRUCTION, "FIRST_AVX10v2_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None)
+INST3(vcomxsd, "comxsd", IUM_RD, BAD_CODE, BAD_CODE, SSEDBL(0x2F), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | Writes_OF | Writes_SF | Writes_ZF | Writes_PF | Writes_CF | Resets_AF) // Compare double precision floating point values and set flags
+INST3(vcomxss, "comxss", IUM_RD, BAD_CODE, BAD_CODE, SSEFLT(0x2F), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | Writes_OF | Writes_SF | Writes_ZF | Writes_PF | Writes_CF | Resets_AF) // Compare single precision floating point values and set flags
+INST3(vucomxsd, "ucomxsd", IUM_RD, BAD_CODE, BAD_CODE, SSEDBL(0x2E), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | Writes_OF | Writes_SF | Writes_ZF | Writes_PF | Writes_CF | Resets_AF) // Perform an unordered compare of double precision floating point values and set flags
+INST3(vucomxss, "ucomxss", IUM_RD, BAD_CODE, BAD_CODE, SSEFLT(0x2E), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | Writes_OF | Writes_SF | Writes_ZF | Writes_PF | Writes_CF | Resets_AF) // Perform an unordered compare of single precision floating point values and set flags
+INST3(vcvttps2dqs, "cvttps2dqs", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x6D), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt with truncation/saturation packed singles to DWORDs
+INST3(vcvttps2udqs, "cvttps2udqs", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x6C), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt with truncation/saturation packed singles to unsigned DWORDs
+INST3(vcvttps2qqs, "cvttps2qqs", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x6D), INS_TT_HALF, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt with truncation/saturation packed singles to signed QWORDs
+INST3(vcvttps2uqqs, "cvttps2uqqs", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x6C), INS_TT_HALF, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt with truncation/saturation packed singles to unsigned QWORDs
+INST3(vcvttpd2dqs, "cvttpd2dqs", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x6D), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt with truncation/saturation packed doubles to DWORDs
+INST3(vcvttpd2udqs, "cvttpd2udqs", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x6C), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt with truncation/saturation packed doubles to unsigned DWORDs
+INST3(vcvttpd2qqs, "cvttpd2qqs", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x6D), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt with truncation/saturation packed doubles to signed QWORDs
+INST3(vcvttpd2uqqs, "cvttpd2uqqs", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x6C), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt with truncation/saturation packed doubles to unsigned QWORDs
+INST3(vcvttsd2sis32, "cvttsd2sis", IUM_WR, BAD_CODE, BAD_CODE, SSEDBLMAP(0x05, 0x6D), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W0 | Encoding_EVEX) // cvt with truncation/saturation scalar double to signed DWORD
+INST3(vcvttsd2sis64, "cvttsd2sis", IUM_WR, BAD_CODE, BAD_CODE, SSEDBLMAP(0x05, 0x6D), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX) // cvt with truncation/saturation scalar double to signed QWORD
+INST3(vcvttsd2usis32, "cvttsd2usis", IUM_WR, BAD_CODE, BAD_CODE, SSEDBLMAP(0x05, 0x6C), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W0 | Encoding_EVEX) // cvt with truncation/saturation scalar double to unsigned DWORD
+INST3(vcvttsd2usis64, "cvttsd2usis", IUM_WR, BAD_CODE, BAD_CODE, SSEDBLMAP(0x05, 0x6C), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX) // cvt with truncation/saturation scalar double to unsigned QWORD
+INST3(vcvttss2sis32, "cvttss2sis", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x6D), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt with truncation/saturation scalar single to signed DWORD
+INST3(vcvttss2sis64, "cvttss2sis", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x6D), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W1 | Encoding_EVEX) // cvt with truncation/saturation scalar single to signed QWORD
+INST3(vcvttss2usis32, "cvttss2usis", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x6C), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt with truncation/saturation scalar single to unsigned DWORD
+INST3(vcvttss2usis64, "cvttss2usis", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x6C), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W1 | Encoding_EVEX) // cvt with truncation/saturation scalar single to unsigned QWORD
+
+INST3(vcvtps2ibs, "cvtps2ibs", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x69), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt with saturation packed singles to signed bytes, zero-extended into DWORD lanes
+INST3(vcvtps2iubs, "cvtps2iubs", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x6B), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt with saturation packed singles to unsigned bytes, zero-extended into DWORD lanes
+INST3(vcvttps2ibs, "cvttps2ibs", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x68), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt with truncation/saturation packed singles to signed bytes, zero-extended into DWORD lanes
+INST3(vcvttps2iubs, "cvttps2iubs", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x6A), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt with truncation/saturation packed singles to unsigned bytes, zero-extended into DWORD lanes
+INST3(vmpsadbw, "mpsadbw", IUM_WR, BAD_CODE, BAD_CODE, AVX3A(0x42), INS_TT_FULL, Input_8Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Compute Multiple Packed Sums of Absolute Difference
+
+INST3(vminmaxsd, "minmaxsd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x53), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Return Minimum/Maximum scalar double
+INST3(vminmaxss, "minmaxss", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x53), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Return Minimum/Maximum scalar single
+INST3(vminmaxpd, "minmaxpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x52), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstSrcSrcAVXInstruction) // Return Minimum/Maximum packed doubles
+INST3(vminmaxps, "minmaxps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x52), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstSrcSrcAVXInstruction) // Return Minimum/Maximum packed singles
+INST3(vmovd, "movd", IUM_WR, PCKDBL(0xD6), BAD_CODE, SSEFLT(0x7E), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX) // Move DWORD between xmm regs <-> memory/xmm regs
+INST3(vmovw, "movw", IUM_WR, SSEFLTMAP(0x05, 0x7E), BAD_CODE, SSEFLTMAP(0x05, 0x6E), INS_TT_TUPLE1_SCALAR, Input_16Bit | REX_W0 | Encoding_EVEX) // Move WORD between xmm regs <-> memory/xmm regs
+INST3(vpdpwsud, "pdpwsud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0xD2), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply signed words of first source with unsigned words of second source and accumulate the DWORD results
+INST3(vpdpwsuds, "pdpwsuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0xD3), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply signed words of first source with unsigned words of second source and accumulate the DWORD results with saturation
+INST3(vpdpwusd, "pdpwusd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xD2), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply unsigned words of first source with signed words of second source and accumulate the DWORD results
+INST3(vpdpwusds, "pdpwusds", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xD3), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply unsigned words of first source with signed words of second source and accumulate the DWORD results with saturation
+INST3(vpdpwuud, "pdpwuud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0xD2), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply unsigned words of first source with unsigned words of second source and accumulate the DWORD results
+INST3(vpdpwuuds, "pdpwuuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0xD3), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply unsigned words of first source with unsigned words of second source and accumulate the DWORD results with saturation
+INST3(vpdpbssd, "pdpbssd", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf2, 0x50), INS_TT_FULL, Input_8Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply signed bytes of first source with signed bytes of second source and accumulate the DWORD results
+INST3(vpdpbssds, "pdpbssds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf2, 0x51), INS_TT_FULL, Input_8Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply signed bytes of first source with signed bytes of second source and accumulate the DWORD results with saturation
+INST3(vpdpbsud, "pdpbsud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0x50), INS_TT_FULL, Input_8Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply signed bytes of first source with unsigned bytes of second source and accumulate the DWORD results
+INST3(vpdpbsuds, "pdpbsuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0x51), INS_TT_FULL, Input_8Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply signed bytes of first source with unsigned bytes of second source and accumulate the DWORD results with saturation
+INST3(vpdpbuud, "pdpbuud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0x50), INS_TT_FULL, Input_8Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply unsigned bytes of first source with unsigned bytes of second source and accumulate the DWORD results
+INST3(vpdpbuuds, "pdpbuuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0x51), INS_TT_FULL, Input_8Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply unsigned bytes of first source with unsigned bytes of second source and accumulate the DWORD results with saturation
+INST3(LAST_AVX10v2_INSTRUCTION, "LAST_AVX10v2_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None)
+
// Scalar instructions in SSE4.2
INST3(crc32, "crc32", IUM_RW, BAD_CODE, BAD_CODE, PSSE38(0xF2, 0xF0), INS_TT_NONE, INS_FLAGS_None)
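
Note: the vminmax* table entries above back the new MinMax/MinMaxScalar intrinsics surfaced later in this diff. A minimal usage sketch; the imm8 semantics (min/max selection plus NaN and sign handling) are defined by the Intel AVX10.2 ISA reference, and the control value 0 here is illustrative only:

    using System.Runtime.Intrinsics;
    using System.Runtime.Intrinsics.X86;

    static class MinMaxExample
    {
        public static Vector128<double> Demo()
        {
            Vector128<double> a = Vector128.Create(1.0, double.NaN);
            Vector128<double> b = Vector128.Create(2.0, 5.0);
            // 'control' picks the operation variant per the VMINMAXPD spec.
            return Avx10v2.MinMax(a, b, 0);
        }
    }
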
diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp
index 8b38842d8e9fc0..bf0f463fdea760 100644
--- a/src/coreclr/jit/lowerxarch.cpp
+++ b/src/coreclr/jit/lowerxarch.cpp
@@ -10971,6 +10971,10 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
case NI_AVX10v1_V512_InsertVector128:
case NI_AVX10v1_V512_InsertVector256:
case NI_AVX10v1_V512_Range:
+ case NI_AVX10v2_MinMaxScalar:
+ case NI_AVX10v2_MinMax:
+ case NI_AVX10v2_V512_MinMax:
+ case NI_AVX10v2_V512_MultipleSumAbsoluteDifferences:
case NI_GFNI_GaloisFieldAffineTransform:
case NI_GFNI_GaloisFieldAffineTransformInverse:
case NI_GFNI_V256_GaloisFieldAffineTransform:
diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems
index 61d304184b2865..064e2579bdca3c 100644
--- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems
+++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems
@@ -2608,6 +2608,7 @@
+    <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\X86\Avx10v2.cs" />
@@ -2640,6 +2641,7 @@
+    <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\X86\Avx10v2.PlatformNotSupported.cs" />
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx10v2.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx10v2.PlatformNotSupported.cs
new file mode 100644
index 00000000000000..bea42e51167f55
--- /dev/null
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx10v2.PlatformNotSupported.cs
@@ -0,0 +1,327 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Diagnostics.CodeAnalysis;
+using System.Runtime.CompilerServices;
+using System.Runtime.Intrinsics;
+
+namespace System.Runtime.Intrinsics.X86
+{
+    /// <summary>Provides access to X86 Avx10.2 hardware instructions via intrinsics.</summary>
+    [CLSCompliant(false)]
+    public abstract class Avx10v2 : Avx10v1
+    {
+        internal Avx10v2() { }
+
+        /// <summary>Gets a value that indicates whether the APIs in this class are supported.</summary>
+        /// <value><see langword="true" /> if the APIs are supported; otherwise, <see langword="false" />.</value>
+        /// <remarks>A value of <see langword="false" /> indicates that the APIs will throw <see cref="PlatformNotSupportedException" />.</remarks>
+        public static new bool IsSupported { [Intrinsic] get { return false; } }
+
+        /// <summary>
+        /// VMINMAXPD xmm1{k1}{z}, xmm2, xmm3/m128/m64bcst, imm8
+        /// </summary>
+        public static Vector128<double> MinMax(Vector128<double> left, Vector128<double> right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// VMINMAXPD ymm1{k1}{z}, ymm2, ymm3/m256/m64bcst {sae}, imm8
+        /// </summary>
+        public static Vector256<double> MinMax(Vector256<double> left, Vector256<double> right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// VMINMAXPS xmm1{k1}{z}, xmm2, xmm3/m128/m32bcst, imm8
+        /// </summary>
+        public static Vector128<float> MinMax(Vector128<float> left, Vector128<float> right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// VMINMAXPS ymm1{k1}{z}, ymm2, ymm3/m256/m32bcst {sae}, imm8
+        /// </summary>
+        public static Vector256<float> MinMax(Vector256<float> left, Vector256<float> right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// VMINMAXSD xmm1{k1}{z}, xmm2, xmm3/m64 {sae}, imm8
+        /// </summary>
+        public static Vector128<double> MinMaxScalar(Vector128<double> left, Vector128<double> right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// VMINMAXSS xmm1{k1}{z}, xmm2, xmm3/m32 {sae}, imm8
+        /// </summary>
+        public static Vector128<float> MinMaxScalar(Vector128<float> left, Vector128<float> right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// VADDPD ymm1{k1}{z}, ymm2, ymm3/m256/m64bcst {er}
+        /// </summary>
+        public static Vector256<double> Add(Vector256<double> left, Vector256<double> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// VADDPS ymm1{k1}{z}, ymm2, ymm3/m256/m32bcst {er}
+        /// </summary>
+        public static Vector256<float> Add(Vector256<float> left, Vector256<float> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// VDIVPD ymm1{k1}{z}, ymm2, ymm3/m256/m64bcst {er}
+        /// </summary>
+        public static Vector256<double> Divide(Vector256<double> left, Vector256<double> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// VDIVPS ymm1{k1}{z}, ymm2, ymm3/m256/m32bcst {er}
+        /// </summary>
+        public static Vector256<float> Divide(Vector256<float> left, Vector256<float> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// VCVTPS2IBS xmm1{k1}{z}, xmm2/m128/m32bcst
+        /// </summary>
+        public static Vector128<int> ConvertToSByteWithSaturationAndZeroExtendToInt32(Vector128<float> value) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// VCVTPS2IBS ymm1{k1}{z}, ymm2/m256/m32bcst {er}
+        /// </summary>
+        public static Vector256<int> ConvertToSByteWithSaturationAndZeroExtendToInt32(Vector256<float> value) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// VCVTPS2IBS ymm1{k1}{z}, ymm2/m256/m32bcst {er}
+        /// </summary>
+        public static Vector256<int> ConvertToSByteWithSaturationAndZeroExtendToInt32(Vector256<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// VCVTPS2IUBS xmm1{k1}{z}, xmm2/m128/m32bcst
+        /// </summary>
+        public static Vector128<uint> ConvertToByteWithSaturationAndZeroExtendToInt32(Vector128<float> value) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// VCVTPS2IUBS ymm1{k1}{z}, ymm2/m256/m32bcst {er}
+        /// </summary>
+        public static Vector256<uint> ConvertToByteWithSaturationAndZeroExtendToInt32(Vector256<float> value) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// VCVTPS2IUBS ymm1{k1}{z}, ymm2/m256/m32bcst {er}
+        /// </summary>
+        public static Vector256<uint> ConvertToByteWithSaturationAndZeroExtendToInt32(Vector256<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// VCVTTPS2IBS xmm1{k1}{z}, xmm2/m128/m32bcst
+        /// </summary>
+        public static Vector128<int> ConvertToSByteWithTruncatedSaturationAndZeroExtendToInt32(Vector128<float> value) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// VCVTTPS2IBS ymm1{k1}{z}, ymm2/m256/m32bcst {sae}
+        /// </summary>
+        public static Vector256<int> ConvertToSByteWithTruncatedSaturationAndZeroExtendToInt32(Vector256<float> value) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// VCVTTPS2IUBS xmm1{k1}{z}, xmm2/m128/m32bcst
+        /// </summary>
+        public static Vector128<uint> ConvertToByteWithTruncatedSaturationAndZeroExtendToInt32(Vector128<float> value) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// VCVTTPS2IUBS ymm1{k1}{z}, ymm2/m256/m32bcst {sae}
+        /// </summary>
+        public static Vector256<uint> ConvertToByteWithTruncatedSaturationAndZeroExtendToInt32(Vector256<float> value) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// VMOVD xmm1, xmm2/m32
+        /// </summary>
+        public static Vector128<uint> ConvertToVector128UInt32(Vector128<uint> value) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// VMOVW xmm1, xmm2/m16
+        /// </summary>
+        public static Vector128<ushort> ConvertToVector128UInt16(Vector128<ushort> value) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// VCVTDQ2PS ymm1{k1}{z}, ymm2/m256/m32bcst {er}
+        /// </summary>
+        public static Vector256<float> ConvertToVector256Single(Vector256<int> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// VCVTPD2DQ xmm1{k1}{z}, ymm2/m256/m64bcst {er}
+        /// </summary>
+        public static Vector128<int> ConvertToVector128Int32(Vector256<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// VCVTPD2PS xmm1{k1}{z}, ymm2/m256/m64bcst {er}
+        /// </summary>
+        public static Vector128<float> ConvertToVector128Single(Vector256<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// VCVTPD2QQ ymm1{k1}{z}, ymm2/m256/m64bcst {er}
+        /// </summary>
+        public static Vector256<long> ConvertToVector256Int64(Vector256<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// VCVTPD2UDQ xmm1{k1}{z}, ymm2/m256/m64bcst {er}
+        /// </summary>
+        public static Vector128<uint> ConvertToVector128UInt32(Vector256<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// VCVTPD2UQQ ymm1{k1}{z}, ymm2/m256/m64bcst {er}
+        /// </summary>
+        public static Vector256<ulong> ConvertToVector256UInt64(Vector256<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// VCVTPS2DQ ymm1{k1}{z}, ymm2/m256/m32bcst {er}
+        /// </summary>
+        public static Vector256<int> ConvertToVector256Int32(Vector256<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// VCVTPS2QQ ymm1{k1}{z}, xmm2/m128/m32bcst {er}
+        /// </summary>
+        public static Vector256<long> ConvertToVector256Int64(Vector128<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// VCVTPS2UDQ ymm1{k1}{z}, ymm2/m256/m32bcst {er}
+        /// </summary>
+        public static Vector256<uint> ConvertToVector256UInt32(Vector256<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// VCVTPS2UQQ ymm1{k1}{z}, xmm2/m128/m32bcst {er}
+        /// </summary>
+        public static Vector256<ulong> ConvertToVector256UInt64(Vector128<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// VCVTQQ2PS xmm1{k1}{z}, ymm2/m256/m64bcst {er}
+        /// </summary>
+        public static Vector128<float> ConvertToVector128Single(Vector256<long> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// VCVTQQ2PD ymm1{k1}{z}, ymm2/m256/m64bcst {er}
+        /// </summary>
+        public static Vector256<double> ConvertToVector256Double(Vector256<long> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// VCVTUDQ2PS ymm1{k1}{z}, ymm2/m256/m32bcst {er}
+        /// </summary>
+        public static Vector256<float> ConvertToVector256Single(Vector256<uint> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// VCVTUQQ2PS xmm1{k1}{z}, ymm2/m256/m64bcst {er}
+        /// </summary>
+        public static Vector128<float> ConvertToVector128Single(Vector256<ulong> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// VCVTUQQ2PD ymm1{k1}{z}, ymm2/m256/m64bcst {er}
+        /// </summary>
+        public static Vector256<double> ConvertToVector256Double(Vector256<ulong> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// VMULPD ymm1{k1}{z}, ymm2, ymm3/m256/m64bcst {er}
+        /// </summary>
+        public static Vector256<double> Multiply(Vector256<double> left, Vector256<double> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// VMULPS ymm1{k1}{z}, ymm2, ymm3/m256/m32bcst {er}
+        /// </summary>
+        public static Vector256<float> Multiply(Vector256<float> left, Vector256<float> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// VSCALEFPD ymm1{k1}{z}, ymm2, ymm3/m256/m64bcst {er}
+        /// </summary>
+        public static Vector256<double> Scale(Vector256<double> left, Vector256<double> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// VSCALEFPS ymm1{k1}{z}, ymm2, ymm3/m256/m32bcst {er}
+        /// </summary>
+        public static Vector256<float> Scale(Vector256<float> left, Vector256<float> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// VSQRTPD ymm1{k1}{z}, ymm2/m256/m64bcst {er}
+        /// </summary>
+        public static Vector256<double> Sqrt(Vector256<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// VSQRTPS ymm1{k1}{z}, ymm2/m256/m32bcst {er}
+        /// </summary>
+        public static Vector256<float> Sqrt(Vector256<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// VSUBPD ymm1{k1}{z}, ymm2, ymm3/m256/m64bcst {er}
+        /// </summary>
+        public static Vector256<double> Subtract(Vector256<double> left, Vector256<double> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// VSUBPS ymm1{k1}{z}, ymm2, ymm3/m256/m32bcst {er}
+        /// </summary>
+        public static Vector256<float> Subtract(Vector256<float> left, Vector256<float> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>Provides access to the x86 AVX10.2 hardware instructions, that are only available to 64-bit processes, via intrinsics.</summary>
+        public new abstract class X64 : Avx10v1.X64
+        {
+            internal X64() { }
+
+            /// <summary>Gets a value that indicates whether the APIs in this class are supported.</summary>
+            /// <value><see langword="true" /> if the APIs are supported; otherwise, <see langword="false" />.</value>
+            /// <remarks>A value of <see langword="false" /> indicates that the APIs will throw <see cref="PlatformNotSupportedException" />.</remarks>
+            public static new bool IsSupported { [Intrinsic] get { return false; } }
+        }
+
+        /// <summary>Provides access to the x86 AVX10.2/512 hardware instructions via intrinsics.</summary>
+        public new abstract class V512 : Avx10v1.V512
+        {
+            internal V512() { }
+
+            /// <summary>Gets a value that indicates whether the APIs in this class are supported.</summary>
+            /// <value><see langword="true" /> if the APIs are supported; otherwise, <see langword="false" />.</value>
+            /// <remarks>A value of <see langword="false" /> indicates that the APIs will throw <see cref="PlatformNotSupportedException" />.</remarks>
+            public static new bool IsSupported { [Intrinsic] get { return false; } }
+
+            /// <summary>
+            /// VMINMAXPD zmm1{k1}{z}, zmm2, zmm3/m512/m64bcst {sae}, imm8
+            /// </summary>
+            public static Vector512<double> MinMax(Vector512<double> left, Vector512<double> right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
+
+            /// <summary>
+            /// VMINMAXPS zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst {sae}, imm8
+            /// </summary>
+            public static Vector512<float> MinMax(Vector512<float> left, Vector512<float> right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
+
+            /// <summary>
+            /// VCVTPS2IBS zmm1{k1}{z}, zmm2/m512/m32bcst {er}
+            /// </summary>
+            public static Vector512<int> ConvertToSByteWithSaturationAndZeroExtendToInt32(Vector512<float> value) { throw new PlatformNotSupportedException(); }
+
+            /// <summary>
+            /// VCVTPS2IBS zmm1{k1}{z}, zmm2/m512/m32bcst {er}
+            /// </summary>
+            public static Vector512<int> ConvertToSByteWithSaturationAndZeroExtendToInt32(Vector512<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+
+            /// <summary>
+            /// VCVTPS2IUBS zmm1{k1}{z}, zmm2/m512/m32bcst {er}
+            /// </summary>
+            public static Vector512<uint> ConvertToByteWithSaturationAndZeroExtendToInt32(Vector512<float> value) { throw new PlatformNotSupportedException(); }
+
+            /// <summary>
+            /// VCVTPS2IUBS zmm1{k1}{z}, zmm2/m512/m32bcst {er}
+            /// </summary>
+            public static Vector512<uint> ConvertToByteWithSaturationAndZeroExtendToInt32(Vector512<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+
+            /// <summary>
+            /// VCVTTPS2IBS zmm1{k1}{z}, zmm2/m512/m32bcst {sae}
+            /// </summary>
+            public static Vector512<int> ConvertToSByteWithTruncatedSaturationAndZeroExtendToInt32(Vector512<float> value) { throw new PlatformNotSupportedException(); }
+
+            /// <summary>
+            /// VCVTTPS2IUBS zmm1{k1}{z}, zmm2/m512/m32bcst {sae}
+            /// </summary>
+            public static Vector512<uint> ConvertToByteWithTruncatedSaturationAndZeroExtendToInt32(Vector512<float> value) { throw new PlatformNotSupportedException(); }
+
+            /// <summary>
+            /// VMPSADBW zmm1{k1}{z}, zmm2, zmm3/m512, imm8
+            /// </summary>
+            public static Vector512<ushort> MultipleSumAbsoluteDifferences(Vector512<byte> left, Vector512<byte> right, [ConstantExpected] byte mask) { throw new PlatformNotSupportedException(); }
+
+            /// <summary>Provides access to the x86 AVX10.2/512 hardware instructions, that are only available to 64-bit processes, via intrinsics.</summary>
+            public new abstract class X64 : Avx10v1.V512.X64
+            {
+                internal X64() { }
+
+                /// <summary>Gets a value that indicates whether the APIs in this class are supported.</summary>
+                /// <value><see langword="true" /> if the APIs are supported; otherwise, <see langword="false" />.</value>
+                /// <remarks>A value of <see langword="false" /> indicates that the APIs will throw <see cref="PlatformNotSupportedException" />.</remarks>
+                public static new bool IsSupported { [Intrinsic] get { return false; } }
+            }
+        }
+    }
+}
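
Note: this PlatformNotSupported variant always reports IsSupported as false and throws from every method, so callers are expected to guard on IsSupported. A minimal sketch of the intended pattern (helper name hypothetical):

    using System.Runtime.Intrinsics;
    using System.Runtime.Intrinsics.X86;

    static class RoundedAdd
    {
        // Hypothetical helper: use the embedded-rounding Add when AVX10.2 is
        // available, otherwise fall back to plain AVX addition, which rounds
        // per the current MXCSR state.
        public static Vector256<float> AddRoundedDown(Vector256<float> left, Vector256<float> right)
        {
            if (Avx10v2.IsSupported)
            {
                return Avx10v2.Add(left, right, FloatRoundingMode.ToNegativeInfinity);
            }
            return Avx.Add(left, right);
        }
    }
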
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx10v2.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx10v2.cs
new file mode 100644
index 00000000000000..5c9cba0625fd5c
--- /dev/null
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx10v2.cs
@@ -0,0 +1,327 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics.CodeAnalysis;
+using System.Runtime.CompilerServices;
+
+namespace System.Runtime.Intrinsics.X86
+{
+    /// <summary>Provides access to X86 Avx10.2 hardware instructions via intrinsics.</summary>
+    [Intrinsic]
+    [CLSCompliant(false)]
+    public abstract class Avx10v2 : Avx10v1
+    {
+        internal Avx10v2() { }
+
+        /// <summary>Gets a value that indicates whether the APIs in this class are supported.</summary>
+        /// <value><see langword="true" /> if the APIs are supported; otherwise, <see langword="false" />.</value>
+        /// <remarks>A value of <see langword="false" /> indicates that the APIs will throw <see cref="PlatformNotSupportedException" />.</remarks>
+        public static new bool IsSupported { get => IsSupported; }
+
+        /// <summary>
+        /// VMINMAXPD xmm1{k1}{z}, xmm2, xmm3/m128/m64bcst, imm8
+        /// </summary>
+        public static Vector128<double> MinMax(Vector128<double> left, Vector128<double> right, [ConstantExpected] byte control) => MinMax(left, right, control);
+
+        /// <summary>
+        /// VMINMAXPD ymm1{k1}{z}, ymm2, ymm3/m256/m64bcst {sae}, imm8
+        /// </summary>
+        public static Vector256<double> MinMax(Vector256<double> left, Vector256<double> right, [ConstantExpected] byte control) => MinMax(left, right, control);
+
+        /// <summary>
+        /// VMINMAXPS xmm1{k1}{z}, xmm2, xmm3/m128/m32bcst, imm8
+        /// </summary>
+        public static Vector128<float> MinMax(Vector128<float> left, Vector128<float> right, [ConstantExpected] byte control) => MinMax(left, right, control);
+
+        /// <summary>
+        /// VMINMAXPS ymm1{k1}{z}, ymm2, ymm3/m256/m32bcst {sae}, imm8
+        /// </summary>
+        public static Vector256<float> MinMax(Vector256<float> left, Vector256<float> right, [ConstantExpected] byte control) => MinMax(left, right, control);
+
+        /// <summary>
+        /// VMINMAXSD xmm1{k1}{z}, xmm2, xmm3/m64 {sae}, imm8
+        /// </summary>
+        public static Vector128<double> MinMaxScalar(Vector128<double> left, Vector128<double> right, [ConstantExpected] byte control) => MinMaxScalar(left, right, control);
+
+        /// <summary>
+        /// VMINMAXSS xmm1{k1}{z}, xmm2, xmm3/m32 {sae}, imm8
+        /// </summary>
+        public static Vector128<float> MinMaxScalar(Vector128<float> left, Vector128<float> right, [ConstantExpected] byte control) => MinMaxScalar(left, right, control);
+
+        /// <summary>
+        /// VCVTPS2IBS xmm1{k1}{z}, xmm2/m128/m32bcst
+        /// </summary>
+        public static Vector128<int> ConvertToSByteWithSaturationAndZeroExtendToInt32(Vector128<float> value) => ConvertToSByteWithSaturationAndZeroExtendToInt32(value);
+
+        /// <summary>
+        /// VCVTPS2IBS ymm1{k1}{z}, ymm2/m256/m32bcst {er}
+        /// </summary>
+        public static Vector256<int> ConvertToSByteWithSaturationAndZeroExtendToInt32(Vector256<float> value) => ConvertToSByteWithSaturationAndZeroExtendToInt32(value);
+
+        /// <summary>
+        /// VCVTPS2IBS ymm1{k1}{z}, ymm2/m256/m32bcst {er}
+        /// </summary>
+        public static Vector256<int> ConvertToSByteWithSaturationAndZeroExtendToInt32(Vector256<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToSByteWithSaturationAndZeroExtendToInt32(value, mode);
+
+        /// <summary>
+        /// VCVTPS2IUBS xmm1{k1}{z}, xmm2/m128/m32bcst
+        /// </summary>
+        public static Vector128<uint> ConvertToByteWithSaturationAndZeroExtendToInt32(Vector128<float> value) => ConvertToByteWithSaturationAndZeroExtendToInt32(value);
+
+        /// <summary>
+        /// VCVTPS2IUBS ymm1{k1}{z}, ymm2/m256/m32bcst {er}
+        /// </summary>
+        public static Vector256<uint> ConvertToByteWithSaturationAndZeroExtendToInt32(Vector256<float> value) => ConvertToByteWithSaturationAndZeroExtendToInt32(value);
+
+        /// <summary>
+        /// VCVTPS2IUBS ymm1{k1}{z}, ymm2/m256/m32bcst {er}
+        /// </summary>
+        public static Vector256<uint> ConvertToByteWithSaturationAndZeroExtendToInt32(Vector256<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToByteWithSaturationAndZeroExtendToInt32(value, mode);
+
+        /// <summary>
+        /// VCVTTPS2IBS xmm1{k1}{z}, xmm2/m128/m32bcst
+        /// </summary>
+        public static Vector128<int> ConvertToSByteWithTruncatedSaturationAndZeroExtendToInt32(Vector128<float> value) => ConvertToSByteWithTruncatedSaturationAndZeroExtendToInt32(value);
+
+        /// <summary>
+        /// VCVTTPS2IBS ymm1{k1}{z}, ymm2/m256/m32bcst {sae}
+        /// </summary>
+        public static Vector256<int> ConvertToSByteWithTruncatedSaturationAndZeroExtendToInt32(Vector256<float> value) => ConvertToSByteWithTruncatedSaturationAndZeroExtendToInt32(value);
+
+        /// <summary>
+        /// VCVTTPS2IUBS xmm1{k1}{z}, xmm2/m128/m32bcst
+        /// </summary>
+        public static Vector128<uint> ConvertToByteWithTruncatedSaturationAndZeroExtendToInt32(Vector128<float> value) => ConvertToByteWithTruncatedSaturationAndZeroExtendToInt32(value);
+
+        /// <summary>
+        /// VCVTTPS2IUBS ymm1{k1}{z}, ymm2/m256/m32bcst {sae}
+        /// </summary>
+        public static Vector256<uint> ConvertToByteWithTruncatedSaturationAndZeroExtendToInt32(Vector256<float> value) => ConvertToByteWithTruncatedSaturationAndZeroExtendToInt32(value);
+
+        /// <summary>
+        /// VMOVD xmm1, xmm2/m32
+        /// </summary>
+        public static Vector128<uint> ConvertToVector128UInt32(Vector128<uint> value) => ConvertToVector128UInt32(value);
+
+        /// <summary>
+        /// VMOVW xmm1, xmm2/m16
+        /// </summary>
+        public static Vector128<ushort> ConvertToVector128UInt16(Vector128<ushort> value) => ConvertToVector128UInt16(value);
+
+        /// <summary>
+        /// VADDPD ymm1{k1}{z}, ymm2, ymm3/m256/m64bcst {er}
+        /// </summary>
+        public static Vector256<double> Add(Vector256<double> left, Vector256<double> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Add(left, right, mode);
+
+        /// <summary>
+        /// VADDPS ymm1{k1}{z}, ymm2, ymm3/m256/m32bcst {er}
+        /// </summary>
+        public static Vector256<float> Add(Vector256<float> left, Vector256<float> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Add(left, right, mode);
+
+        /// <summary>
+        /// VDIVPD ymm1{k1}{z}, ymm2, ymm3/m256/m64bcst {er}
+        /// </summary>
+        public static Vector256<double> Divide(Vector256<double> left, Vector256<double> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Divide(left, right, mode);
+
+        /// <summary>
+        /// VDIVPS ymm1{k1}{z}, ymm2, ymm3/m256/m32bcst {er}
+        /// </summary>
+        public static Vector256<float> Divide(Vector256<float> left, Vector256<float> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Divide(left, right, mode);
+
+        /// <summary>
+        /// VCVTDQ2PS ymm1{k1}{z}, ymm2/m256/m32bcst {er}
+        /// </summary>
+        public static Vector256<float> ConvertToVector256Single(Vector256<int> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector256Single(value, mode);
+
+        /// <summary>
+        /// VCVTPD2DQ xmm1{k1}{z}, ymm2/m256/m64bcst {er}
+        /// </summary>
+        public static Vector128<int> ConvertToVector128Int32(Vector256<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector128Int32(value, mode);
+
+        /// <summary>
+        /// VCVTPD2PS xmm1{k1}{z}, ymm2/m256/m64bcst {er}
+        /// </summary>
+        public static Vector128<float> ConvertToVector128Single(Vector256<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector128Single(value, mode);
+
+        /// <summary>
+        /// VCVTPD2QQ ymm1{k1}{z}, ymm2/m256/m64bcst {er}
+        /// </summary>
+        public static Vector256<long> ConvertToVector256Int64(Vector256<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector256Int64(value, mode);
+
+        /// <summary>
+        /// VCVTPD2UDQ xmm1{k1}{z}, ymm2/m256/m64bcst {er}
+        /// </summary>
+        public static Vector128<uint> ConvertToVector128UInt32(Vector256<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector128UInt32(value, mode);
+
+        /// <summary>
+        /// VCVTPD2UQQ ymm1{k1}{z}, ymm2/m256/m64bcst {er}
+        /// </summary>
+        public static Vector256<ulong> ConvertToVector256UInt64(Vector256<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector256UInt64(value, mode);
+
+        /// <summary>
+        /// VCVTPS2DQ ymm1{k1}{z}, ymm2/m256/m32bcst {er}
+        /// </summary>
+        public static Vector256<int> ConvertToVector256Int32(Vector256<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector256Int32(value, mode);
+
+        /// <summary>
+        /// VCVTPS2QQ ymm1{k1}{z}, xmm2/m128/m32bcst {er}
+        /// </summary>
+        public static Vector256<long> ConvertToVector256Int64(Vector128<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector256Int64(value, mode);
+
+        /// <summary>
+        /// VCVTPS2UDQ ymm1{k1}{z}, ymm2/m256/m32bcst {er}
+        /// </summary>
+        public static Vector256<uint> ConvertToVector256UInt32(Vector256<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector256UInt32(value, mode);
+
+        /// <summary>
+        /// VCVTPS2UQQ ymm1{k1}{z}, xmm2/m128/m32bcst {er}
+        /// </summary>
+        public static Vector256<ulong> ConvertToVector256UInt64(Vector128<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector256UInt64(value, mode);
+
+        /// <summary>
+        /// VCVTQQ2PS xmm1{k1}{z}, ymm2/m256/m64bcst {er}
+        /// </summary>
+        public static Vector128<float> ConvertToVector128Single(Vector256<long> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector128Single(value, mode);
+
+        /// <summary>
+        /// VCVTQQ2PD ymm1{k1}{z}, ymm2/m256/m64bcst {er}
+        /// </summary>
+        public static Vector256<double> ConvertToVector256Double(Vector256<long> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector256Double(value, mode);
+
+        /// <summary>
+        /// VCVTUDQ2PS ymm1{k1}{z}, ymm2/m256/m32bcst {er}
+        /// </summary>
+        public static Vector256<float> ConvertToVector256Single(Vector256<uint> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector256Single(value, mode);
+
+        /// <summary>
+        /// VCVTUQQ2PS xmm1{k1}{z}, ymm2/m256/m64bcst {er}
+        /// </summary>
+        public static Vector128<float> ConvertToVector128Single(Vector256<ulong> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector128Single(value, mode);
+
+        /// <summary>
+        /// VCVTUQQ2PD ymm1{k1}{z}, ymm2/m256/m64bcst {er}
+        /// </summary>
+        public static Vector256<double> ConvertToVector256Double(Vector256<ulong> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector256Double(value, mode);
+
+        /// <summary>
+        /// VMULPD ymm1{k1}{z}, ymm2, ymm3/m256/m64bcst {er}
+        /// </summary>
+        public static Vector256<double> Multiply(Vector256<double> left, Vector256<double> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Multiply(left, right, mode);
+
+        /// <summary>
+        /// VMULPS ymm1{k1}{z}, ymm2, ymm3/m256/m32bcst {er}
+        /// </summary>
+        public static Vector256<float> Multiply(Vector256<float> left, Vector256<float> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Multiply(left, right, mode);
+
+        /// <summary>
+        /// VSCALEFPD ymm1{k1}{z}, ymm2, ymm3/m256/m64bcst {er}
+        /// </summary>
+        public static Vector256<double> Scale(Vector256<double> left, Vector256<double> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Scale(left, right, mode);
+
+        /// <summary>
+        /// VSCALEFPS ymm1{k1}{z}, ymm2, ymm3/m256/m32bcst {er}
+        /// </summary>
+        public static Vector256<float> Scale(Vector256<float> left, Vector256<float> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Scale(left, right, mode);
+
+        /// <summary>
+        /// VSQRTPD ymm1{k1}{z}, ymm2/m256/m64bcst {er}
+        /// </summary>
+        public static Vector256<double> Sqrt(Vector256<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Sqrt(value, mode);
+
+        /// <summary>
+        /// VSQRTPS ymm1{k1}{z}, ymm2/m256/m32bcst {er}
+        /// </summary>
+        public static Vector256<float> Sqrt(Vector256<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Sqrt(value, mode);
+
+        /// <summary>
+        /// VSUBPD ymm1{k1}{z}, ymm2, ymm3/m256/m64bcst {er}
+        /// </summary>
+        public static Vector256<double> Subtract(Vector256<double> left, Vector256<double> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Subtract(left, right, mode);
+
+        /// <summary>
+        /// VSUBPS ymm1{k1}{z}, ymm2, ymm3/m256/m32bcst {er}
+        /// </summary>
+        public static Vector256<float> Subtract(Vector256<float> left, Vector256<float> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Subtract(left, right, mode);
+
+        /// <summary>Provides access to the x86 AVX10.2 hardware instructions, that are only available to 64-bit processes, via intrinsics.</summary>
+        [Intrinsic]
+        public new abstract class X64 : Avx10v1.X64
+        {
+            internal X64() { }
+
+            /// <summary>Gets a value that indicates whether the APIs in this class are supported.</summary>
+            /// <value><see langword="true" /> if the APIs are supported; otherwise, <see langword="false" />.</value>
+            /// <remarks>A value of <see langword="false" /> indicates that the APIs will throw <see cref="PlatformNotSupportedException" />.</remarks>
+            public static new bool IsSupported { get => IsSupported; }
+        }
+
+        /// <summary>Provides access to the x86 AVX10.2/512 hardware instructions via intrinsics.</summary>
+        [Intrinsic]
+        public new abstract class V512 : Avx10v1.V512
+        {
+            internal V512() { }
+
+            /// <summary>Gets a value that indicates whether the APIs in this class are supported.</summary>
+            /// <value><see langword="true" /> if the APIs are supported; otherwise, <see langword="false" />.</value>
+            /// <remarks>A value of <see langword="false" /> indicates that the APIs will throw <see cref="PlatformNotSupportedException" />.</remarks>
+            public static new bool IsSupported { get => IsSupported; }
+
+            /// <summary>
+            /// VMINMAXPD zmm1{k1}{z}, zmm2, zmm3/m512/m64bcst {sae}, imm8
+            /// </summary>
+            public static Vector512<double> MinMax(Vector512<double> left, Vector512<double> right, [ConstantExpected] byte control) => MinMax(left, right, control);
+
+            /// <summary>
+            /// VMINMAXPS zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst {sae}, imm8
+            /// </summary>
+            public static Vector512<float> MinMax(Vector512<float> left, Vector512<float> right, [ConstantExpected] byte control) => MinMax(left, right, control);
+
+            /// <summary>
+            /// VCVTPS2IBS zmm1{k1}{z}, zmm2/m512/m32bcst {er}
+            /// </summary>
+            public static Vector512<int> ConvertToSByteWithSaturationAndZeroExtendToInt32(Vector512<float> value) => ConvertToSByteWithSaturationAndZeroExtendToInt32(value);
+
+            /// <summary>
+            /// VCVTPS2IBS zmm1{k1}{z}, zmm2/m512/m32bcst {er}
+            /// </summary>
+            public static Vector512<int> ConvertToSByteWithSaturationAndZeroExtendToInt32(Vector512<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToSByteWithSaturationAndZeroExtendToInt32(value, mode);
+
+            /// <summary>
+            /// VCVTPS2IUBS zmm1{k1}{z}, zmm2/m512/m32bcst {er}
+            /// </summary>
+            public static Vector512<uint> ConvertToByteWithSaturationAndZeroExtendToInt32(Vector512<float> value) => ConvertToByteWithSaturationAndZeroExtendToInt32(value);
+
+            /// <summary>
+            /// VCVTPS2IUBS zmm1{k1}{z}, zmm2/m512/m32bcst {er}
+            /// </summary>
+            public static Vector512<uint> ConvertToByteWithSaturationAndZeroExtendToInt32(Vector512<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToByteWithSaturationAndZeroExtendToInt32(value, mode);
+
+            /// <summary>
+            /// VCVTTPS2IBS zmm1{k1}{z}, zmm2/m512/m32bcst {sae}
+            /// </summary>
+            public static Vector512<int> ConvertToSByteWithTruncatedSaturationAndZeroExtendToInt32(Vector512<float> value) => ConvertToSByteWithTruncatedSaturationAndZeroExtendToInt32(value);
+
+            /// <summary>
+            /// VCVTTPS2IUBS zmm1{k1}{z}, zmm2/m512/m32bcst {sae}
+            /// </summary>
+            public static Vector512<uint> ConvertToByteWithTruncatedSaturationAndZeroExtendToInt32(Vector512<float> value) => ConvertToByteWithTruncatedSaturationAndZeroExtendToInt32(value);
+
+            /// <summary>
+            /// VMPSADBW zmm1{k1}{z}, zmm2, zmm3/m512, imm8
+            /// </summary>
+            public static Vector512<ushort> MultipleSumAbsoluteDifferences(Vector512<byte> left, Vector512<byte> right, [ConstantExpected] byte mask) => MultipleSumAbsoluteDifferences(left, right, mask);
+
+ ///