diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp
index 0c83307ccca380..4d46570346cc24 100644
--- a/src/coreclr/jit/codegenxarch.cpp
+++ b/src/coreclr/jit/codegenxarch.cpp
@@ -5658,18 +5658,47 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree)
break;
}
+ case NI_AVX512F_ConvertToVector256Int32:
+ case NI_AVX512F_ConvertToVector256UInt32:
+ case NI_AVX512F_VL_ConvertToVector128UInt32:
+ case NI_AVX512F_VL_ConvertToVector128UInt32WithSaturation:
+ {
+ assert(!varTypeIsFloating(baseType));
+ FALLTHROUGH;
+ }
+
+ case NI_AVX512F_ConvertToVector128Byte:
+ case NI_AVX512F_ConvertToVector128ByteWithSaturation:
case NI_AVX512F_ConvertToVector128Int16:
- case NI_AVX512F_ConvertToVector128Int32:
+ case NI_AVX512F_ConvertToVector128Int16WithSaturation:
+ case NI_AVX512F_ConvertToVector128SByte:
+ case NI_AVX512F_ConvertToVector128SByteWithSaturation:
case NI_AVX512F_ConvertToVector128UInt16:
- case NI_AVX512F_ConvertToVector128UInt32:
+ case NI_AVX512F_ConvertToVector128UInt16WithSaturation:
case NI_AVX512F_ConvertToVector256Int16:
- case NI_AVX512F_ConvertToVector256Int32:
+ case NI_AVX512F_ConvertToVector256Int16WithSaturation:
+ case NI_AVX512F_ConvertToVector256Int32WithSaturation:
case NI_AVX512F_ConvertToVector256UInt16:
- case NI_AVX512F_ConvertToVector256UInt32:
- case NI_AVX512BW_ConvertToVector128Byte:
- case NI_AVX512BW_ConvertToVector128SByte:
+ case NI_AVX512F_ConvertToVector256UInt16WithSaturation:
+ case NI_AVX512F_ConvertToVector256UInt32WithSaturation:
+ case NI_AVX512F_VL_ConvertToVector128Byte:
+ case NI_AVX512F_VL_ConvertToVector128ByteWithSaturation:
+ case NI_AVX512F_VL_ConvertToVector128Int16:
+ case NI_AVX512F_VL_ConvertToVector128Int16WithSaturation:
+ case NI_AVX512F_VL_ConvertToVector128Int32:
+ case NI_AVX512F_VL_ConvertToVector128Int32WithSaturation:
+ case NI_AVX512F_VL_ConvertToVector128SByte:
+ case NI_AVX512F_VL_ConvertToVector128SByteWithSaturation:
+ case NI_AVX512F_VL_ConvertToVector128UInt16:
+ case NI_AVX512F_VL_ConvertToVector128UInt16WithSaturation:
case NI_AVX512BW_ConvertToVector256Byte:
+ case NI_AVX512BW_ConvertToVector256ByteWithSaturation:
case NI_AVX512BW_ConvertToVector256SByte:
+ case NI_AVX512BW_ConvertToVector256SByteWithSaturation:
+ case NI_AVX512BW_VL_ConvertToVector128Byte:
+ case NI_AVX512BW_VL_ConvertToVector128ByteWithSaturation:
+ case NI_AVX512BW_VL_ConvertToVector128SByte:
+ case NI_AVX512BW_VL_ConvertToVector128SByteWithSaturation:
{
// These intrinsics are "ins reg/mem, xmm"
ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h
index 33f3570deae2e2..90fb9b74a59585 100644
--- a/src/coreclr/jit/emit.h
+++ b/src/coreclr/jit/emit.h
@@ -1905,7 +1905,7 @@ class emitter
ssize_t emitGetInsCIdisp(instrDesc* id);
unsigned emitGetInsCIargs(instrDesc* id);
- inline static emitAttr emitGetMemOpSize(instrDesc* id);
+ inline emitAttr emitGetMemOpSize(instrDesc* id) const;
// Return the argument count for a direct call "id".
int emitGetInsCDinfo(instrDesc* id);
@@ -3456,11 +3456,12 @@ inline unsigned emitter::emitGetInsCIargs(instrDesc* id)
// Arguments:
// id - Instruction descriptor
//
-/* static */ emitAttr emitter::emitGetMemOpSize(instrDesc* id)
+emitAttr emitter::emitGetMemOpSize(instrDesc* id) const
{
- emitAttr defaultSize = id->idOpSize();
+ emitAttr defaultSize = id->idOpSize();
+ instruction ins = id->idIns();
- switch (id->idIns())
+ switch (ins)
{
case INS_pextrb:
case INS_pinsrb:
@@ -3570,9 +3571,6 @@ inline unsigned emitter::emitGetInsCIargs(instrDesc* id)
case INS_cvtdq2pd:
case INS_cvtps2pd:
- case INS_vpmovdw:
- case INS_vpmovqd:
- case INS_vpmovwb:
{
if (defaultSize == 64)
{
@@ -3589,6 +3587,57 @@ inline unsigned emitter::emitGetInsCIargs(instrDesc* id)
}
}
+ case INS_vpmovdb:
+ case INS_vpmovdw:
+ case INS_vpmovqb:
+ case INS_vpmovqd:
+ case INS_vpmovqw:
+ case INS_vpmovwb:
+ case INS_vpmovsdb:
+ case INS_vpmovsdw:
+ case INS_vpmovsqb:
+ case INS_vpmovsqd:
+ case INS_vpmovsqw:
+ case INS_vpmovswb:
+ case INS_vpmovusdb:
+ case INS_vpmovusdw:
+ case INS_vpmovusqb:
+ case INS_vpmovusqd:
+ case INS_vpmovusqw:
+ case INS_vpmovuswb:
+ {
+ insTupleType tupleType = insTupleTypeInfo(ins);
+ unsigned memSize = 0;
+
+ switch (tupleType)
+ {
+ case INS_TT_HALF_MEM:
+ {
+ memSize = defaultSize / 2;
+ break;
+ }
+
+ case INS_TT_QUARTER_MEM:
+ {
+ memSize = defaultSize / 4;
+ break;
+ }
+
+ case INS_TT_EIGHTH_MEM:
+ {
+ memSize = defaultSize / 8;
+ break;
+ }
+
+ default:
+ {
+ unreached();
+ }
+ }
+
+ return EA_ATTR(memSize);
+ }
+
case INS_vbroadcastf128:
case INS_vbroadcasti128:
case INS_vextractf128:
@@ -3613,7 +3662,11 @@ inline unsigned emitter::emitGetInsCIargs(instrDesc* id)
case INS_movddup:
{
- if (defaultSize == 32)
+ if (defaultSize == 64)
+ {
+ return EA_64BYTE;
+ }
+ else if (defaultSize == 32)
{
return EA_32BYTE;
}
diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp
index a0662e95f985db..fefbb90994dfee 100644
--- a/src/coreclr/jit/emitxarch.cpp
+++ b/src/coreclr/jit/emitxarch.cpp
@@ -1362,6 +1362,10 @@ bool emitter::TakesRexWPrefix(const instrDesc* id) const
case INS_shlx:
case INS_shrx:
#endif // TARGET_AMD64
+ case INS_vcvtsd2usi:
+ case INS_vcvtss2usi:
+ case INS_vcvttsd2usi:
+ case INS_vcvttss2usi:
{
if (attr == EA_8BYTE)
{
@@ -2582,6 +2586,10 @@ bool emitter::emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id)
case INS_sarx:
case INS_shrx:
#endif
+ case INS_vcvtsd2usi:
+ case INS_vcvtss2usi:
+ case INS_vcvttsd2usi:
+ case INS_vcvttss2usi:
{
// These SSE instructions write to a general purpose integer register.
return false;
@@ -3010,7 +3018,7 @@ inline bool hasTupleTypeInfo(instruction ins)
// Return Value:
// the tuple type info for a given CPU instruction.
//
-inline insTupleType insTupleTypeInfo(instruction ins)
+insTupleType emitter::insTupleTypeInfo(instruction ins) const
{
assert((unsigned)ins < ArrLen(insTupleTypeInfos));
assert(insTupleTypeInfos[ins] != INS_TT_NONE);
@@ -3020,9 +3028,9 @@ inline insTupleType insTupleTypeInfo(instruction ins)
// Return true if the instruction uses the SSE38 or SSE3A macro in instrsXArch.h.
bool emitter::EncodedBySSE38orSSE3A(instruction ins) const
{
- const size_t SSE38 = 0x0F660038;
- const size_t SSE3A = 0x0F66003A;
- const size_t MASK = 0xFFFF00FF;
+ const size_t SSE38 = 0x0F000038;
+ const size_t SSE3A = 0x0F00003A;
+ const size_t MASK = 0xFF0000FF;
size_t insCode = 0;
@@ -3044,8 +3052,19 @@ bool emitter::EncodedBySSE38orSSE3A(instruction ins) const
insCode = insCodeMR(ins);
}
- insCode &= MASK;
- return insCode == SSE38 || insCode == SSE3A;
+ size_t mskCode = insCode & MASK;
+
+ if ((mskCode != SSE38) && (mskCode != SSE3A))
+ {
+ return false;
+ }
+
+#if defined(DEBUG)
+ insCode = (insCode >> 16) & 0xFF;
+ assert((insCode == 0x66) || (insCode == 0xF2) || (insCode == 0xF3));
+#endif // DEBUG
+
+ return true;
}
/*****************************************************************************
@@ -11214,6 +11233,10 @@ void emitter::emitDispIns(
case INS_cvtss2si:
case INS_cvtsd2si:
case INS_cvttss2si:
+ case INS_vcvtsd2usi:
+ case INS_vcvtss2usi:
+ case INS_vcvttsd2usi:
+ case INS_vcvttss2usi:
{
printf(" %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_16BYTE));
break;
@@ -15528,9 +15551,9 @@ ssize_t emitter::TryEvexCompressDisp8Byte(instrDesc* id, ssize_t dsp, bool* dspI
disp8Compression = inputSize * 4;
break;
case INS_TT_TUPLE8:
- // N = input size in bytes * 4, 32bit for 512 only
+ // N = input size in bytes * 8, 32bit for 512 only
assert((inputSize == 4 && vectorLength >= 64));
- disp8Compression = inputSize * 4;
+ disp8Compression = inputSize * 8;
break;
case INS_TT_HALF_MEM:
// N = vector length in bytes / 2
@@ -17825,11 +17848,39 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
case INS_cvttps2dq:
case INS_cvtps2dq:
case INS_cvtdq2ps:
+ case INS_vcvtpd2qq:
+ case INS_vcvtpd2uqq:
+ case INS_vcvtps2udq:
+ case INS_vcvtqq2pd:
+ case INS_vcvttps2udq:
+ case INS_vcvtudq2ps:
+ case INS_vcvttpd2qq:
+ case INS_vcvttpd2uqq:
+ case INS_vcvtuqq2pd:
+ result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+ result.insLatency += PERFSCORE_LATENCY_4C;
+ break;
+
+ case INS_vpmovdb:
case INS_vpmovdw:
+ case INS_vpmovqb:
case INS_vpmovqd:
+ case INS_vpmovqw:
+ case INS_vpmovsdb:
+ case INS_vpmovsdw:
+ case INS_vpmovsqb:
+ case INS_vpmovsqd:
+ case INS_vpmovsqw:
+ case INS_vpmovswb:
+ case INS_vpmovusdb:
+ case INS_vpmovusdw:
+ case INS_vpmovusqb:
+ case INS_vpmovusqd:
+ case INS_vpmovusqw:
+ case INS_vpmovuswb:
case INS_vpmovwb:
- result.insThroughput = PERFSCORE_THROUGHPUT_2X;
- result.insLatency += PERFSCORE_LATENCY_4C;
+ result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+ result.insLatency += (opSize == EA_16BYTE) ? PERFSCORE_LATENCY_2C : PERFSCORE_LATENCY_4C;
break;
case INS_haddps:
@@ -17892,12 +17943,20 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
case INS_cvtsi2ss32:
case INS_cvtsi2sd64:
case INS_cvtsi2ss64:
+ case INS_vcvtsd2usi:
+ case INS_vcvttsd2usi:
+ case INS_vcvtusi2sd32:
+ case INS_vcvtusi2sd64:
+ case INS_vcvtusi2ss32:
+ case INS_vcvtusi2ss64:
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
result.insLatency += PERFSCORE_LATENCY_7C;
break;
case INS_cvttss2si:
case INS_cvtss2si:
+ case INS_vcvtss2usi:
+ case INS_vcvttss2usi:
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
result.insLatency += opSize == EA_8BYTE ? PERFSCORE_LATENCY_8C : PERFSCORE_LATENCY_7C;
break;
@@ -18241,6 +18300,15 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
case INS_cvtdq2pd:
case INS_cvtpd2ps:
case INS_cvttpd2dq:
+ case INS_vcvtpd2udq:
+ case INS_vcvtps2qq:
+ case INS_vcvtps2uqq:
+ case INS_vcvtqq2ps:
+ case INS_vcvttpd2udq:
+ case INS_vcvttps2qq:
+ case INS_vcvttps2uqq:
+ case INS_vcvtudq2pd:
+ case INS_vcvtuqq2ps:
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
result.insLatency += opSize == EA_32BYTE ? PERFSCORE_LATENCY_7C : PERFSCORE_LATENCY_5C;
break;
@@ -18282,17 +18350,25 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
case INS_vpbroadcastq_gpr:
case INS_vbroadcasti128:
case INS_vbroadcastf128:
+ case INS_vbroadcastf64x2:
+ case INS_vbroadcasti64x2:
+ case INS_vbroadcastf64x4:
+ case INS_vbroadcasti64x4:
+ case INS_vbroadcastf32x2:
+ case INS_vbroadcasti32x2:
+ case INS_vbroadcastf32x8:
+ case INS_vbroadcasti32x8:
case INS_vbroadcastss:
case INS_vbroadcastsd:
if (memAccessKind == PERFSCORE_MEMORY_NONE)
{
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
- result.insLatency = opSize == EA_32BYTE ? PERFSCORE_LATENCY_3C : PERFSCORE_LATENCY_1C;
+ result.insLatency = opSize == EA_16BYTE ? PERFSCORE_LATENCY_1C : PERFSCORE_LATENCY_3C;
}
else
{
result.insThroughput = PERFSCORE_THROUGHPUT_2X;
- result.insLatency += opSize == EA_32BYTE ? PERFSCORE_LATENCY_3C : PERFSCORE_LATENCY_2C;
+ result.insLatency += opSize == EA_16BYTE ? PERFSCORE_LATENCY_2C : PERFSCORE_LATENCY_3C;
if (ins == INS_vpbroadcastb || ins == INS_vpbroadcastw)
{
result.insLatency += PERFSCORE_LATENCY_1C;
diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h
index 25846609b507fc..0c01f9aaca7b3c 100644
--- a/src/coreclr/jit/emitxarch.h
+++ b/src/coreclr/jit/emitxarch.h
@@ -183,6 +183,8 @@ code_t AddVexPrefixIfNeededAndNotPresent(instruction ins, code_t code, emitAttr
return code;
}
+insTupleType insTupleTypeInfo(instruction ins) const;
+
//------------------------------------------------------------------------
// HasKMaskRegisterDest: Temporary check to identify instructions that can
// be Evex encoded but require Opmask(KMask) register support.
diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index 5ab220f5791955..cd4c39a0bcc8f6 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -19025,6 +19025,18 @@ bool GenTree::isContainableHWIntrinsic() const
return true;
}
+ case NI_AVX512F_ConvertToVector256Int32:
+ case NI_AVX512F_ConvertToVector256UInt32:
+ case NI_AVX512F_VL_ConvertToVector128UInt32:
+ case NI_AVX512F_VL_ConvertToVector128UInt32WithSaturation:
+ {
+ if (varTypeIsFloating(AsHWIntrinsic()->GetSimdBaseType()))
+ {
+ return false;
+ }
+ FALLTHROUGH;
+ }
+
case NI_Vector128_GetElement:
case NI_SSE2_ConvertToInt32:
case NI_SSE2_ConvertToUInt32:
@@ -19039,18 +19051,38 @@ bool GenTree::isContainableHWIntrinsic() const
case NI_AVX2_ExtractVector128:
case NI_AVX512F_ExtractVector128:
case NI_AVX512F_ExtractVector256:
+ case NI_AVX512F_ConvertToVector128Byte:
+ case NI_AVX512F_ConvertToVector128ByteWithSaturation:
case NI_AVX512F_ConvertToVector128Int16:
- case NI_AVX512F_ConvertToVector128Int32:
+ case NI_AVX512F_ConvertToVector128Int16WithSaturation:
+ case NI_AVX512F_ConvertToVector128SByte:
+ case NI_AVX512F_ConvertToVector128SByteWithSaturation:
case NI_AVX512F_ConvertToVector128UInt16:
- case NI_AVX512F_ConvertToVector128UInt32:
+ case NI_AVX512F_ConvertToVector128UInt16WithSaturation:
case NI_AVX512F_ConvertToVector256Int16:
- case NI_AVX512F_ConvertToVector256Int32:
+ case NI_AVX512F_ConvertToVector256Int16WithSaturation:
+ case NI_AVX512F_ConvertToVector256Int32WithSaturation:
case NI_AVX512F_ConvertToVector256UInt16:
- case NI_AVX512F_ConvertToVector256UInt32:
- case NI_AVX512BW_ConvertToVector128Byte:
- case NI_AVX512BW_ConvertToVector128SByte:
+ case NI_AVX512F_ConvertToVector256UInt16WithSaturation:
+ case NI_AVX512F_ConvertToVector256UInt32WithSaturation:
+ case NI_AVX512F_VL_ConvertToVector128Byte:
+ case NI_AVX512F_VL_ConvertToVector128ByteWithSaturation:
+ case NI_AVX512F_VL_ConvertToVector128Int16:
+ case NI_AVX512F_VL_ConvertToVector128Int16WithSaturation:
+ case NI_AVX512F_VL_ConvertToVector128Int32:
+ case NI_AVX512F_VL_ConvertToVector128Int32WithSaturation:
+ case NI_AVX512F_VL_ConvertToVector128SByte:
+ case NI_AVX512F_VL_ConvertToVector128SByteWithSaturation:
+ case NI_AVX512F_VL_ConvertToVector128UInt16:
+ case NI_AVX512F_VL_ConvertToVector128UInt16WithSaturation:
case NI_AVX512BW_ConvertToVector256Byte:
+ case NI_AVX512BW_ConvertToVector256ByteWithSaturation:
case NI_AVX512BW_ConvertToVector256SByte:
+ case NI_AVX512BW_ConvertToVector256SByteWithSaturation:
+ case NI_AVX512BW_VL_ConvertToVector128Byte:
+ case NI_AVX512BW_VL_ConvertToVector128ByteWithSaturation:
+ case NI_AVX512BW_VL_ConvertToVector128SByte:
+ case NI_AVX512BW_VL_ConvertToVector128SByteWithSaturation:
case NI_AVX512DQ_ExtractVector128:
case NI_AVX512DQ_ExtractVector256:
{
@@ -22702,7 +22734,7 @@ GenTree* Compiler::gtNewSimdNarrowNode(
}
else
{
- intrinsicId = NI_AVX512BW_ConvertToVector128SByte;
+ intrinsicId = NI_AVX512BW_VL_ConvertToVector128SByte;
}
opBaseJitType = CORINFO_TYPE_SHORT;
@@ -22717,7 +22749,7 @@ GenTree* Compiler::gtNewSimdNarrowNode(
}
else
{
- intrinsicId = NI_AVX512BW_ConvertToVector128Byte;
+ intrinsicId = NI_AVX512BW_VL_ConvertToVector128Byte;
}
opBaseJitType = CORINFO_TYPE_USHORT;
@@ -22732,7 +22764,7 @@ GenTree* Compiler::gtNewSimdNarrowNode(
}
else
{
- intrinsicId = NI_AVX512F_ConvertToVector128Int16;
+ intrinsicId = NI_AVX512F_VL_ConvertToVector128Int16;
}
opBaseJitType = CORINFO_TYPE_INT;
@@ -22747,7 +22779,7 @@ GenTree* Compiler::gtNewSimdNarrowNode(
}
else
{
- intrinsicId = NI_AVX512F_ConvertToVector128UInt16;
+ intrinsicId = NI_AVX512F_VL_ConvertToVector128UInt16;
}
opBaseJitType = CORINFO_TYPE_UINT;
@@ -22762,7 +22794,7 @@ GenTree* Compiler::gtNewSimdNarrowNode(
}
else
{
- intrinsicId = NI_AVX512F_ConvertToVector128Int32;
+ intrinsicId = NI_AVX512F_VL_ConvertToVector128Int32;
}
opBaseJitType = CORINFO_TYPE_LONG;
@@ -22777,7 +22809,7 @@ GenTree* Compiler::gtNewSimdNarrowNode(
}
else
{
- intrinsicId = NI_AVX512F_ConvertToVector128UInt32;
+ intrinsicId = NI_AVX512F_VL_ConvertToVector128UInt32;
}
opBaseJitType = CORINFO_TYPE_ULONG;
@@ -25733,7 +25765,7 @@ unsigned GenTreeHWIntrinsic::GetResultOpNumForFMA(GenTree* use, GenTree* op1, Ge
{
assert((gtHWIntrinsicId >= NI_AVX512F_FusedMultiplyAdd) &&
(gtHWIntrinsicId <= NI_AVX512F_FusedMultiplySubtractNegated));
- assert((NI_AVX512F_FusedMultiplySubtractNegated - NI_AVX512F_FusedMultiplyAdd) == 6);
+ assert((NI_AVX512F_FusedMultiplySubtractNegated - NI_AVX512F_FusedMultiplyAdd) + 1 == 6);
}
#endif // DEBUG
diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp
index 4d02db9bfdd5e9..5bffbe41c839af 100644
--- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp
+++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp
@@ -386,7 +386,10 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
case InstructionSet_AVX:
case InstructionSet_AVX2:
case InstructionSet_AVX512F:
+ case InstructionSet_AVX512F_VL:
+ case InstructionSet_AVX512F_X64:
case InstructionSet_AVX512BW:
+ case InstructionSet_AVX512BW_VL:
genAvxFamilyIntrinsic(node);
break;
case InstructionSet_AES:
@@ -1637,15 +1640,23 @@ void CodeGen::genSSE42Intrinsic(GenTreeHWIntrinsic* node)
void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node)
{
NamedIntrinsic intrinsicId = node->GetHWIntrinsicId();
- var_types baseType = node->GetSimdBaseType();
- emitAttr attr = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->GetSimdSize()));
- var_types targetType = node->TypeGet();
- instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
- size_t numArgs = node->GetOperandCount();
- GenTree* op1 = node->Op(1);
- regNumber op1Reg = REG_NA;
- regNumber targetReg = node->GetRegNum();
- emitter* emit = GetEmitter();
+
+ if ((intrinsicId >= NI_AVX512F_FusedMultiplyAdd) && (intrinsicId <= NI_AVX512F_FusedMultiplySubtractNegated))
+ {
+ assert((NI_AVX512F_FusedMultiplySubtractNegated - NI_AVX512F_FusedMultiplyAdd) + 1 == 6);
+ genFMAIntrinsic(node);
+ return;
+ }
+
+ var_types baseType = node->GetSimdBaseType();
+ emitAttr attr = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->GetSimdSize()));
+ var_types targetType = node->TypeGet();
+ instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
+ size_t numArgs = node->GetOperandCount();
+ GenTree* op1 = node->Op(1);
+ regNumber op1Reg = REG_NA;
+ regNumber targetReg = node->GetRegNum();
+ emitter* emit = GetEmitter();
genConsumeMultiOpOperands(node);
@@ -1866,6 +1877,7 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node)
emit->emitIns_R_R_R_I(compareIns, attr, targetReg, op1Reg, op2Reg, 6);
break;
}
+
case NI_AVX512F_CompareLessThanOrEqualSpecial:
{
GenTree* op2 = node->Op(2);
@@ -1895,6 +1907,7 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node)
emit->emitIns_R_R_R_I(compareIns, attr, targetReg, op1Reg, op2Reg, 1);
break;
}
+
case NI_AVX512F_MoveMaskToVectorSpecial:
{
op1Reg = op1->GetRegNum();
@@ -1921,7 +1934,23 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node)
break;
}
+ case NI_AVX512F_ConvertToUInt32:
+ case NI_AVX512F_ConvertToUInt32WithTruncation:
+ case NI_AVX512F_X64_ConvertToUInt64:
+ case NI_AVX512F_X64_ConvertToUInt64WithTruncation:
+ {
+ assert(baseType == TYP_DOUBLE || baseType == TYP_FLOAT);
+ emitAttr attr = emitTypeSize(targetType);
+
+ instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
+ genHWIntrinsic_R_RM(node, ins, attr, targetReg, node->Op(1));
+ break;
+ }
+
case NI_AVX512F_ConvertToVector256Int32:
+ case NI_AVX512F_ConvertToVector256UInt32:
+ case NI_AVX512F_VL_ConvertToVector128UInt32:
+ case NI_AVX512F_VL_ConvertToVector128UInt32WithSaturation:
{
if (varTypeIsFloating(baseType))
{
@@ -1932,17 +1961,38 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node)
FALLTHROUGH;
}
+ case NI_AVX512F_ConvertToVector128Byte:
+ case NI_AVX512F_ConvertToVector128ByteWithSaturation:
case NI_AVX512F_ConvertToVector128Int16:
- case NI_AVX512F_ConvertToVector128Int32:
+ case NI_AVX512F_ConvertToVector128Int16WithSaturation:
+ case NI_AVX512F_ConvertToVector128SByte:
+ case NI_AVX512F_ConvertToVector128SByteWithSaturation:
case NI_AVX512F_ConvertToVector128UInt16:
- case NI_AVX512F_ConvertToVector128UInt32:
+ case NI_AVX512F_ConvertToVector128UInt16WithSaturation:
case NI_AVX512F_ConvertToVector256Int16:
+ case NI_AVX512F_ConvertToVector256Int16WithSaturation:
+ case NI_AVX512F_ConvertToVector256Int32WithSaturation:
case NI_AVX512F_ConvertToVector256UInt16:
- case NI_AVX512F_ConvertToVector256UInt32:
- case NI_AVX512BW_ConvertToVector128Byte:
- case NI_AVX512BW_ConvertToVector128SByte:
+ case NI_AVX512F_ConvertToVector256UInt16WithSaturation:
+ case NI_AVX512F_ConvertToVector256UInt32WithSaturation:
+ case NI_AVX512F_VL_ConvertToVector128Byte:
+ case NI_AVX512F_VL_ConvertToVector128ByteWithSaturation:
+ case NI_AVX512F_VL_ConvertToVector128Int16:
+ case NI_AVX512F_VL_ConvertToVector128Int16WithSaturation:
+ case NI_AVX512F_VL_ConvertToVector128Int32:
+ case NI_AVX512F_VL_ConvertToVector128Int32WithSaturation:
+ case NI_AVX512F_VL_ConvertToVector128SByte:
+ case NI_AVX512F_VL_ConvertToVector128SByteWithSaturation:
+ case NI_AVX512F_VL_ConvertToVector128UInt16:
+ case NI_AVX512F_VL_ConvertToVector128UInt16WithSaturation:
case NI_AVX512BW_ConvertToVector256Byte:
+ case NI_AVX512BW_ConvertToVector256ByteWithSaturation:
case NI_AVX512BW_ConvertToVector256SByte:
+ case NI_AVX512BW_ConvertToVector256SByteWithSaturation:
+ case NI_AVX512BW_VL_ConvertToVector128Byte:
+ case NI_AVX512BW_VL_ConvertToVector128ByteWithSaturation:
+ case NI_AVX512BW_VL_ConvertToVector128SByte:
+ case NI_AVX512BW_VL_ConvertToVector128SByteWithSaturation:
{
instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
@@ -1954,6 +2004,15 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node)
break;
}
+ case NI_AVX512F_X64_ConvertScalarToVector128Double:
+ case NI_AVX512F_X64_ConvertScalarToVector128Single:
+ {
+ assert(baseType == TYP_ULONG);
+ instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
+ genHWIntrinsic_R_R_RM(node, ins, EA_8BYTE);
+ break;
+ }
+
default:
unreached();
break;
diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h
index c9de9fdafd2d32..c8e81f69f50502 100644
--- a/src/coreclr/jit/hwintrinsiclistxarch.h
+++ b/src/coreclr/jit/hwintrinsiclistxarch.h
@@ -833,22 +833,36 @@ HARDWARE_INTRINSIC(AVX512F, BroadcastScalarToVector512,
HARDWARE_INTRINSIC(AVX512F, BroadcastVector128ToVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti128, INS_vbroadcasti128, INS_invalid, INS_invalid, INS_vbroadcastf128, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX512F, BroadcastVector256ToVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti64x4, INS_vbroadcasti64x4, INS_invalid, INS_vbroadcastf64x4}, HW_Category_MemoryLoad, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX512F, Ceiling, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(AVX512F, ConvertToVector128Int16, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
-HARDWARE_INTRINSIC(AVX512F, ConvertToVector128Int32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
-HARDWARE_INTRINSIC(AVX512F, ConvertToVector128UInt16, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
-HARDWARE_INTRINSIC(AVX512F, ConvertToVector128UInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AVX512F, ConvertScalarToVector128Double, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtusi2sd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(AVX512F, ConvertScalarToVector128Single, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtusi2ss32, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(AVX512F, ConvertToUInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtss2usi, INS_vcvtsd2usi}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AVX512F, ConvertToUInt32WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usi, INS_vcvttsd2usi}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AVX512F, ConvertToVector128Byte, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdb, INS_vpmovdb, INS_vpmovqb, INS_vpmovqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AVX512F, ConvertToVector128ByteWithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusdb, INS_invalid, INS_vpmovusqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AVX512F, ConvertToVector128Int16, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqw, INS_vpmovqw, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AVX512F, ConvertToVector128Int16WithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsqw, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AVX512F, ConvertToVector128SByte, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdb, INS_vpmovdb, INS_vpmovqb, INS_vpmovqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AVX512F, ConvertToVector128SByteWithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsdb, INS_invalid, INS_vpmovsqb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AVX512F, ConvertToVector128UInt16, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqw, INS_vpmovqw, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AVX512F, ConvertToVector128UInt16WithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusqw, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AVX512F, ConvertToVector256Int16, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AVX512F, ConvertToVector256Int16WithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AVX512F, ConvertToVector256Int32, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_invalid, INS_cvtpd2dq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AVX512F, ConvertToVector256Int32WithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsqd, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AVX512F, ConvertToVector256Int32WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttpd2dq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX512F, ConvertToVector256Single, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2ps}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX512F, ConvertToVector256UInt16, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
-HARDWARE_INTRINSIC(AVX512F, ConvertToVector256UInt32, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
-HARDWARE_INTRINSIC(AVX512F, ConvertToVector512Double, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2pd, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2pd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX512F, ConvertToVector256UInt16WithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AVX512F, ConvertToVector256UInt32, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_invalid, INS_vcvtpd2udq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AVX512F, ConvertToVector256UInt32WithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusqd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AVX512F, ConvertToVector256UInt32WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttpd2udq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX512F, ConvertToVector512Double, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2pd, INS_vcvtudq2pd, INS_invalid, INS_invalid, INS_cvtps2pd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX512F, ConvertToVector512Int32, 64, 1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX512F, ConvertToVector512Int32WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttps2dq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX512F, ConvertToVector512Int64, 64, 1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(AVX512F, ConvertToVector512Single, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(AVX512F, ConvertToVector512UInt32, 64, 1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX512F, ConvertToVector512Single, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2ps, INS_vcvtudq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX512F, ConvertToVector512UInt32, 64, 1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2udq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX512F, ConvertToVector512UInt32WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX512F, ConvertToVector512UInt64, 64, 1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX512F, Divide, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divps, INS_divpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX512F, DuplicateEvenIndexed, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsldup, INS_movddup}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
@@ -875,8 +889,8 @@ HARDWARE_INTRINSIC(AVX512F, Or,
HARDWARE_INTRINSIC(AVX512F, Permute2x64, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilpd}, HW_Category_IMM, HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(AVX512F, Permute4x32, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilps, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(AVX512F, Permute4x64, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermq, INS_vpermq, INS_invalid, INS_vpermpd}, HW_Category_IMM, HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(AVX512F, PermuteVar2x64, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX512F, PermuteVar4x32, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX512F, PermuteVar2x64, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilpdvar}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX512F, PermuteVar4x32, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilpsvar, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX512F, PermuteVar8x64, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermq_reg, INS_vpermq_reg, INS_invalid, INS_vpermpd_reg}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport)
HARDWARE_INTRINSIC(AVX512F, PermuteVar16x32, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermd, INS_vpermd, INS_invalid, INS_invalid, INS_vpermps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport)
HARDWARE_INTRINSIC(AVX512F, ShiftLeftLogical, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pslld, INS_pslld, INS_psllq, INS_psllq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
@@ -903,10 +917,39 @@ HARDWARE_INTRINSIC(AVX512F, Xor,
HARDWARE_INTRINSIC(AVX512F_VL, Abs, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpabsq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX512F_VL, Max, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmaxsq, INS_vpmaxuq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
HARDWARE_INTRINSIC(AVX512F_VL, Min, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpminsq, INS_vpminuq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128Byte, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdb, INS_vpmovdb, INS_vpmovqb, INS_vpmovqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128ByteWithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusdb, INS_invalid, INS_vpmovusqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128Double, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2pd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128Int16, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_vpmovqw, INS_vpmovqw, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128Int16WithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsdw, INS_invalid, INS_vpmovsqw, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128Int32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128Int32WithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsqd, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128SByte, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdb, INS_vpmovdb, INS_vpmovqb, INS_vpmovqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128SByteWithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsdb, INS_invalid, INS_vpmovsqb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128Single, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128UInt16, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_vpmovqw, INS_vpmovqw, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128UInt16WithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusdw, INS_invalid, INS_vpmovusqw, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128UInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_vcvtps2udq, INS_vcvtpd2udq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128UInt32WithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusqd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128UInt32WithTruncation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udq, INS_vcvttpd2udq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector256Double, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2pd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector256Single, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector256UInt32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2udq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector256UInt32WithTruncation, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX512F_VL, PermuteVar4x64, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermq_reg, INS_vpermq_reg, INS_invalid, INS_vpermpd_reg}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport)
HARDWARE_INTRINSIC(AVX512F_VL, ShiftRightArithmetic, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsraq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(AVX512F_VL, ShiftRightArithmeticVariable, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsravq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+// ISA Function name SIMD size NumArg Instructions Category Flags
+// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
+// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+// AVX512F.X64 Intrinsics
+HARDWARE_INTRINSIC(AVX512F_X64, ConvertScalarToVector128Double, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtusi2sd64, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AVX512F_X64, ConvertScalarToVector128Single, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtusi2ss64, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AVX512F_X64, ConvertToUInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtss2usi, INS_vcvtsd2usi}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AVX512F_X64, ConvertToUInt64WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usi, INS_vcvttsd2usi}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
+
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// ISA Function name SIMD size NumArg Instructions Category Flags
// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
@@ -918,10 +961,10 @@ HARDWARE_INTRINSIC(AVX512BW, AddSaturate,
HARDWARE_INTRINSIC(AVX512BW, AlignRight, 64, 3, {INS_palignr, INS_palignr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(AVX512BW, Average, 64, 2, {INS_invalid, INS_pavgb, INS_invalid, INS_pavgw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
HARDWARE_INTRINSIC(AVX512BW, BroadcastScalarToVector512, 64, 1, {INS_vpbroadcastb, INS_vpbroadcastb, INS_vpbroadcastw, INS_vpbroadcastw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MaybeMemoryLoad)
-HARDWARE_INTRINSIC(AVX512BW, ConvertToVector128Byte, -1, 1, {INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
-HARDWARE_INTRINSIC(AVX512BW, ConvertToVector128SByte, -1, 1, {INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AVX512BW, ConvertToVector256Byte, 64, 1, {INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AVX512BW, ConvertToVector256ByteWithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_vpmovuswb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AVX512BW, ConvertToVector256SByte, 64, 1, {INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AVX512BW, ConvertToVector256SByteWithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_vpmovswb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AVX512BW, ConvertToVector512Int16, 64, 1, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX512BW, ConvertToVector512UInt16, 64, 1, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX512BW, LoadVector512, 64, 1, {INS_vmovdqu8, INS_vmovdqu8, INS_vmovdqu16, INS_vmovdqu16, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
@@ -957,6 +1000,10 @@ HARDWARE_INTRINSIC(AVX512BW, UnpackLow,
// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// AVX512BW.VL Intrinsics
+HARDWARE_INTRINSIC(AVX512BW_VL, ConvertToVector128Byte, -1, 1, {INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AVX512BW_VL, ConvertToVector128ByteWithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_vpmovuswb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AVX512BW_VL, ConvertToVector128SByte, -1, 1, {INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AVX512BW_VL, ConvertToVector128SByteWithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_vpmovswb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AVX512BW_VL, PermuteVar8x16 , 16, 2, {INS_invalid, INS_invalid, INS_vpermw, INS_vpermw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport)
HARDWARE_INTRINSIC(AVX512BW_VL, PermuteVar16x16, 32, 2, {INS_invalid, INS_invalid, INS_vpermw, INS_vpermw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport)
HARDWARE_INTRINSIC(AVX512BW_VL, ShiftLeftLogicalVariable, -1, 2, {INS_invalid, INS_invalid, INS_vpsllvw, INS_vpsllvw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
@@ -973,6 +1020,12 @@ HARDWARE_INTRINSIC(AVX512DQ, AndNot,
HARDWARE_INTRINSIC(AVX512DQ, BroadcastPairScalarToVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x2, INS_vbroadcasti32x2, INS_invalid, INS_invalid, INS_vbroadcastf32x2, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX512DQ, BroadcastVector128ToVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti64x2, INS_vbroadcasti64x2, INS_invalid, INS_vbroadcastf64x2}, HW_Category_MemoryLoad, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX512DQ, BroadcastVector256ToVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x8, INS_vbroadcasti32x8, INS_invalid, INS_invalid, INS_vbroadcastf32x8, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX512DQ, ConvertToVector256Single, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtqq2ps, INS_vcvtuqq2ps, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX512DQ, ConvertToVector512Double, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtqq2pd, INS_vcvtuqq2pd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX512DQ, ConvertToVector512Int64, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2qq, INS_vcvtpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX512DQ, ConvertToVector512Int64WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qq, INS_vcvttpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX512DQ, ConvertToVector512UInt64, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2uqq, INS_vcvtpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX512DQ, ConvertToVector512UInt64WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqq, INS_vcvttpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX512DQ, ExtractVector128, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vextracti64x2, INS_vextracti64x2, INS_invalid, INS_vextractf64x2}, HW_Category_IMM, HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(AVX512DQ, ExtractVector256, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vextracti32x8, INS_vextracti32x8, INS_invalid, INS_invalid, INS_vextractf32x8, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(AVX512DQ, InsertVector128, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vinserti64x2, INS_vinserti64x2, INS_invalid, INS_vinsertf64x2}, HW_Category_IMM, HW_Flag_FullRangeIMM)
@@ -986,8 +1039,19 @@ HARDWARE_INTRINSIC(AVX512DQ, Xor,
// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// AVX512DQ.VL Intrinsics
-HARDWARE_INTRINSIC(AVX512DQ_VL, BroadcastPairScalarToVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x2, INS_vbroadcasti32x2, INS_invalid, INS_invalid, INS_vbroadcastf32x2, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX512DQ_VL, BroadcastPairScalarToVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x2, INS_vbroadcasti32x2, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX512DQ_VL, BroadcastPairScalarToVector256, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x2, INS_vbroadcasti32x2, INS_invalid, INS_invalid, INS_vbroadcastf32x2, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector128Double, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtqq2pd, INS_vcvtuqq2pd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector128Int64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2qq, INS_vcvtpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector128Int64WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qq, INS_vcvttpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector128Single, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtqq2ps, INS_vcvtuqq2ps, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector128UInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2uqq, INS_vcvtpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector128UInt64WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqq, INS_vcvttpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector256Double, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtqq2pd, INS_vcvtuqq2pd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector256Int64, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2qq, INS_vcvtpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector256Int64WithTruncation, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qq, INS_vcvttpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector256UInt64, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2uqq, INS_vcvtpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector256UInt64WithTruncation, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqq, INS_vcvttpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX512DQ_VL, MultiplyLow, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmullq, INS_vpmullq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h
index ede345a40f6da8..73f6dd45e704b5 100644
--- a/src/coreclr/jit/instrsxarch.h
+++ b/src/coreclr/jit/instrsxarch.h
@@ -615,6 +615,20 @@ INST3(vbroadcastf64x2, "broadcastf64x2", IUM_WR, BAD_CODE, BAD_
INST3(vbroadcasti64x2, "broadcasti64x2", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x5A), INS_TT_TUPLE2, Input_64Bit | REX_W1 | Encoding_EVEX) // Broadcast packed integer values read from memory to entire register
INST3(vbroadcastf64x4, "broadcastf64x4", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1B), INS_TT_TUPLE2, Input_64Bit | REX_W1 | Encoding_EVEX) // Broadcast packed float values read from memory to entire register
INST3(vbroadcasti64x4, "broadcasti64x4", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x5B), INS_TT_TUPLE2, Input_64Bit | REX_W1 | Encoding_EVEX) // Broadcast packed integer values read from memory to entire register
+INST3(vcvtpd2udq, "cvtpd2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x79), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_EVEX) // cvt packed doubles to unsigned DWORDs
+INST3(vcvtps2udq, "cvtps2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x79), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_EVEX) // cvt packed singles to unsigned DWORDs
+INST3(vcvtsd2usi, "cvtsd2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x79), INS_TT_TUPLE1_FIXED, Input_64Bit | REX_WX | Encoding_EVEX) // cvt scalar double to unsigned DWORD/QWORD
+INST3(vcvtss2usi, "cvtss2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x79), INS_TT_TUPLE1_FIXED, Input_32Bit | REX_WX | Encoding_EVEX) // cvt scalar single to unsigned DWORD/QWORD
+INST3(vcvttpd2udq, "cvttpd2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x78), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_EVEX) // cvt w/ truncation packed doubles to unsigned DWORDs
+INST3(vcvttps2udq, "cvttps2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x78), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_EVEX) // cvt w/ truncation packed singles to unsigned DWORDs
+INST3(vcvttsd2usi, "cvttsd2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x78), INS_TT_TUPLE1_FIXED, Input_64Bit | REX_WX | Encoding_EVEX) // cvt w/ truncation scalar double to unsigned DWORD/QWORD
+INST3(vcvttss2usi, "cvttss2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x78), INS_TT_TUPLE1_FIXED, Input_32Bit | REX_WX | Encoding_EVEX) // cvt w/ truncation scalar single to unsigned DWORD/QWORD
+INST3(vcvtudq2pd, "cvtudq2pd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x7A), INS_TT_HALF, Input_32Bit | REX_W0_EVEX | Encoding_EVEX) // cvt packed unsigned DWORDs to doubles
+INST3(vcvtudq2ps, "cvtudq2ps", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7A), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_EVEX) // cvt packed unsigned DWORDs to singles
+INST3(vcvtusi2sd32, "cvtusi2sd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7B), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt scalar unsigned DWORD to double
+INST3(vcvtusi2sd64, "cvtusi2sd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7B), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt scalar unsigned QWORD to double
+INST3(vcvtusi2ss32, "cvtusi2ss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x7B), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt scalar unsigned DWORD to single
+INST3(vcvtusi2ss64, "cvtusi2ss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x7B), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt scalar unsigned QWORD to single
INST3(vextractf64x4, "extractf64x4", IUM_WR, SSE3A(0x1B), BAD_CODE, BAD_CODE, INS_TT_TUPLE4, Input_64Bit | REX_W1_EVEX | Encoding_EVEX) // Extract 256-bit packed double-precision floating point values
INST3(vextracti64x4, "extracti64x4", IUM_WR, SSE3A(0x3B), BAD_CODE, BAD_CODE, INS_TT_TUPLE4, Input_64Bit | REX_W1_EVEX | Encoding_EVEX) // Extract 256-bit packed quadword integer values
INST3(vinsertf64x4, "insertf64x4", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1A), INS_TT_TUPLE4, Input_64Bit | REX_W1_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed double-precision floating point values
@@ -632,8 +646,21 @@ INST3(vpmaxsq, "pmaxsq", IUM_WR, BAD_CODE, BAD_
INST3(vpmaxuq, "pmaxuq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3F), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum 64-bit unsigned integers
INST3(vpminsq, "pminsq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x39), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum 64-bit signed integers
INST3(vpminuq, "pminuq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3B), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum 64-bit unsigned integers
+INST3(vpmovdb, "pmovdb", IUM_WR, PSSE38(0xF3, 0x31), BAD_CODE, PSSE38(0xF3, 0x31), INS_TT_QUARTER_MEM, Input_32Bit | REX_W0_EVEX | Encoding_EVEX)
INST3(vpmovdw, "pmovdw", IUM_WR, PSSE38(0xF3, 0x33), BAD_CODE, PSSE38(0xF3, 0x33), INS_TT_HALF_MEM, Input_32Bit | REX_W0_EVEX | Encoding_EVEX)
+INST3(vpmovqb, "pmovqb", IUM_WR, PSSE38(0xF3, 0x32), BAD_CODE, PSSE38(0xF3, 0x32), INS_TT_EIGHTH_MEM, Input_64Bit | REX_W0_EVEX | Encoding_EVEX)
INST3(vpmovqd, "pmovqd", IUM_WR, PSSE38(0xF3, 0x35), BAD_CODE, PSSE38(0xF3, 0x35), INS_TT_HALF_MEM, Input_64Bit | REX_W0_EVEX | Encoding_EVEX)
+INST3(vpmovqw, "pmovqw", IUM_WR, PSSE38(0xF3, 0x34), BAD_CODE, PSSE38(0xF3, 0x34), INS_TT_QUARTER_MEM, Input_64Bit | REX_W0_EVEX | Encoding_EVEX)
+INST3(vpmovsdb, "pmovsdb", IUM_WR, PSSE38(0xF3, 0x21), BAD_CODE, PSSE38(0xF3, 0x21), INS_TT_QUARTER_MEM, Input_32Bit | REX_W0_EVEX | Encoding_EVEX)
+INST3(vpmovsdw, "pmovsdw", IUM_WR, PSSE38(0xF3, 0x23), BAD_CODE, PSSE38(0xF3, 0x23), INS_TT_HALF_MEM, Input_32Bit | REX_W0_EVEX | Encoding_EVEX)
+INST3(vpmovsqb, "pmovsqb", IUM_WR, PSSE38(0xF3, 0x22), BAD_CODE, PSSE38(0xF3, 0x22), INS_TT_EIGHTH_MEM, Input_64Bit | REX_W0_EVEX | Encoding_EVEX)
+INST3(vpmovsqd, "pmovsqd", IUM_WR, PSSE38(0xF3, 0x25), BAD_CODE, PSSE38(0xF3, 0x25), INS_TT_HALF_MEM, Input_64Bit | REX_W0_EVEX | Encoding_EVEX)
+INST3(vpmovsqw, "pmovsqw", IUM_WR, PSSE38(0xF3, 0x24), BAD_CODE, PSSE38(0xF3, 0x24), INS_TT_QUARTER_MEM, Input_64Bit | REX_W0_EVEX | Encoding_EVEX)
+INST3(vpmovusdb, "pmovusdb", IUM_WR, PSSE38(0xF3, 0x11), BAD_CODE, PSSE38(0xF3, 0x11), INS_TT_QUARTER_MEM, Input_32Bit | REX_W0_EVEX | Encoding_EVEX)
+INST3(vpmovusdw, "pmovusdw", IUM_WR, PSSE38(0xF3, 0x13), BAD_CODE, PSSE38(0xF3, 0x13), INS_TT_HALF_MEM, Input_32Bit | REX_W0_EVEX | Encoding_EVEX)
+INST3(vpmovusqb, "pmovusqb", IUM_WR, PSSE38(0xF3, 0x12), BAD_CODE, PSSE38(0xF3, 0x12), INS_TT_EIGHTH_MEM, Input_64Bit | REX_W0_EVEX | Encoding_EVEX)
+INST3(vpmovusqd, "pmovusqd", IUM_WR, PSSE38(0xF3, 0x15), BAD_CODE, PSSE38(0xF3, 0x15), INS_TT_HALF_MEM, Input_64Bit | REX_W0_EVEX | Encoding_EVEX)
+INST3(vpmovusqw, "pmovusqw", IUM_WR, PSSE38(0xF3, 0x14), BAD_CODE, PSSE38(0xF3, 0x14), INS_TT_QUARTER_MEM, Input_64Bit | REX_W0_EVEX | Encoding_EVEX)
INST3(vporq, "porq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEB), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise OR of two xmm regs
INST3(vpsraq, "psraq", IUM_WR, BAD_CODE, PCKDBL(0x72), PCKDBL(0xE2), INS_TT_FULL | INS_TT_MEM128, Input_64Bit | REX_W1_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right arithmetic of 64-bit integers
INST3(vpsravq, "psravq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x46), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Right Arithmetic
@@ -652,18 +679,20 @@ INST3(vmovdqu16, "movdqu16", IUM_WR, SSEFLT(0x7F), BAD_
INST3(vpbroadcastb_gpr, "pbroadcastb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x7A), INS_TT_TUPLE1_SCALAR, Input_8Bit | REX_W0 | Encoding_EVEX) // Broadcast int8 value from gpr to entire register
INST3(vpbroadcastw_gpr, "pbroadcastw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x7B), INS_TT_TUPLE1_SCALAR, Input_16Bit | REX_W0 | Encoding_EVEX) // Broadcast int16 value from gpr to entire register
INST3(vpcmpb, "pcmpb", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x3F), INS_TT_FULL_MEM, Input_8Bit | REX_W0_EVEX | Encoding_EVEX | INS_Flags_IsMskSrcSrcEvexInstruction)
-INST3(vpcmpw, "pcmpw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x3F), INS_TT_FULL, Input_16Bit | REX_W1_EVEX | Encoding_EVEX | INS_Flags_IsMskSrcSrcEvexInstruction)
+INST3(vpcmpw, "pcmpw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x3F), INS_TT_FULL_MEM, Input_16Bit | REX_W1_EVEX | Encoding_EVEX | INS_Flags_IsMskSrcSrcEvexInstruction)
INST3(vpcmpub, "pcmpub", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x3E), INS_TT_FULL_MEM, Input_8Bit | REX_W0_EVEX | Encoding_EVEX | INS_Flags_IsMskSrcSrcEvexInstruction)
-INST3(vpcmpuw, "pcmpuw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x3E), INS_TT_FULL, Input_16Bit | REX_W1_EVEX | Encoding_EVEX | INS_Flags_IsMskSrcSrcEvexInstruction)
+INST3(vpcmpuw, "pcmpuw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x3E), INS_TT_FULL_MEM, Input_16Bit | REX_W1_EVEX | Encoding_EVEX | INS_Flags_IsMskSrcSrcEvexInstruction)
INST3(vpermw, "permw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x8D), INS_TT_FULL_MEM, Input_16Bit | REX_W1_EVEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Permute Packed Doublewords Elements
INST3(vpmovb2m, "pmovb2m", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x29), INS_TT_NONE, Input_8Bit | REX_W0_EVEX | Encoding_EVEX)
INST3(vpmovm2b, "pmovm2b", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x28), INS_TT_NONE, Input_8Bit | REX_W0_EVEX | Encoding_EVEX)
INST3(vpmovm2w, "pmovm2w", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x28), INS_TT_NONE, Input_16Bit | REX_W1_EVEX | Encoding_EVEX)
INST3(vpmovw2m, "pmovw2m", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x29), INS_TT_NONE, Input_16Bit | REX_W1_EVEX | Encoding_EVEX)
INST3(vpmovwb, "pmovwb", IUM_WR, PSSE38(0xF3, 0x30), BAD_CODE, PSSE38(0xF3, 0x30), INS_TT_HALF_MEM, Input_16Bit | REX_W0_EVEX | Encoding_EVEX)
-INST3(vpsllvw, "psllvq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x12), INS_TT_FULL, Input_16Bit | REX_W1_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Left Logical
-INST3(vpsravw, "psravq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x11), INS_TT_FULL, Input_16Bit | REX_W1_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Right Arithmetic
-INST3(vpsrlvw, "psrlvq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x10), INS_TT_FULL, Input_16Bit | REX_W1_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Right Logical
+INST3(vpmovswb, "pmovswb", IUM_WR, PSSE38(0xF3, 0x20), BAD_CODE, PSSE38(0xF3, 0x20), INS_TT_HALF_MEM, Input_16Bit | REX_W0_EVEX | Encoding_EVEX)
+INST3(vpmovuswb, "pmovuswb", IUM_WR, PSSE38(0xF3, 0x10), BAD_CODE, PSSE38(0xF3, 0x10), INS_TT_HALF_MEM, Input_16Bit | REX_W0_EVEX | Encoding_EVEX)
+INST3(vpsllvw, "psllvw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x12), INS_TT_FULL_MEM, Input_16Bit | REX_W1_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Left Logical
+INST3(vpsravw, "psravw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x11), INS_TT_FULL_MEM, Input_16Bit | REX_W1_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Right Arithmetic
+INST3(vpsrlvw, "psrlvw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x10), INS_TT_FULL_MEM, Input_16Bit | REX_W1_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Right Logical
// AVX512DQ
INST3(kortestb, "kortestb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x98), INS_TT_NONE, REX_W0 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction)
@@ -673,6 +702,18 @@ INST3(vbroadcastf32x2, "broadcastf32x2", IUM_WR, BAD_CODE, BAD_
INST3(vbroadcasti32x2, "broadcasti32x2", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x59), INS_TT_TUPLE2, Input_32Bit | REX_W0 | Encoding_EVEX) // Broadcast packed integer values read from memory to entire register
INST3(vbroadcastf32x8, "broadcastf32x8", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1B), INS_TT_TUPLE8, Input_32Bit | REX_W0 | Encoding_EVEX) // Broadcast packed float values read from memory to entire register
INST3(vbroadcasti32x8, "broadcasti32x8", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x5B), INS_TT_TUPLE8, Input_32Bit | REX_W0 | Encoding_EVEX) // Broadcast packed integer values read from memory to entire register
+INST3(vcvtpd2qq, "cvtpd2qq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7B), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_EVEX) // cvt packed doubles to signed QWORDs
+INST3(vcvtpd2uqq, "cvtpd2uqq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x79), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_EVEX) // cvt packed doubles to unsigned QWORDs
+INST3(vcvtps2qq, "cvtps2qq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7B), INS_TT_HALF, Input_32Bit | REX_W0_EVEX | Encoding_EVEX) // cvt packed singles to signed QWORDs
+INST3(vcvtps2uqq, "cvtps2uqq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x79), INS_TT_HALF, Input_32Bit | REX_W0_EVEX | Encoding_EVEX) // cvt packed singles to unsigned QWORDs
+INST3(vcvtqq2pd, "cvtqq2pd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xE6), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_EVEX) // cvt packed signed QWORDs to doubles
+INST3(vcvtqq2ps, "cvtqq2ps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5B), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_EVEX) // cvt packed signed QWORDs to singles
+INST3(vcvttpd2qq, "cvttpd2qq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7A), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_EVEX) // cvt w/ truncation packed doubles to signed QWORDs
+INST3(vcvttpd2uqq, "cvttpd2uqq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x78), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_EVEX) // cvt w/ truncation packed doubles to unsigned QWORDs
+INST3(vcvttps2qq, "cvttps2qq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7A), INS_TT_HALF, Input_32Bit | REX_W0_EVEX | Encoding_EVEX) // cvt w/ truncation packed singles to signed QWORDs
+INST3(vcvttps2uqq, "cvttps2uqq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x78), INS_TT_HALF, Input_32Bit | REX_W0_EVEX | Encoding_EVEX) // cvt w/ truncation packed singles to unsigned QWORDs
+INST3(vcvtuqq2pd, "cvtuqq2pd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x7A), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_EVEX) // cvt packed signed QWORDs to doubles
+INST3(vcvtuqq2ps, "cvtuqq2ps", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7A), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_EVEX) // cvt packed signed QWORDs to singles
INST3(vextractf32x8, "extractf32x8", IUM_WR, SSE3A(0x1B), BAD_CODE, BAD_CODE, INS_TT_TUPLE8, Input_32Bit | REX_W0_EVEX | Encoding_EVEX) // Extract 256-bit packed double-precision floating point values
INST3(vextractf64x2, "extractf64x2", IUM_WR, SSE3A(0x19), BAD_CODE, BAD_CODE, INS_TT_TUPLE2, Input_64Bit | REX_W1_EVEX | Encoding_EVEX) // Extract 256-bit packed double-precision floating point values
INST3(vextracti32x8, "extracti32x8", IUM_WR, SSE3A(0x3B), BAD_CODE, BAD_CODE, INS_TT_TUPLE8, Input_32Bit | REX_W0_EVEX | Encoding_EVEX) // Extract 256-bit packed quadword integer values
diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp
index caed09f2f2aa0b..ca3c64dd2ff24a 100644
--- a/src/coreclr/jit/lowerxarch.cpp
+++ b/src/coreclr/jit/lowerxarch.cpp
@@ -5848,6 +5848,9 @@ void Lowering::ContainCheckStoreIndir(GenTreeStoreInd* node)
}
case NI_AVX512F_ConvertToVector256Int32:
+ case NI_AVX512F_ConvertToVector256UInt32:
+ case NI_AVX512F_VL_ConvertToVector128UInt32:
+ case NI_AVX512F_VL_ConvertToVector128UInt32WithSaturation:
{
if (varTypeIsFloating(simdBaseType))
{
@@ -5856,31 +5859,73 @@ void Lowering::ContainCheckStoreIndir(GenTreeStoreInd* node)
FALLTHROUGH;
}
+ case NI_AVX512F_ConvertToVector128Byte:
+ case NI_AVX512F_ConvertToVector128ByteWithSaturation:
case NI_AVX512F_ConvertToVector128Int16:
- case NI_AVX512F_ConvertToVector128Int32:
+ case NI_AVX512F_ConvertToVector128Int16WithSaturation:
+ case NI_AVX512F_ConvertToVector128SByte:
+ case NI_AVX512F_ConvertToVector128SByteWithSaturation:
case NI_AVX512F_ConvertToVector128UInt16:
- case NI_AVX512F_ConvertToVector128UInt32:
+ case NI_AVX512F_ConvertToVector128UInt16WithSaturation:
case NI_AVX512F_ConvertToVector256Int16:
+ case NI_AVX512F_ConvertToVector256Int16WithSaturation:
+ case NI_AVX512F_ConvertToVector256Int32WithSaturation:
case NI_AVX512F_ConvertToVector256UInt16:
- case NI_AVX512F_ConvertToVector256UInt32:
- case NI_AVX512BW_ConvertToVector128Byte:
- case NI_AVX512BW_ConvertToVector128SByte:
+ case NI_AVX512F_ConvertToVector256UInt16WithSaturation:
+ case NI_AVX512F_ConvertToVector256UInt32WithSaturation:
+ case NI_AVX512F_VL_ConvertToVector128Byte:
+ case NI_AVX512F_VL_ConvertToVector128ByteWithSaturation:
+ case NI_AVX512F_VL_ConvertToVector128Int16:
+ case NI_AVX512F_VL_ConvertToVector128Int16WithSaturation:
+ case NI_AVX512F_VL_ConvertToVector128Int32:
+ case NI_AVX512F_VL_ConvertToVector128Int32WithSaturation:
+ case NI_AVX512F_VL_ConvertToVector128SByte:
+ case NI_AVX512F_VL_ConvertToVector128SByteWithSaturation:
+ case NI_AVX512F_VL_ConvertToVector128UInt16:
+ case NI_AVX512F_VL_ConvertToVector128UInt16WithSaturation:
case NI_AVX512BW_ConvertToVector256Byte:
+ case NI_AVX512BW_ConvertToVector256ByteWithSaturation:
case NI_AVX512BW_ConvertToVector256SByte:
+ case NI_AVX512BW_ConvertToVector256SByteWithSaturation:
+ case NI_AVX512BW_VL_ConvertToVector128Byte:
+ case NI_AVX512BW_VL_ConvertToVector128ByteWithSaturation:
+ case NI_AVX512BW_VL_ConvertToVector128SByte:
+ case NI_AVX512BW_VL_ConvertToVector128SByteWithSaturation:
{
// These intrinsics are "ins reg/mem, xmm"
- unsigned simdSize = hwintrinsic->GetSimdSize();
+ instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, simdBaseType);
+ insTupleType tupleType = comp->GetEmitter()->insTupleTypeInfo(ins);
+ unsigned simdSize = hwintrinsic->GetSimdSize();
+ unsigned memSize = 0;
- if (simdSize == 16)
+ switch (tupleType)
{
- // For TYP_SIMD16, we produce a TYP_SIMD16 register
- // but only store TYP_SIMD8 to memory and so we cannot
- // contain without additional work.
- isContainable = false;
+ case INS_TT_HALF_MEM:
+ {
+ memSize = simdSize / 2;
+ break;
+ }
+
+ case INS_TT_QUARTER_MEM:
+ {
+ memSize = simdSize / 4;
+ break;
+ }
+
+ case INS_TT_EIGHTH_MEM:
+ {
+ memSize = simdSize / 8;
+ break;
+ }
+
+ default:
+ {
+ unreached();
+ }
}
- else
+
+ if (genTypeSize(node) == memSize)
{
- assert((simdSize == 32) || (simdSize == 64));
isContainable = true;
}
break;
@@ -6943,6 +6988,9 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre
case NI_AVX2_Shuffle:
case NI_AVX2_ShuffleHigh:
case NI_AVX2_ShuffleLow:
+ case NI_AVX512F_Permute2x64:
+ case NI_AVX512F_Permute4x32:
+ case NI_AVX512F_Permute4x64:
case NI_AVX512F_ShiftLeftLogical:
case NI_AVX512F_ShiftRightArithmetic:
case NI_AVX512F_ShiftRightLogical:
@@ -7130,6 +7178,10 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre
case NI_SSE2_X64_ConvertScalarToVector128Double:
case NI_SSE2_X64_ConvertScalarToVector128Int64:
case NI_SSE2_X64_ConvertScalarToVector128UInt64:
+ case NI_AVX512F_ConvertScalarToVector128Double:
+ case NI_AVX512F_ConvertScalarToVector128Single:
+ case NI_AVX512F_X64_ConvertScalarToVector128Double:
+ case NI_AVX512F_X64_ConvertScalarToVector128Single:
{
if (!varTypeIsIntegral(childNode->TypeGet()))
{
@@ -7281,23 +7333,53 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre
return supportsSIMDScalarLoads;
}
+ case NI_AVX512F_ConvertToVector256Int32:
+ case NI_AVX512F_ConvertToVector256UInt32:
+ case NI_AVX512F_VL_ConvertToVector128UInt32:
+ case NI_AVX512F_VL_ConvertToVector128UInt32WithSaturation:
+ {
+ // These ones are not containable as stores when the base
+ // type is a floating-point type
+ FALLTHROUGH;
+ }
+
case NI_Vector128_GetElement:
case NI_AVX_ExtractVector128:
case NI_AVX2_ExtractVector128:
case NI_AVX512F_ExtractVector128:
case NI_AVX512F_ExtractVector256:
+ case NI_AVX512F_ConvertToVector128Byte:
+ case NI_AVX512F_ConvertToVector128ByteWithSaturation:
case NI_AVX512F_ConvertToVector128Int16:
- case NI_AVX512F_ConvertToVector128Int32:
+ case NI_AVX512F_ConvertToVector128Int16WithSaturation:
+ case NI_AVX512F_ConvertToVector128SByte:
+ case NI_AVX512F_ConvertToVector128SByteWithSaturation:
case NI_AVX512F_ConvertToVector128UInt16:
- case NI_AVX512F_ConvertToVector128UInt32:
+ case NI_AVX512F_ConvertToVector128UInt16WithSaturation:
case NI_AVX512F_ConvertToVector256Int16:
- case NI_AVX512F_ConvertToVector256Int32:
+ case NI_AVX512F_ConvertToVector256Int16WithSaturation:
+ case NI_AVX512F_ConvertToVector256Int32WithSaturation:
case NI_AVX512F_ConvertToVector256UInt16:
- case NI_AVX512F_ConvertToVector256UInt32:
- case NI_AVX512BW_ConvertToVector128Byte:
- case NI_AVX512BW_ConvertToVector128SByte:
+ case NI_AVX512F_ConvertToVector256UInt16WithSaturation:
+ case NI_AVX512F_ConvertToVector256UInt32WithSaturation:
+ case NI_AVX512F_VL_ConvertToVector128Byte:
+ case NI_AVX512F_VL_ConvertToVector128ByteWithSaturation:
+ case NI_AVX512F_VL_ConvertToVector128Int16:
+ case NI_AVX512F_VL_ConvertToVector128Int16WithSaturation:
+ case NI_AVX512F_VL_ConvertToVector128Int32:
+ case NI_AVX512F_VL_ConvertToVector128Int32WithSaturation:
+ case NI_AVX512F_VL_ConvertToVector128SByte:
+ case NI_AVX512F_VL_ConvertToVector128SByteWithSaturation:
+ case NI_AVX512F_VL_ConvertToVector128UInt16:
+ case NI_AVX512F_VL_ConvertToVector128UInt16WithSaturation:
case NI_AVX512BW_ConvertToVector256Byte:
+ case NI_AVX512BW_ConvertToVector256ByteWithSaturation:
case NI_AVX512BW_ConvertToVector256SByte:
+ case NI_AVX512BW_ConvertToVector256SByteWithSaturation:
+ case NI_AVX512BW_VL_ConvertToVector128Byte:
+ case NI_AVX512BW_VL_ConvertToVector128ByteWithSaturation:
+ case NI_AVX512BW_VL_ConvertToVector128SByte:
+ case NI_AVX512BW_VL_ConvertToVector128SByteWithSaturation:
case NI_AVX512DQ_ExtractVector128:
case NI_AVX512DQ_ExtractVector256:
{
@@ -7504,6 +7586,9 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
}
case NI_AVX512F_ConvertToVector256Int32:
+ case NI_AVX512F_ConvertToVector256UInt32:
+ case NI_AVX512F_VL_ConvertToVector128UInt32:
+ case NI_AVX512F_VL_ConvertToVector128UInt32WithSaturation:
{
if (varTypeIsFloating(simdBaseType))
{
@@ -7514,17 +7599,38 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
FALLTHROUGH;
}
+ case NI_AVX512F_ConvertToVector128Byte:
+ case NI_AVX512F_ConvertToVector128ByteWithSaturation:
case NI_AVX512F_ConvertToVector128Int16:
- case NI_AVX512F_ConvertToVector128Int32:
+ case NI_AVX512F_ConvertToVector128Int16WithSaturation:
+ case NI_AVX512F_ConvertToVector128SByte:
+ case NI_AVX512F_ConvertToVector128SByteWithSaturation:
case NI_AVX512F_ConvertToVector128UInt16:
- case NI_AVX512F_ConvertToVector128UInt32:
+ case NI_AVX512F_ConvertToVector128UInt16WithSaturation:
case NI_AVX512F_ConvertToVector256Int16:
+ case NI_AVX512F_ConvertToVector256Int16WithSaturation:
+ case NI_AVX512F_ConvertToVector256Int32WithSaturation:
case NI_AVX512F_ConvertToVector256UInt16:
- case NI_AVX512F_ConvertToVector256UInt32:
- case NI_AVX512BW_ConvertToVector128Byte:
- case NI_AVX512BW_ConvertToVector128SByte:
+ case NI_AVX512F_ConvertToVector256UInt16WithSaturation:
+ case NI_AVX512F_ConvertToVector256UInt32WithSaturation:
+ case NI_AVX512F_VL_ConvertToVector128Byte:
+ case NI_AVX512F_VL_ConvertToVector128ByteWithSaturation:
+ case NI_AVX512F_VL_ConvertToVector128Int16:
+ case NI_AVX512F_VL_ConvertToVector128Int16WithSaturation:
+ case NI_AVX512F_VL_ConvertToVector128Int32:
+ case NI_AVX512F_VL_ConvertToVector128Int32WithSaturation:
+ case NI_AVX512F_VL_ConvertToVector128SByte:
+ case NI_AVX512F_VL_ConvertToVector128SByteWithSaturation:
+ case NI_AVX512F_VL_ConvertToVector128UInt16:
+ case NI_AVX512F_VL_ConvertToVector128UInt16WithSaturation:
case NI_AVX512BW_ConvertToVector256Byte:
+ case NI_AVX512BW_ConvertToVector256ByteWithSaturation:
case NI_AVX512BW_ConvertToVector256SByte:
+ case NI_AVX512BW_ConvertToVector256SByteWithSaturation:
+ case NI_AVX512BW_VL_ConvertToVector128Byte:
+ case NI_AVX512BW_VL_ConvertToVector128ByteWithSaturation:
+ case NI_AVX512BW_VL_ConvertToVector128SByte:
+ case NI_AVX512BW_VL_ConvertToVector128SByteWithSaturation:
{
// These intrinsics are "ins reg/mem, xmm" and get
// contained by the relevant store operation instead.
@@ -7691,6 +7797,9 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
case NI_AVX2_Permute4x64:
case NI_AVX2_ShuffleHigh:
case NI_AVX2_ShuffleLow:
+ case NI_AVX512F_Permute2x64:
+ case NI_AVX512F_Permute4x32:
+ case NI_AVX512F_Permute4x64:
case NI_AVX512F_Shuffle:
case NI_AVX512BW_ShuffleHigh:
case NI_AVX512BW_ShuffleLow:
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512BW.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512BW.PlatformNotSupported.cs
index e4a8fe97408acf..4b6fcabee54e35 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512BW.PlatformNotSupported.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512BW.PlatformNotSupported.cs
@@ -22,6 +22,68 @@ internal VL() { }
public static new bool IsSupported { [Intrinsic] get { return false; } }
+ ///
+ /// __m128i _mm_cvtepi16_epi8 (__m128i a)
+ /// VPMOVWB xmm1/m64 {k1}{z}, xmm2
+ ///
+ public static Vector128 ConvertToVector128Byte(Vector128 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m128i _mm_cvtepi16_epi8 (__m128i a)
+ /// VPMOVWB xmm1/m64 {k1}{z}, xmm2
+ ///
+ public static Vector128 ConvertToVector128Byte(Vector128 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m128i _mm256_cvtepi16_epi8 (__m256i a)
+ /// VPMOVWB xmm1/m128 {k1}{z}, ymm2
+ ///
+ public static Vector128 ConvertToVector128Byte(Vector256 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m128i _mm256_cvtepi16_epi8 (__m256i a)
+ /// VPMOVWB xmm1/m128 {k1}{z}, ymm2
+ ///
+ public static Vector128 ConvertToVector128Byte(Vector256 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m128i _mm_cvtusepi16_epi8 (__m128i a)
+ /// VPMOVUWB xmm1/m64 {k1}{z}, xmm2
+ ///
+ public static Vector128 ConvertToVector128ByteWithSaturation(Vector128 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m128i _mm256_cvtusepi16_epi8 (__m256i a)
+ /// VPMOVUWB xmm1/m128 {k1}{z}, ymm2
+ ///
+ public static Vector128 ConvertToVector128ByteWithSaturation(Vector256 value) { throw new PlatformNotSupportedException(); }
+
+ ///
+ /// __m128i _mm_cvtepi16_epi8 (__m128i a)
+ /// VPMOVWB xmm1/m64 {k1}{z}, zmm2
+ ///
+ public static Vector128 ConvertToVector128SByte(Vector128 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m128i _mm_cvtepi16_epi8 (__m128i a)
+ /// VPMOVWB xmm1/m64 {k1}{z}, zmm2
+ ///
+ public static Vector128 ConvertToVector128SByte(Vector128 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m128i _mm256_cvtepi16_epi8 (__m256i a)
+ /// VPMOVWB xmm1/m128 {k1}{z}, zmm2
+ ///
+ public static Vector128 ConvertToVector128SByte(Vector256 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m128i _mm256_cvtepi16_epi8 (__m256i a)
+ /// VPMOVWB xmm1/m128 {k1}{z}, zmm2
+ ///
+ public static Vector128 ConvertToVector128SByte(Vector256 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m128i _mm_cvtsepi16_epi8 (__m128i a)
+ /// VPMOVSWB xmm1/m64 {k1}{z}, zmm2
+ ///
+ public static Vector128 ConvertToVector128SByteWithSaturation(Vector128 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m128i _mm256_cvtsepi16_epi8 (__m256i a)
+ /// VPMOVSWB xmm1/m128 {k1}{z}, zmm2
+ ///
+ public static Vector128 ConvertToVector128SByteWithSaturation(Vector256 value) { throw new PlatformNotSupportedException(); }
+
///
/// __m128i _mm_permutevar8x16_epi16 (__m128i a, __m128i b)
/// VPERMW xmm1 {k1}{z}, xmm2, xmm3/m128
@@ -201,6 +263,38 @@ internal X64() { }
///
public static Vector512 BroadcastScalarToVector512(Vector128 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m256i _mm512_cvtepi16_epi8 (__m512i a)
+ /// VPMOVWB ymm1/m256 {k1}{z}, zmm2
+ ///
+ public static Vector256 ConvertToVector256Byte(Vector512 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m256i _mm512_cvtepi16_epi8 (__m512i a)
+ /// VPMOVWB ymm1/m256 {k1}{z}, zmm2
+ ///
+ public static Vector256 ConvertToVector256Byte(Vector512 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m256i _mm512_cvtusepi16_epi8 (__m512i a)
+ /// VPMOVUWB ymm1/m256 {k1}{z}, zmm2
+ ///
+ public static Vector256 ConvertToVector256ByteWithSaturation(Vector512 value) { throw new PlatformNotSupportedException(); }
+
+ ///
+ /// __m256i _mm512_cvtepi16_epi8 (__m512i a)
+ /// VPMOVWB ymm1/m256 {k1}{z}, zmm2
+ ///
+ public static Vector256 ConvertToVector256SByte(Vector512 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m256i _mm512_cvtepi16_epi8 (__m512i a)
+ /// VPMOVWB ymm1/m256 {k1}{z}, zmm2
+ ///
+ public static Vector256 ConvertToVector256SByte(Vector512 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m256i _mm512_cvtsepi16_epi8 (__m512i a)
+ /// VPMOVSWB ymm1/m256 {k1}{z}, zmm2
+ ///
+ public static Vector256 ConvertToVector256SByteWithSaturation(Vector512 value) { throw new PlatformNotSupportedException(); }
+
///
/// __m512i _mm512_cvtepi8_epi16 (__m128i a)
/// VPMOVSXBW zmm1 {k1}{z}, ymm2/m256
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512BW.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512BW.cs
index d3211bed6dc8b0..79a3219fe4c4ad 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512BW.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512BW.cs
@@ -22,6 +22,68 @@ internal VL() { }
public static new bool IsSupported { get => IsSupported; }
+ ///
+ /// __m128i _mm_cvtepi16_epi8 (__m128i a)
+ /// VPMOVWB xmm1/m64 {k1}{z}, xmm2
+ ///
+ public static Vector128 ConvertToVector128Byte(Vector128 value) => ConvertToVector128Byte(value);
+ ///
+ /// __m128i _mm_cvtepi16_epi8 (__m128i a)
+ /// VPMOVWB xmm1/m64 {k1}{z}, xmm2
+ ///
+ public static Vector128 ConvertToVector128Byte(Vector128 value) => ConvertToVector128Byte(value);
+ ///
+ /// __m128i _mm256_cvtepi16_epi8 (__m256i a)
+ /// VPMOVWB xmm1/m128 {k1}{z}, ymm2
+ ///
+ public static Vector128 ConvertToVector128Byte(Vector256 value) => ConvertToVector128Byte(value);
+ ///
+ /// __m128i _mm256_cvtepi16_epi8 (__m256i a)
+ /// VPMOVWB xmm1/m128 {k1}{z}, ymm2
+ ///
+ public static Vector128 ConvertToVector128Byte(Vector256 value) => ConvertToVector128Byte(value);
+ ///
+ /// __m128i _mm_cvtusepi16_epi8 (__m128i a)
+ /// VPMOVUWB xmm1/m64 {k1}{z}, xmm2
+ ///
+ public static Vector128 ConvertToVector128ByteWithSaturation(Vector128 value) => ConvertToVector128ByteWithSaturation(value);
+ ///
+ /// __m128i _mm256_cvtusepi16_epi8 (__m256i a)
+ /// VPMOVUWB xmm1/m128 {k1}{z}, ymm2
+ ///
+ public static Vector128 ConvertToVector128ByteWithSaturation(Vector256 value) => ConvertToVector128ByteWithSaturation(value);
+
+ ///
+ /// __m128i _mm_cvtepi16_epi8 (__m128i a)
+ /// VPMOVWB xmm1/m64 {k1}{z}, zmm2
+ ///
+ public static Vector128 ConvertToVector128SByte(Vector128 value) => ConvertToVector128SByte(value);
+ ///
+ /// __m128i _mm_cvtepi16_epi8 (__m128i a)
+ /// VPMOVWB xmm1/m64 {k1}{z}, zmm2
+ ///
+ public static Vector128 ConvertToVector128SByte(Vector128 value) => ConvertToVector128SByte(value);
+ ///
+ /// __m128i _mm256_cvtepi16_epi8 (__m256i a)
+ /// VPMOVWB xmm1/m128 {k1}{z}, zmm2
+ ///
+ public static Vector128 ConvertToVector128SByte(Vector256 value) => ConvertToVector128SByte(value);
+ ///
+ /// __m128i _mm256_cvtepi16_epi8 (__m256i a)
+ /// VPMOVWB xmm1/m128 {k1}{z}, zmm2
+ ///
+ public static Vector128 ConvertToVector128SByte(Vector256 value) => ConvertToVector128SByte(value);
+ ///
+ /// __m128i _mm_cvtsepi16_epi8 (__m128i a)
+ /// VPMOVSWB xmm1/m64 {k1}{z}, zmm2
+ ///
+ public static Vector128 ConvertToVector128SByteWithSaturation(Vector128 value) => ConvertToVector128SByteWithSaturation(value);
+ ///
+ /// __m128i _mm256_cvtsepi16_epi8 (__m256i a)
+ /// VPMOVSWB xmm1/m128 {k1}{z}, zmm2
+ ///
+ public static Vector128 ConvertToVector128SByteWithSaturation(Vector256 value) => ConvertToVector128SByteWithSaturation(value);
+
///
/// __m128i _mm_permutevar8x16_epi16 (__m128i a, __m128i b)
/// VPERMW xmm1 {k1}{z}, xmm2, xmm3/m128
@@ -202,6 +264,38 @@ internal X64() { }
///
public static Vector512 BroadcastScalarToVector512(Vector128 value) => BroadcastScalarToVector512(value);
+ ///
+ /// __m256i _mm512_cvtepi16_epi8 (__m512i a)
+ /// VPMOVWB ymm1/m256 {k1}{z}, zmm2
+ ///
+ public static Vector256 ConvertToVector256Byte(Vector512 value) => ConvertToVector256Byte(value);
+ ///
+ /// __m256i _mm512_cvtepi16_epi8 (__m512i a)
+ /// VPMOVWB ymm1/m256 {k1}{z}, zmm2
+ ///
+ public static Vector256 ConvertToVector256Byte(Vector512 value) => ConvertToVector256Byte(value);
+ ///
+ /// __m256i _mm512_cvtusepi16_epi8 (__m512i a)
+ /// VPMOVUWB ymm1/m256 {k1}{z}, zmm2
+ ///
+ public static Vector256 ConvertToVector256ByteWithSaturation(Vector512 value) => ConvertToVector256ByteWithSaturation(value);
+
+ ///
+ /// __m256i _mm512_cvtepi16_epi8 (__m512i a)
+ /// VPMOVWB ymm1/m256 {k1}{z}, zmm2
+ ///
+ public static Vector256 ConvertToVector256SByte(Vector512 value) => ConvertToVector256SByte(value);
+ ///
+ /// __m256i _mm512_cvtepi16_epi8 (__m512i a)
+ /// VPMOVWB ymm1/m256 {k1}{z}, zmm2
+ ///
+ public static Vector256 ConvertToVector256SByte(Vector512 value) => ConvertToVector256SByte(value);
+ ///
+ /// __m256i _mm512_cvtsepi16_epi8 (__m512i a)
+ /// VPMOVSWB ymm1/m256 {k1}{z}, zmm2
+ ///
+ public static Vector256 ConvertToVector256SByteWithSaturation(Vector512 value) => ConvertToVector256SByteWithSaturation(value);
+
///
/// __m512i _mm512_cvtepi8_epi16 (__m128i a)
/// VPMOVSXBW zmm1 {k1}{z}, ymm2/m256
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512DQ.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512DQ.PlatformNotSupported.cs
index 63bc6ecd58ffe8..926c8737a166fa 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512DQ.PlatformNotSupported.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512DQ.PlatformNotSupported.cs
@@ -32,11 +32,6 @@ internal VL() { }
/// VBROADCASTI32x2 xmm1 {k1}{z}, xmm2/m64
///
public static Vector128 BroadcastPairScalarToVector128(Vector128 value) { throw new PlatformNotSupportedException(); }
- ///
- /// __m128 _mm_broadcast_f32x2 (__m128 a)
- /// VBROADCASTF32x2 xmm1 {k1}{z}, xmm2/m64
- ///
- public static Vector128 BroadcastPairScalarToVector128(Vector128 value) { throw new PlatformNotSupportedException(); }
///
/// __m256i _mm256_broadcast_i32x2 (__m128i a)
@@ -54,6 +49,128 @@ internal VL() { }
///
public static Vector256 BroadcastPairScalarToVector256(Vector128 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m128d _mm_cvtepi64_pd (__m128i a)
+ /// VCVTQQ2PD xmm1 {k1}{z}, xmm2/m128/m64bcst
+ ///
+ public static Vector128 ConvertToVector128Double(Vector128 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m128d _mm_cvtepu64_pd (__m128i a)
+ /// VCVTUQQ2PD xmm1 {k1}{z}, xmm2/m128/m64bcst
+ ///
+ public static Vector128 ConvertToVector128Double(Vector128 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m128i _mm_cvtps_epi64 (__m128 a)
+ /// VCVTPS2QQ xmm1 {k1}{z}, xmm2/m64/m32bcst
+ ///
+ public static Vector128 ConvertToVector128Int64(Vector128 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m128i _mm_cvtpd_epi64 (__m128d a)
+ /// VCVTPD2QQ xmm1 {k1}{z}, xmm2/m128/m64bcst
+ ///
+ public static Vector128 ConvertToVector128Int64(Vector128 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m128i _mm_cvttps_epi64 (__m128 a)
+ /// VCVTTPS2QQ xmm1 {k1}{z}, xmm2/m64/m32bcst
+ ///
+ public static Vector128 ConvertToVector128Int64WithTruncation(Vector128 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m128i _mm_cvttpd_epi64 (__m128d a)
+ /// VCVTTPD2QQ xmm1 {k1}{z}, xmm2/m128/m64bcst
+ ///
+ public static Vector128 ConvertToVector128Int64WithTruncation(Vector128 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m128 _mm_cvtepi64_ps (__m128i a)
+ /// VCVTQQ2PS xmm1 {k1}{z}, xmm2/m128/m64bcst
+ ///
+ public static Vector128 ConvertToVector128Single(Vector128 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m128 _mm256_cvtepi64_ps (__m256i a)
+ /// VCVTQQ2PS xmm1 {k1}{z}, ymm2/m256/m64bcst
+ ///
+ public static Vector128 ConvertToVector128Single(Vector256 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m128 _mm_cvtepu64_ps (__m128i a)
+ /// VCVTUQQ2PS xmm1 {k1}{z}, xmm2/m128/m64bcst
+ ///
+ public static Vector128 ConvertToVector128Single(Vector128 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m128 _mm256_cvtepu64_ps (__m256i a)
+ /// VCVTUQQ2PS xmm1 {k1}{z}, ymm2/m256/m64bcst
+ ///
+ public static Vector128 ConvertToVector128Single(Vector256 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m128i _mm_cvtps_epu64 (__m128 a)
+ /// VCVTPS2UQQ xmm1 {k1}{z}, xmm2/m64/m32bcst
+ ///
+ public static Vector128 ConvertToVector128UInt64(Vector128 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m128i _mm_cvtpd_epu64 (__m128d a)
+ /// VCVTPD2UQQ xmm1 {k1}{z}, xmm2/m128/m64bcst
+ ///
+ public static Vector128 ConvertToVector128UInt64(Vector128 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m128i _mm_cvttps_epu64 (__m128 a)
+ /// VCVTTPS2UQQ xmm1 {k1}{z}, xmm2/m64/m32bcst
+ ///
+ public static Vector128 ConvertToVector128UInt64WithTruncation(Vector128 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m128i _mm_cvttpd_epu64 (__m128d a)
+ /// VCVTTPD2UQQ xmm1 {k1}{z}, xmm2/m128/m64bcst
+ ///
+ public static Vector128 ConvertToVector128UInt64WithTruncation(Vector128 value) { throw new PlatformNotSupportedException(); }
+
+ ///
+ /// __m256d _mm256_cvtepi64_pd (__m256i a)
+ /// VCVTQQ2PD ymm1 {k1}{z}, ymm2/m256/m64bcst
+ ///
+ public static Vector256 ConvertToVector256Double(Vector256 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m256d _mm256_cvtepu64_pd (__m256i a)
+ /// VCVTUQQ2PD ymm1 {k1}{z}, ymm2/m256/m64bcst
+ ///
+ public static Vector256 ConvertToVector256Double(Vector256 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m256i _mm256_cvtps_epi64 (__m128 a)
+ /// VCVTPS2QQ ymm1 {k1}{z}, xmm2/m128/m32bcst
+ ///
+ public static Vector256 ConvertToVector256Int64(Vector128 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m256i _mm256_cvtpd_epi64 (__m256d a)
+ /// VCVTPD2QQ ymm1 {k1}{z}, ymm2/m256/m64bcst
+ ///
+ public static Vector256 ConvertToVector256Int64(Vector256 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m256i _mm256_cvttps_epi64 (__m128 a)
+ /// VCVTTPS2QQ ymm1 {k1}{z}, xmm2/m128/m32bcst
+ ///
+ public static Vector256 ConvertToVector256Int64WithTruncation(Vector128 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m256i _mm256_cvttpd_epi64 (__m256d a)
+ /// VCVTTPD2QQ ymm1 {k1}{z}, ymm2/m256/m64bcst
+ ///
+ public static Vector256 ConvertToVector256Int64WithTruncation(Vector256 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m256i _mm256_cvtps_epu64 (__m128 a)
+ /// VCVTPS2UQQ ymm1 {k1}{z}, xmm2/m128/m32bcst
+ ///
+ public static Vector256 ConvertToVector256UInt64(Vector128 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m256i _mm256_cvtpd_epu64 (__m256d a)
+ /// VCVTPD2UQQ ymm1 {k1}{z}, ymm2/m256/m64bcst
+ ///
+ public static Vector256 ConvertToVector256UInt64(Vector256 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m256i _mm256_cvttps_epu64 (__m128 a)
+ /// VCVTTPS2UQQ ymm1 {k1}{z}, xmm2/m128/m32bcst
+ ///
+ public static Vector256 ConvertToVector256UInt64WithTruncation(Vector128 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m256i _mm256_cvttpd_epu64 (__m256d a)
+ /// VCVTTPD2UQQ ymm1 {k1}{z}, ymm2/m256/m64bcst
+ ///
+ public static Vector256 ConvertToVector256UInt64WithTruncation(Vector256 value) { throw new PlatformNotSupportedException(); }
+
///
/// __m128i _mm_mullo_epi64 (__m128i a, __m128i b)
/// VPMULLQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
@@ -153,6 +270,68 @@ internal X64() { }
///
public static unsafe Vector512 BroadcastVector256ToVector512(float* address) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512 _mm512_cvtepi64_ps (__m512i a)
+ /// VCVTQQ2PS ymm1 {k1}{z}, zmm2/m512/m64bcst
+ ///
+ public static Vector256 ConvertToVector256Single(Vector512 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512 _mm512_cvtepu64_ps (__m512i a)
+ /// VCVTUQQ2PS ymm1 {k1}{z}, zmm2/m512/m64bcst
+ ///
+ public static Vector256 ConvertToVector256Single(Vector512 value) { throw new PlatformNotSupportedException(); }
+
+ ///
+ /// __m512d _mm512_cvtepi64_pd (__m512i a)
+ /// VCVTQQ2PD zmm1 {k1}{z}, zmm2/m512/m64bcst
+ ///
+ public static Vector512 ConvertToVector512Double(Vector512 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512d _mm512_cvtepu64_pd (__m512i a)
+ /// VCVTUQQ2PD zmm1 {k1}{z}, zmm2/m512/m64bcst
+ ///
+ public static Vector512 ConvertToVector512Double(Vector512 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_cvtps_epi64 (__m512 a)
+ /// VCVTPS2QQ zmm1 {k1}{z}, ymm2/m256/m32bcst{er}
+ ///
+ public static Vector512 ConvertToVector512Int64(Vector256 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_cvtpd_epi64 (__m512d a)
+ /// VCVTPD2QQ zmm1 {k1}{z}, zmm2/m512/m64bcst{er}
+ ///
+ public static Vector512 ConvertToVector512Int64(Vector512 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_cvttps_epi64 (__m512 a)
+ /// VCVTTPS2QQ zmm1 {k1}{z}, ymm2/m256/m32bcst{er}
+ ///
+ public static Vector512 ConvertToVector512Int64WithTruncation(Vector256 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_cvttpd_epi64 (__m512 a)
+ /// VCVTTPD2QQ zmm1 {k1}{z}, zmm2/m512/m64bcst{sae}
+ ///
+ public static Vector512 ConvertToVector512Int64WithTruncation(Vector512 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_cvtps_epu64 (__m512 a)
+ /// VCVTPS2UQQ zmm1 {k1}{z}, ymm2/m256/m32bcst{er}
+ ///
+ public static Vector512 ConvertToVector512UInt64(Vector256 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_cvtpd_epu64 (__m512d a)
+ /// VCVTPD2UQQ zmm1 {k1}{z}, zmm2/m512/m64bcst{er}
+ ///
+ public static Vector512 ConvertToVector512UInt64(Vector512 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_cvttps_epu64 (__m512 a)
+ /// VCVTTPS2UQQ zmm1 {k1}{z}, ymm2/m256/m32bcst{er}
+ ///
+ public static Vector512 ConvertToVector512UInt64WithTruncation(Vector256 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_cvttpd_epu64 (__m512d a)
+ /// VCVTTPD2UQQ zmm1 {k1}{z}, zmm2/m512/m64bcst{er}
+ ///
+ public static Vector512 ConvertToVector512UInt64WithTruncation(Vector512 value) { throw new PlatformNotSupportedException(); }
+
///
/// __m128i _mm512_extracti64x2_epi64 (__m512i a, const int imm8)
/// VEXTRACTI64x2 xmm1/m128 {k1}{z}, zmm2, imm8
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512DQ.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512DQ.cs
index 1f4ede8ccd3783..3fb336d7da0554 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512DQ.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512DQ.cs
@@ -32,11 +32,6 @@ internal VL() { }
/// VBROADCASTI32x2 xmm1 {k1}{z}, xmm2/m64
///
public static Vector128 BroadcastPairScalarToVector128(Vector128 value) => BroadcastPairScalarToVector128(value);
- ///
- /// __m128 _mm_broadcast_f32x2 (__m128 a)
- /// VBROADCASTF32x2 xmm1 {k1}{z}, xmm2/m64
- ///
- public static Vector128 BroadcastPairScalarToVector128(Vector128 value) => BroadcastPairScalarToVector128(value);
///
/// __m256i _mm256_broadcast_i32x2 (__m128i a)
@@ -54,6 +49,128 @@ internal VL() { }
///
public static Vector256 BroadcastPairScalarToVector256(Vector128 value) => BroadcastPairScalarToVector256(value);
+ ///
+ /// __m128d _mm_cvtepi64_pd (__m128i a)
+ /// VCVTQQ2PD xmm1 {k1}{z}, xmm2/m128/m64bcst
+ ///
+ public static Vector128 ConvertToVector128Double(Vector128 value) => ConvertToVector128Double(value);
+ ///
+ /// __m128d _mm_cvtepu64_pd (__m128i a)
+ /// VCVTUQQ2PD xmm1 {k1}{z}, xmm2/m128/m64bcst
+ ///
+ public static Vector128 ConvertToVector128Double(Vector128 value) => ConvertToVector128Double(value);
+ ///
+ /// __m128i _mm_cvtps_epi64 (__m128 a)
+ /// VCVTPS2QQ xmm1 {k1}{z}, xmm2/m64/m32bcst
+ ///
+ public static Vector128 ConvertToVector128Int64(Vector128 value) => ConvertToVector128Int64(value);
+ ///
+ /// __m128i _mm_cvtpd_epi64 (__m128d a)
+ /// VCVTPD2QQ xmm1 {k1}{z}, xmm2/m128/m64bcst
+ ///
+ public static Vector128 ConvertToVector128Int64(Vector128 value) => ConvertToVector128Int64(value);
+ ///
+ /// __m128i _mm_cvttps_epi64 (__m128 a)
+ /// VCVTTPS2QQ xmm1 {k1}{z}, xmm2/m64/m32bcst
+ ///
+ public static Vector128 ConvertToVector128Int64WithTruncation(Vector128 value) => ConvertToVector128Int64WithTruncation(value);
+ ///
+ /// __m128i _mm_cvttpd_epi64 (__m128d a)
+ /// VCVTTPD2QQ xmm1 {k1}{z}, xmm2/m128/m64bcst
+ ///
+ public static Vector128 ConvertToVector128Int64WithTruncation(Vector128 value) => ConvertToVector128Int64WithTruncation(value);
+ ///
+ /// __m128 _mm_cvtepi64_ps (__m128i a)
+ /// VCVTQQ2PS xmm1 {k1}{z}, xmm2/m128/m64bcst
+ ///
+ public static Vector128 ConvertToVector128Single(Vector128 value) => ConvertToVector128Single(value);
+ ///
+ /// __m128 _mm256_cvtepi64_ps (__m256i a)
+ /// VCVTQQ2PS xmm1 {k1}{z}, ymm2/m256/m64bcst
+ ///
+ public static Vector128 ConvertToVector128Single(Vector256 value) => ConvertToVector128Single(value);
+ ///
+ /// __m128 _mm_cvtepu64_ps (__m128i a)
+ /// VCVTUQQ2PS xmm1 {k1}{z}, xmm2/m128/m64bcst
+ ///
+ public static Vector128 ConvertToVector128Single(Vector128 value) => ConvertToVector128Single(value);
+ ///
+ /// __m128 _mm256_cvtepu64_ps (__m256i a)
+ /// VCVTUQQ2PS xmm1 {k1}{z}, ymm2/m256/m64bcst
+ ///
+ public static Vector128 ConvertToVector128Single(Vector256 value) => ConvertToVector128Single(value);
+ ///
+ /// __m128i _mm_cvtps_epu64 (__m128 a)
+ /// VCVTPS2UQQ xmm1 {k1}{z}, xmm2/m64/m32bcst
+ ///
+ public static Vector128 ConvertToVector128UInt64(Vector128 value) => ConvertToVector128UInt64(value);
+ ///
+ /// __m128i _mm_cvtpd_epu64 (__m128d a)
+ /// VCVTPD2UQQ xmm1 {k1}{z}, xmm2/m128/m64bcst
+ ///
+ public static Vector128 ConvertToVector128UInt64(Vector128 value) => ConvertToVector128UInt64(value);
+ ///
+ /// __m128i _mm_cvttps_epu64 (__m128 a)
+ /// VCVTTPS2UQQ xmm1 {k1}{z}, xmm2/m64/m32bcst
+ ///
+ public static Vector128 ConvertToVector128UInt64WithTruncation(Vector128 value) => ConvertToVector128UInt64WithTruncation(value);
+ ///
+ /// __m128i _mm_cvttpd_epu64 (__m128d a)
+ /// VCVTTPD2UQQ xmm1 {k1}{z}, xmm2/m128/m64bcst
+ ///
+ public static Vector128 ConvertToVector128UInt64WithTruncation(Vector128 value) => ConvertToVector128UInt64WithTruncation(value);
+
+ ///
+ /// __m256d _mm256_cvtepi64_pd (__m256i a)
+ /// VCVTQQ2PD ymm1 {k1}{z}, ymm2/m256/m64bcst
+ ///
+ public static Vector256 ConvertToVector256Double(Vector256 value) => ConvertToVector256Double(value);
+ ///
+ /// __m256d _mm256_cvtepu64_pd (__m256i a)
+ /// VCVTUQQ2PD ymm1 {k1}{z}, ymm2/m256/m64bcst
+ ///
+ public static Vector256 ConvertToVector256Double(Vector256 value) => ConvertToVector256Double(value);
+ ///
+ /// __m256i _mm256_cvtps_epi64 (__m128 a)
+ /// VCVTPS2QQ ymm1 {k1}{z}, xmm2/m128/m32bcst
+ ///
+ public static Vector256 ConvertToVector256Int64(Vector128 value) => ConvertToVector256Int64(value);
+ ///
+ /// __m256i _mm256_cvtpd_epi64 (__m256d a)
+ /// VCVTPD2QQ ymm1 {k1}{z}, ymm2/m256/m64bcst
+ ///
+ public static Vector256 ConvertToVector256Int64(Vector256 value) => ConvertToVector256Int64(value);
+ ///
+ /// __m256i _mm256_cvttps_epi64 (__m128 a)
+ /// VCVTTPS2QQ ymm1 {k1}{z}, xmm2/m128/m32bcst
+ ///
+ public static Vector256 ConvertToVector256Int64WithTruncation(Vector128 value) => ConvertToVector256Int64WithTruncation(value);
+ ///
+ /// __m256i _mm256_cvttpd_epi64 (__m256d a)
+ /// VCVTTPD2QQ ymm1 {k1}{z}, ymm2/m256/m64bcst
+ ///
+ public static Vector256 ConvertToVector256Int64WithTruncation(Vector256 value) => ConvertToVector256Int64WithTruncation(value);
+ ///
+ /// __m256i _mm256_cvtps_epu64 (__m128 a)
+ /// VCVTPS2UQQ ymm1 {k1}{z}, xmm2/m128/m32bcst
+ ///
+ public static Vector256 ConvertToVector256UInt64(Vector128 value) => ConvertToVector256UInt64(value);
+ ///
+ /// __m256i _mm256_cvtpd_epu64 (__m256d a)
+ /// VCVTPD2UQQ ymm1 {k1}{z}, ymm2/m256/m64bcst
+ ///
+ public static Vector256 ConvertToVector256UInt64(Vector256 value) => ConvertToVector256UInt64(value);
+ ///
+ /// __m256i _mm256_cvttps_epu64 (__m128 a)
+ /// VCVTTPS2UQQ ymm1 {k1}{z}, xmm2/m128/m32bcst
+ ///
+ public static Vector256 ConvertToVector256UInt64WithTruncation(Vector128 value) => ConvertToVector256UInt64WithTruncation(value);
+ ///
+ /// __m256i _mm256_cvttpd_epu64 (__m256d a)
+ /// VCVTTPD2UQQ ymm1 {k1}{z}, ymm2/m256/m64bcst
+ ///
+ public static Vector256 ConvertToVector256UInt64WithTruncation(Vector256 value) => ConvertToVector256UInt64WithTruncation(value);
+
///
/// __m128i _mm_mullo_epi64 (__m128i a, __m128i b)
/// VPMULLQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
@@ -154,6 +271,68 @@ internal X64() { }
///
public static unsafe Vector512 BroadcastVector256ToVector512(float* address) => BroadcastVector256ToVector512(address);
+ ///
+ /// __m512 _mm512_cvtepi64_ps (__m512i a)
+ /// VCVTQQ2PS ymm1 {k1}{z}, zmm2/m512/m64bcst
+ ///
+ public static Vector256 ConvertToVector256Single(Vector512 value) => ConvertToVector256Single(value);
+ ///
+ /// __m512 _mm512_cvtepu64_ps (__m512i a)
+ /// VCVTUQQ2PS ymm1 {k1}{z}, zmm2/m512/m64bcst
+ ///
+ public static Vector256 ConvertToVector256Single(Vector512 value) => ConvertToVector256Single(value);
+
+ ///
+ /// __m512d _mm512_cvtepi64_pd (__m512i a)
+ /// VCVTQQ2PD zmm1 {k1}{z}, zmm2/m512/m64bcst
+ ///
+ public static Vector512 ConvertToVector512Double(Vector512 value) => ConvertToVector512Double(value);
+ ///
+ /// __m512d _mm512_cvtepu64_pd (__m512i a)
+ /// VCVTUQQ2PD zmm1 {k1}{z}, zmm2/m512/m64bcst
+ ///
+ public static Vector512 ConvertToVector512Double(Vector512 value) => ConvertToVector512Double(value);
+ ///
+ /// __m512i _mm512_cvtps_epi64 (__m512 a)
+ /// VCVTPS2QQ zmm1 {k1}{z}, ymm2/m256/m32bcst{er}
+ ///
+ public static Vector512 ConvertToVector512Int64(Vector256 value) => ConvertToVector512Int64(value);
+ ///
+ /// __m512i _mm512_cvtpd_epi64 (__m512d a)
+ /// VCVTPD2QQ zmm1 {k1}{z}, zmm2/m512/m64bcst{er}
+ ///
+ public static Vector512 ConvertToVector512Int64(Vector512 value) => ConvertToVector512Int64(value);
+ ///
+ /// __m512i _mm512_cvttps_epi64 (__m512 a)
+ /// VCVTTPS2QQ zmm1 {k1}{z}, ymm2/m256/m32bcst{er}
+ ///
+ public static Vector512 ConvertToVector512Int64WithTruncation(Vector256 value) => ConvertToVector512Int64WithTruncation(value);
+ ///
+ /// __m512i _mm512_cvttpd_epi64 (__m512 a)
+ /// VCVTTPD2QQ zmm1 {k1}{z}, zmm2/m512/m64bcst{sae}
+ ///
+ public static Vector512 ConvertToVector512Int64WithTruncation(Vector512 value) => ConvertToVector512Int64WithTruncation(value);
+ ///
+ /// __m512i _mm512_cvtps_epu64 (__m512 a)
+ /// VCVTPS2UQQ zmm1 {k1}{z}, ymm2/m256/m32bcst{er}
+ ///
+ public static Vector512 ConvertToVector512UInt64(Vector256 value) => ConvertToVector512UInt64(value);
+ ///
+ /// __m512i _mm512_cvtpd_epu64 (__m512d a)
+ /// VCVTPD2UQQ zmm1 {k1}{z}, zmm2/m512/m64bcst{er}
+ ///
+ public static Vector512 ConvertToVector512UInt64(Vector512 value) => ConvertToVector512UInt64(value);
+ ///
+ /// __m512i _mm512_cvttps_epu64 (__m512 a)
+ /// VCVTTPS2UQQ zmm1 {k1}{z}, ymm2/m256/m32bcst{er}
+ ///
+ public static Vector512 ConvertToVector512UInt64WithTruncation(Vector256 value) => ConvertToVector512UInt64WithTruncation(value);
+ ///
+ /// __m512i _mm512_cvttpd_epu64 (__m512d a)
+ /// VCVTTPD2UQQ zmm1 {k1}{z}, zmm2/m512/m64bcst{er}
+ ///
+ public static Vector512 ConvertToVector512UInt64WithTruncation(Vector512 value) => ConvertToVector512UInt64WithTruncation(value);
+
///
/// __m128i _mm512_extracti64x2_epi64 (__m512i a, const int imm8)
/// VEXTRACTI64x2 xmm1/m128 {k1}{z}, zmm2, imm8
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512F.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512F.PlatformNotSupported.cs
index 2d358ef118b635..0184d52f7b695f 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512F.PlatformNotSupported.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512F.PlatformNotSupported.cs
@@ -33,6 +33,375 @@ internal VL() { }
///
public static Vector256 Abs(Vector256 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m128i _mm_cvtepi32_epi8 (__m128i a)
+ /// VPMOVDB xmm1/m32 {k1}{z}, xmm2
+ ///
+ public static Vector128 ConvertToVector128Byte(Vector128 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m128i _mm_cvtepi64_epi8 (__m128i a)
+ /// VPMOVQB xmm1/m16 {k1}{z}, xmm2
+ ///
+ public static Vector128 ConvertToVector128Byte(Vector128 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m128i _mm_cvtepi32_epi8 (__m128i a)
+ /// VPMOVDB xmm1/m32 {k1}{z}, xmm2
+ ///
+ public static Vector128 ConvertToVector128Byte(Vector128 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m128i _mm_cvtepi64_epi8 (__m128i a)
+ /// VPMOVQB xmm1/m16 {k1}{z}, xmm2
+ ///
+ public static Vector128 ConvertToVector128Byte(Vector128 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m128i _mm256_cvtepi32_epi8 (__m256i a)
+ /// VPMOVDB xmm1/m64 {k1}{z}, ymm2
+ ///
+ public static Vector128 ConvertToVector128Byte(Vector256 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m128i _mm256_cvtepi64_epi8 (__m256i a)
+ /// VPMOVQB xmm1/m32 {k1}{z}, ymm2
+ ///
+ public static Vector128 ConvertToVector128Byte(Vector256 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m128i _mm256_cvtepi32_epi8 (__m256i a)
+ /// VPMOVDB xmm1/m64 {k1}{z}, ymm2
+ ///
+ public static Vector128 ConvertToVector128Byte(Vector256 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m128i _mm256_cvtepi64_epi8 (__m256i a)
+ /// VPMOVQB xmm1/m32 {k1}{z}, ymm2
+ ///
+ public static Vector128 ConvertToVector128Byte(Vector256 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m128i _mm_cvtusepi32_epi8 (__m128i a)
+ /// VPMOVUSDB xmm1/m32 {k1}{z}, xmm2
+ ///
+ public static Vector128 ConvertToVector128ByteWithSaturation(Vector128