diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index edfca27eb7495a..ec98ef894d30b7 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -34,9 +34,19 @@ bool emitter::IsSSEOrAVXInstruction(instruction ins) return (ins >= INS_FIRST_SSE_INSTRUCTION) && (ins <= INS_LAST_AVX_INSTRUCTION); } +//------------------------------------------------------------------------ +// IsKInstruction: Does this instruction require K register. +// +// Arguments: +// ins - The instruction to check. +// +// Returns: +// `true` if this instruction require K register. +// bool emitter::IsKInstruction(instruction ins) { - return (ins >= INS_FIRST_K_INSTRUCTION) && (ins <= INS_LAST_K_INSTRUCTION); + insFlags flags = CodeGenInterface::instInfo[ins]; + return (flags & KInstruction) != 0; } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index a867edca504656..f7631f301c5f8a 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -20386,9 +20386,7 @@ GenTree* Compiler::gtNewSimdCmpOpNode(genTreeOps op, } else if (simdSize == 64) { - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F)); - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512BW)); - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512DQ)); + assert(IsBaselineVector512IsaSupportedDebugOnly()); intrinsic = NI_AVX512F_CompareEqualSpecial; } else if (simdBaseType == TYP_FLOAT) @@ -21021,9 +21019,7 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode(genTreeOps op, { if (simdSize == 64) { - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F)); - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512BW)); - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512DQ)); + assert(IsBaselineVector512IsaSupportedDebugOnly()); intrinsic = NI_Vector512_op_Equality; } else if (simdSize == 32) @@ -21179,9 +21175,7 @@ GenTree* Compiler::gtNewSimdCmpOpAnyNode(genTreeOps op, } else if (simdSize == 64) { - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F)); - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512BW)); - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512DQ)); + assert(IsBaselineVector512IsaSupportedDebugOnly()); intrinsic = NI_Vector512_op_Inequality; } else @@ -21209,9 +21203,7 @@ GenTree* Compiler::gtNewSimdCmpOpAnyNode(genTreeOps op, { if (simdSize == 64) { - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F)); - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512BW)); - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512DQ)); + assert(IsBaselineVector512IsaSupportedDebugOnly()); intrinsic = NI_Vector512_op_Inequality; } else if (simdSize == 32) diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index 370bee078fc5c1..afdf57a14d3d0d 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -1305,17 +1305,13 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_Vector512_Equals: { assert(sig->numArgs == 2); + assert(IsBaselineVector512IsaSupportedDebugOnly()); - if (IsBaselineVector512IsaSupported()) - { - var_types simdType = getSIMDTypeForSize(simdSize); - - op2 = impSIMDPopStack(simdType); - op1 = impSIMDPopStack(simdType); - - retNode = gtNewSimdCmpOpNode(GT_EQ, retType, op1, op2, simdBaseJitType, simdSize, + var_types simdType = getSIMDTypeForSize(simdSize); + op2 = impSIMDPopStack(simdType); + op1 = impSIMDPopStack(simdType); + retNode = gtNewSimdCmpOpNode(GT_EQ, retType, op1, op2, simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ false); - } break; } @@ -1323,18 +1319,15 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_Vector512_op_Equality: { assert(sig->numArgs == 2); + assert(IsBaselineVector512IsaSupportedDebugOnly()); - if (IsBaselineVector512IsaSupported()) - { - var_types simdType = getSIMDTypeForSize(simdSize); - - op2 = impSIMDPopStack(simdType); - op1 = impSIMDPopStack(simdType); + var_types simdType = getSIMDTypeForSize(simdSize); - retNode = gtNewSimdCmpOpAllNode(GT_EQ, retType, op1, op2, simdBaseJitType, simdSize, - /* isSimdAsHWIntrinsic */ false); - } + op2 = impSIMDPopStack(simdType); + op1 = impSIMDPopStack(simdType); + retNode = gtNewSimdCmpOpAllNode(GT_EQ, retType, op1, op2, simdBaseJitType, simdSize, + /* isSimdAsHWIntrinsic */ false); break; } @@ -1362,17 +1355,15 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { assert(sig->numArgs == 2); assert(simdSize == 64); + assert(IsBaselineVector512IsaSupportedDebugOnly()); - if (IsBaselineVector512IsaSupported()) - { - var_types simdType = getSIMDTypeForSize(simdSize); + var_types simdType = getSIMDTypeForSize(simdSize); - op2 = impSIMDPopStack(simdType); - op1 = impSIMDPopStack(simdType); + op2 = impSIMDPopStack(simdType); + op1 = impSIMDPopStack(simdType); - retNode = gtNewSimdCmpOpAnyNode(GT_EQ, retType, op1, op2, simdBaseJitType, simdSize, - /* isSimdAsHWIntrinsic */ false); - } + retNode = gtNewSimdCmpOpAnyNode(GT_EQ, retType, op1, op2, simdBaseJitType, simdSize, + /* isSimdAsHWIntrinsic */ false); break; } diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h index 1ef608460d6831..7ec6fe74f94396 100644 --- a/src/coreclr/jit/instr.h +++ b/src/coreclr/jit/instr.h @@ -171,6 +171,8 @@ enum insFlags : uint64_t Encoding_VEX = 1ULL << 37, Encoding_EVEX = 1ULL << 38, + KInstruction = 1ULL << 39, + // Listed above so it is "inline" with the other Resets_* flags // Resets_ZF = 1ULL << 39, diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h index 0f7b90f8f15f77..8cae07f102c73b 100644 --- a/src/coreclr/jit/instrsxarch.h +++ b/src/coreclr/jit/instrsxarch.h @@ -599,75 +599,64 @@ INST3(shrx, "shrx", IUM_WR, BAD_CODE, BAD_CODE, INST3(LAST_BMI_INSTRUCTION, "LAST_BMI_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None) -INST3(FIRST_K_INSTRUCTION, "FIRST_K_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None) - -INST3(kmovb_gpr, "kmovb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x92), INS_TT_NONE, REX_W0 | Encoding_VEX) -INST3(kmovw_gpr, "kmovw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x92), INS_TT_NONE, REX_W0 | Encoding_VEX) -INST3(kmovd_gpr, "kmovd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x92), INS_TT_NONE, REX_W0 | Encoding_VEX) -INST3(kmovq_gpr, "kmovq", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x92), INS_TT_NONE, REX_W1 | Encoding_VEX) - -INST3(kmovb_msk, "kmovb", IUM_WR, PCKDBL(0x91), BAD_CODE, PCKDBL(0x90), INS_TT_NONE, REX_W0 | Encoding_VEX) -INST3(kmovw_msk, "kmovw", IUM_WR, PCKFLT(0x91), BAD_CODE, PCKFLT(0x90), INS_TT_NONE, REX_W0 | Encoding_VEX) -INST3(kmovd_msk, "kmovd", IUM_WR, PCKDBL(0x91), BAD_CODE, PCKDBL(0x90), INS_TT_NONE, REX_W1 | Encoding_VEX) -INST3(kmovq_msk, "kmovq", IUM_WR, PCKFLT(0x91), BAD_CODE, PCKFLT(0x90), INS_TT_NONE, REX_W1 | Encoding_VEX) - -INST3(kortestb, "kortestb", IUM_WR, BAD_CODE, BAD_CODE, PACK3(0x66, 0x0F, 0x98), INS_TT_NONE, REX_W0 | Encoding_VEX | Writes_ZF | Writes_CF ) -INST3(kortestw, "kortestw", IUM_WR, BAD_CODE, BAD_CODE, PACK2(0x0F, 0x98), INS_TT_NONE, REX_W0 | Encoding_VEX | Writes_ZF | Writes_CF ) -INST3(kortestd, "kortestd", IUM_WR, BAD_CODE, BAD_CODE, PACK3(0x66, 0x0F, 0x98), INS_TT_NONE, REX_W1 | Encoding_VEX | Writes_ZF | Writes_CF ) -INST3(kortestq, "kortestq", IUM_WR, BAD_CODE, BAD_CODE, PACK2(0x0F, 0x98), INS_TT_NONE, REX_W1 | Encoding_VEX | Writes_ZF | Writes_CF ) - -INST3(LAST_K_INSTRUCTION, "LAST_K_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None ) - INST3(LAST_AVX_INSTRUCTION, "LAST_AVX_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None) INST3(FIRST_AVX512_INSTRUCTION, "FIRST_AVX512_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None) // AVX512F -INST3(vinsertf64x4, "insertf64x4", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1A), INS_TT_TUPLE4, Input_64Bit | REX_W1_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed double-precision floating point values -INST3(vinserti64x4, "inserti64x4", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x3A), INS_TT_TUPLE4, Input_64Bit | REX_W1_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed quadword integer values -INST3(vmovdqa64, "movdqa64", IUM_WR, PCKDBL(0x7F), BAD_CODE, PCKDBL(0x6F), INS_TT_FULL_MEM, Input_64Bit | REX_W1_EVEX | Encoding_EVEX | INS_FLAGS_None) -INST3(vmovdqu64, "movdqu64", IUM_WR, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F), INS_TT_FULL_MEM, Input_64Bit | REX_W1_EVEX | Encoding_EVEX | INS_FLAGS_None) -INST3(vpandq, "pandq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDB), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise AND of two xmm regs -INST3(vpandnq, "pandnq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDF), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise AND NOT of two xmm regs -INST3(vporq, "porq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEB), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise OR of two xmm regs -INST3(vpternlogd, "pternlogd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x25), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) -INST3(vpxorq, "pxorq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEF), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise XOR of two xmm regs +INST3(kortestw, "kortestw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x98), INS_TT_NONE, REX_W0 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) +INST3(kmovw_gpr, "kmovw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x92), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) +INST3(kmovw_msk, "kmovw", IUM_WR, PCKFLT(0x91), BAD_CODE, PCKFLT(0x90), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) +INST3(vinsertf64x4, "insertf64x4", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1A), INS_TT_TUPLE4, Input_64Bit | REX_W1_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed double-precision floating point values +INST3(vinserti64x4, "inserti64x4", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x3A), INS_TT_TUPLE4, Input_64Bit | REX_W1_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed quadword integer values +INST3(vmovdqa64, "movdqa64", IUM_WR, PCKDBL(0x7F), BAD_CODE, PCKDBL(0x6F), INS_TT_FULL_MEM, Input_64Bit | REX_W1_EVEX | Encoding_EVEX | INS_FLAGS_None) +INST3(vmovdqu64, "movdqu64", IUM_WR, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F), INS_TT_FULL_MEM, Input_64Bit | REX_W1_EVEX | Encoding_EVEX | INS_FLAGS_None) +INST3(vpandq, "pandq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDB), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise AND of two xmm regs +INST3(vpandnq, "pandnq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDF), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise AND NOT of two xmm regs +INST3(vporq, "porq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEB), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise OR of two xmm regs +INST3(vpternlogd, "pternlogd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x25), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) +INST3(vpxorq, "pxorq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEF), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise XOR of two xmm regs // AVX512BW -INST3(vmovdqu8, "movdqu8", IUM_WR, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F), INS_TT_FULL_MEM, Input_8Bit | REX_W0_EVEX | Encoding_EVEX) -INST3(vmovdqu16, "movdqu16", IUM_WR, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F), INS_TT_FULL_MEM, Input_16Bit | REX_W1_EVEX | Encoding_EVEX) -INST3(vpmovb2m, "pmovb2m", IUM_WR, BAD_CODE, BAD_CODE, PACK4(0xF3, 0x0F, 0x38, 0x29), INS_TT_NONE, Input_8Bit | REX_W0_EVEX | Encoding_EVEX) -INST3(vpmovw2m, "pmovw2m", IUM_WR, BAD_CODE, BAD_CODE, PACK4(0xF3, 0x0F, 0x38, 0x29), INS_TT_NONE, Input_16Bit | REX_W1_EVEX | Encoding_EVEX) +INST3(kortestd, "kortestd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x98), INS_TT_NONE, REX_W1 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) +INST3(kortestq, "kortestq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x98), INS_TT_NONE, REX_W1 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) +INST3(kmovd_gpr, "kmovd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x92), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) +INST3(kmovd_msk, "kmovd", IUM_WR, PCKDBL(0x91), BAD_CODE, PCKDBL(0x90), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) +INST3(kmovq_gpr, "kmovq", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x92), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) +INST3(kmovq_msk, "kmovq", IUM_WR, PCKFLT(0x91), BAD_CODE, PCKFLT(0x90), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) +INST3(vmovdqu8, "movdqu8", IUM_WR, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F), INS_TT_FULL_MEM, Input_8Bit | REX_W0_EVEX | Encoding_EVEX) +INST3(vmovdqu16, "movdqu16", IUM_WR, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F), INS_TT_FULL_MEM, Input_16Bit | REX_W1_EVEX | Encoding_EVEX) +INST3(vpcmpb, "pcmpb", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x3F), INS_TT_FULL_MEM, Input_8Bit | REX_W0_EVEX | Encoding_EVEX) +INST3(vpcmpw, "pcmpw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x3F), INS_TT_FULL, Input_16Bit | REX_W1_EVEX | Encoding_EVEX) +INST3(vpcmpub, "pcmpub", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x3E), INS_TT_FULL_MEM, Input_8Bit | REX_W0_EVEX | Encoding_EVEX) +INST3(vpcmpuw, "pcmpuw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x3E), INS_TT_FULL, Input_16Bit | REX_W1_EVEX | Encoding_EVEX) +INST3(vpmovb2m, "pmovb2m", IUM_WR, BAD_CODE, BAD_CODE, PACK4(0xF3, 0x0F, 0x38, 0x29), INS_TT_NONE, Input_8Bit | REX_W0_EVEX | Encoding_EVEX) +INST3(vpmovm2b, "pmovm2b", IUM_WR, BAD_CODE, BAD_CODE, PACK4(0xF3, 0x0F, 0x38, 0x28), INS_TT_NONE, Input_8Bit | REX_W0_EVEX | Encoding_EVEX) +INST3(vpmovm2w, "pmovm2w", IUM_WR, BAD_CODE, BAD_CODE, PACK4(0xF3, 0x0F, 0x38, 0x28), INS_TT_NONE, Input_16Bit | REX_W1_EVEX | Encoding_EVEX) +INST3(vpmovw2m, "pmovw2m", IUM_WR, BAD_CODE, BAD_CODE, PACK4(0xF3, 0x0F, 0x38, 0x29), INS_TT_NONE, Input_16Bit | REX_W1_EVEX | Encoding_EVEX) // AVX512DQ -INST3(vinsertf32x8, "insertf32x8", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1A), INS_TT_TUPLE8, Input_32Bit | REX_W0_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed double-precision floating point values -INST3(vinserti32x8, "inserti32x8", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x3A), INS_TT_TUPLE8, Input_32Bit | REX_W0_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed quadword integer values -INST3(vpmovd2m, "pmovd2m", IUM_WR, BAD_CODE, BAD_CODE, PACK4(0xF3, 0x0F, 0x38, 0x39), INS_TT_NONE, Input_32Bit | REX_W0_EVEX | Encoding_EVEX) -INST3(vpmovq2m, "pmovq2m", IUM_WR, BAD_CODE, BAD_CODE, PACK4(0xF3, 0x0F, 0x38, 0x39), INS_TT_NONE, Input_64Bit | REX_W1_EVEX | Encoding_EVEX) - -INST3(vpmovm2b, "vpmovm2b", IUM_WR, BAD_CODE, BAD_CODE, PACK4(0xF3, 0x0F, 0x38, 0x28), INS_TT_NONE, Input_8Bit | REX_W0_EVEX | Encoding_EVEX ) -INST3(vpmovm2w, "vpmovm2w", IUM_WR, BAD_CODE, BAD_CODE, PACK4(0xF3, 0x0F, 0x38, 0x28), INS_TT_NONE, Input_16Bit | REX_W1_EVEX | Encoding_EVEX ) -INST3(vpmovm2d, "vpmovm2d", IUM_WR, BAD_CODE, BAD_CODE, PACK4(0xF3, 0x0F, 0x38, 0x38), INS_TT_NONE, Input_32Bit | REX_W0_EVEX | Encoding_EVEX ) -INST3(vpmovm2q, "vpmovm2q", IUM_WR, BAD_CODE, BAD_CODE, PACK4(0xF3, 0x0F, 0x38, 0x38), INS_TT_NONE, Input_64Bit | REX_W1_EVEX | Encoding_EVEX ) - - -INST3(vpcmpb, "vpcmpb", IUM_WR, BAD_CODE, BAD_CODE, PACK4(0x66, 0x0F, 0x3A, 0x3F), INS_TT_FULL, Input_8Bit | REX_W0_EVEX | Encoding_EVEX ) -INST3(vpcmpw, "vpcmpw", IUM_WR, BAD_CODE, BAD_CODE, PACK4(0x66, 0x0F, 0x3A, 0x3F), INS_TT_FULL, Input_16Bit | REX_W1_EVEX | Encoding_EVEX ) -INST3(vpcmpd, "vpcmpd", IUM_WR, BAD_CODE, BAD_CODE, PACK4(0x66, 0x0F, 0x3A, 0x1F), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_EVEX ) -INST3(vpcmpq, "vpcmpq", IUM_WR, BAD_CODE, BAD_CODE, PACK4(0x66, 0x0F, 0x3A, 0x1F), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_EVEX ) - -INST3(vpcmpub, "vpcmpub", IUM_WR, BAD_CODE, BAD_CODE, PACK4(0x66, 0x0F, 0x3A, 0x3E), INS_TT_FULL, Input_8Bit | REX_W0_EVEX | Encoding_EVEX ) -INST3(vpcmpuw, "vpcmpuw", IUM_WR, BAD_CODE, BAD_CODE, PACK4(0x66, 0x0F, 0x3A, 0x3E), INS_TT_FULL, Input_16Bit | REX_W1_EVEX | Encoding_EVEX ) -INST3(vpcmpud, "vpcmpud", IUM_WR, BAD_CODE, BAD_CODE, PACK4(0x66, 0x0F, 0x3A, 0x1E), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_EVEX ) -INST3(vpcmpuq, "vpcmpuq", IUM_WR, BAD_CODE, BAD_CODE, PACK4(0x66, 0x0F, 0x3A, 0x1F), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_EVEX ) +INST3(kortestb, "kortestb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x98), INS_TT_NONE, REX_W0 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) +INST3(kmovb_gpr, "kmovb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x92), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) +INST3(kmovb_msk, "kmovb", IUM_WR, PCKDBL(0x91), BAD_CODE, PCKDBL(0x90), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) +INST3(vinsertf32x8, "insertf32x8", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1A), INS_TT_TUPLE8, Input_32Bit | REX_W0_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed double-precision floating point values +INST3(vinserti32x8, "inserti32x8", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x3A), INS_TT_TUPLE8, Input_32Bit | REX_W0_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed quadword integer values +INST3(vpmovd2m, "pmovd2m", IUM_WR, BAD_CODE, BAD_CODE, PACK4(0xF3, 0x0F, 0x38, 0x39), INS_TT_NONE, Input_32Bit | REX_W0_EVEX | Encoding_EVEX) +INST3(vpmovm2d, "pmovm2d", IUM_WR, BAD_CODE, BAD_CODE, PACK4(0xF3, 0x0F, 0x38, 0x38), INS_TT_NONE, Input_32Bit | REX_W0_EVEX | Encoding_EVEX) +INST3(vpmovm2q, "pmovm2q", IUM_WR, BAD_CODE, BAD_CODE, PACK4(0xF3, 0x0F, 0x38, 0x38), INS_TT_NONE, Input_64Bit | REX_W1_EVEX | Encoding_EVEX) +INST3(vpmovq2m, "pmovq2m", IUM_WR, BAD_CODE, BAD_CODE, PACK4(0xF3, 0x0F, 0x38, 0x39), INS_TT_NONE, Input_64Bit | REX_W1_EVEX | Encoding_EVEX) +INST3(vpcmpd, "pcmpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1F), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_EVEX ) +INST3(vpcmpq, "pcmpq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1F), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_EVEX ) +INST3(vpcmpud, "pcmpud", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1E), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_EVEX ) +INST3(vpcmpuq, "pcmpuq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1E), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_EVEX ) INST3(LAST_AVX512_INSTRUCTION, "LAST_AVX512_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None) // Scalar instructions in SSE4.2 -INST3(crc32, "crc32", IUM_WR, BAD_CODE, BAD_CODE, PACK4(0xF2, 0x0F, 0x38, 0xF0), INS_TT_NONE, INS_FLAGS_None) +INST3(crc32, "crc32", IUM_WR, BAD_CODE, BAD_CODE, PACK4(0xF2, 0x0F, 0x38, 0xF0), INS_TT_NONE, INS_FLAGS_None) // BMI1 -INST3(tzcnt, "tzcnt", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xBC), INS_TT_NONE, Undefined_OF | Undefined_SF | Writes_ZF | Undefined_AF | Undefined_PF | Writes_CF) // Count the Number of Trailing Zero Bits +INST3(tzcnt, "tzcnt", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xBC), INS_TT_NONE, Undefined_OF | Undefined_SF | Writes_ZF | Undefined_AF | Undefined_PF | Writes_CF) // Count the Number of Trailing Zero Bits // LZCNT INST3(lzcnt, "lzcnt", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xBD), INS_TT_NONE, Undefined_OF | Undefined_SF | Writes_ZF | Undefined_AF | Undefined_PF | Writes_CF)