Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avx512 convert opt #3

Open
wants to merge 5 commits into
base: avx512-RR
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 16 additions & 6 deletions src/coreclr/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ bool emitter::IsSSEOrAVXInstruction(instruction ins)
//------------------------------------------------------------------------
// IsSSEOrAVXorAVX512Instruction: Checks whether an instruction falls in the
//    SSE / AVX / AVX512 portion of the instruction table.
//
// Arguments:
//    ins - the instruction to classify
//
// Return Value:
//    true if `ins` lies in the inclusive range
//    [INS_FIRST_SSE_INSTRUCTION, INS_LAST_AVX512_INSTRUCTION]; false otherwise.
//
// Notes:
//    The upper bound must be INS_LAST_AVX512_INSTRUCTION. Bounding the range
//    at INS_LAST_AVX_INSTRUCTION would exclude the AVX512 instructions this
//    predicate is named for. The check relies on the AVX512 entries being
//    placed after the AVX entries in the instruction table.
//
bool emitter::IsSSEOrAVXorAVX512Instruction(instruction ins)
{
    return (ins >= INS_FIRST_SSE_INSTRUCTION) && (ins <= INS_LAST_AVX512_INSTRUCTION);
}

bool emitter::IsAVXOnlyInstruction(instruction ins)
Expand All @@ -47,7 +47,7 @@ bool emitter::IsAVXOnlyInstruction(instruction ins)

//------------------------------------------------------------------------
// IsAVX512OnlyInstruction: Checks whether an instruction belongs to the
//    AVX512-only portion of the instruction table.
//
// Arguments:
//    ins - the instruction to classify
//
// Return Value:
//    true if `ins` lies in the inclusive range
//    [INS_FIRST_AVX512_INSTRUCTION, INS_LAST_AVX512_INSTRUCTION]; false otherwise.
//
// Notes:
//    Previously an unconditional `return false` preceded this range check,
//    so the check never ran and no instruction was ever reported as AVX512-only.
//
bool emitter::IsAVX512OnlyInstruction(instruction ins)
{
    return (ins >= INS_FIRST_AVX512_INSTRUCTION) && (ins <= INS_LAST_AVX512_INSTRUCTION);
}

bool emitter::IsFMAInstruction(instruction ins)
Expand Down Expand Up @@ -13073,10 +13073,6 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
regNumber reg1 = id->idReg1();
regNumber reg2 = id->idReg2();
emitAttr size = id->idOpSize();
if ((ins == INS_movq) || (ins == INS_movd))
{
emitDispIns(id, false, false, false);
}

if (IsSSEOrAVXorAVX512Instruction(ins))
{
Expand Down Expand Up @@ -17185,6 +17181,20 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
break;
}
#endif

case INS_vcvtqq2pd:
case INS_vcvtuqq2pd:
case INS_vcvtpd2qq:
case INS_vcvtpd2uqq:
case INS_vcvtps2udq:
case INS_vcvtudq2ps:
{
// TODO-XARCH-AVX512: fill in the proper latency and throughput values for these instructions
result.insLatency += PERFSCORE_LATENCY_1C;
result.insThroughput = PERFSCORE_THROUGHPUT_2X;
break;
}

default:
// unhandled instruction insFmt combination
perfScoreUnhandledInstruction(id, &result);
Expand Down
12 changes: 12 additions & 0 deletions src/coreclr/jit/emitxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -265,9 +265,16 @@ bool IsWEvexOpcodeExtension(instruction ins)
case INS_vfnmsub231sd:
case INS_unpcklpd:
case INS_vpermilpdvar:

// New AVX512 ins
case INS_vcvtqq2pd:
case INS_vcvtuqq2pd:
case INS_vcvtpd2qq:
case INS_vcvtpd2uqq:
{
return true; // W1
}

case INS_movd:
case INS_punpckldq:
case INS_movntdq:
Expand Down Expand Up @@ -378,9 +385,14 @@ bool IsWEvexOpcodeExtension(instruction ins)
case INS_vpdpbusds:
case INS_vpdpwssds:
case INS_vpermilpsvar:

// New AVX512 ins
case INS_vcvtps2udq:
case INS_vcvtudq2ps:
{
return false; // W0
}

default:
{
return false; // WIG
Expand Down
34 changes: 32 additions & 2 deletions src/coreclr/jit/hwintrinsiclistxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -813,12 +813,42 @@ HARDWARE_INTRINSIC(POPCNT, PopCount,
// POPCNT Intrinsics
HARDWARE_INTRINSIC(POPCNT_X64, PopCount, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_popcnt, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_MultiIns)

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// X86Serialize Intrinsics
HARDWARE_INTRINSIC(X86Serialize, Serialize, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// ISA Function name SIMD size NumArg Instructions Category Flags
// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// X86Serialize Intrinsics
HARDWARE_INTRINSIC(X86Serialize, Serialize, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
// AVX512F Intrinsics

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// ISA Function name SIMD size NumArg Instructions Category Flags
// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// AVX512F_VL Intrinsics

HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128UInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2udq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector256UInt32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2udq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)

HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128Single, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector256Single, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// ISA Function name SIMD size NumArg Instructions Category Flags
// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// AVX512DQ_VL Intrinsics
HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector128Double, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtqq2pd, INS_vcvtuqq2pd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector256Double, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtqq2pd, INS_vcvtuqq2pd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)

HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector128Int64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector256Int64, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)

HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector128UInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector256UInt64, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)



// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
Expand Down
62 changes: 57 additions & 5 deletions src/coreclr/jit/hwintrinsicxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -848,15 +848,61 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic,

case NI_Vector128_ConvertToDouble:
case NI_Vector256_ConvertToDouble:
{
assert(sig->numArgs == 1);
assert(simdBaseType == TYP_LONG || simdBaseType == TYP_ULONG);

intrinsic = (simdSize == 32) ? NI_AVX512DQ_VL_ConvertToVector256Double
: NI_AVX512DQ_VL_ConvertToVector128Double;

op1 = impSIMDPopStack(retType);
retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize);

break;
}

case NI_Vector128_ConvertToInt64:
case NI_Vector256_ConvertToInt64:
case NI_Vector128_ConvertToUInt32:
case NI_Vector256_ConvertToUInt32:
{
assert(sig->numArgs == 1);
assert(simdBaseType == TYP_DOUBLE);

intrinsic = (simdSize == 32) ? NI_AVX512DQ_VL_ConvertToVector256Int64
: NI_AVX512DQ_VL_ConvertToVector128Int64;

op1 = impSIMDPopStack(retType);
retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize);

break;
}

case NI_Vector128_ConvertToUInt64:
case NI_Vector256_ConvertToUInt64:
{
assert(sig->numArgs == 1);
// TODO-XARCH-CQ: These intrinsics should be accelerated
assert(simdBaseType == TYP_DOUBLE);

intrinsic = (simdSize == 32) ? NI_AVX512DQ_VL_ConvertToVector256UInt64
: NI_AVX512DQ_VL_ConvertToVector128UInt64;

op1 = impSIMDPopStack(retType);
retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize);

break;
}

case NI_Vector128_ConvertToUInt32:
case NI_Vector256_ConvertToUInt32:
{
assert(sig->numArgs == 1);
assert(simdBaseType == TYP_FLOAT);

intrinsic = (simdSize == 32) ? NI_AVX512F_VL_ConvertToVector256UInt32
: NI_AVX512F_VL_ConvertToVector128UInt32;

op1 = impSIMDPopStack(retType);
retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize);

break;
}

Expand Down Expand Up @@ -886,10 +932,16 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic,
op1 = impSIMDPopStack(retType);
retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize);
}
else if (simdBaseType == TYP_UINT)
{
intrinsic = (simdSize == 32) ? NI_AVX512F_VL_ConvertToVector256Single : NI_AVX512F_VL_ConvertToVector128Single;

op1 = impSIMDPopStack(retType);
retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize);
}
else
{
// TODO-XARCH-CQ: These intrinsics should be accelerated
assert(simdBaseType == TYP_UINT);
unreached();
}
break;
}
Expand Down
12 changes: 12 additions & 0 deletions src/coreclr/jit/instrsxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -615,6 +615,18 @@ INST3(LAST_BMI_INSTRUCTION, "LAST_BMI_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE,

INST3(LAST_AVX_INSTRUCTION, "LAST_AVX_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)

INST3(FIRST_AVX512_INSTRUCTION, "FIRST_AVX512_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)

// AVX-512 packed conversion instructions between integer and floating-point vector elements.
INST3(vcvtqq2pd, "cvtqq2pd", IUM_WR, BAD_CODE, BAD_CODE, PACK3(0xF3, 0x0F, 0xE6), INS_FLAGS_None) // cvt packed signed quadword integers to packed doubles
INST3(vcvtuqq2pd, "cvtuqq2pd", IUM_WR, BAD_CODE, BAD_CODE, PACK3(0xF3, 0x0F, 0x7A), INS_FLAGS_None) // cvt packed unsigned quadword integers to packed doubles
INST3(vcvtudq2ps, "cvtudq2ps", IUM_WR, BAD_CODE, BAD_CODE, PACK3(0xF2, 0x0F, 0x7A), INS_FLAGS_None) // cvt packed unsigned doubleword integers to packed singles
INST3(vcvtpd2qq, "cvtpd2qq", IUM_WR, BAD_CODE, BAD_CODE, PACK3(0x66, 0x0F, 0x7B), INS_FLAGS_None) // cvt packed doubles to packed signed quadword integers
INST3(vcvtpd2uqq, "cvtpd2uqq", IUM_WR, BAD_CODE, BAD_CODE, PACK3(0x66, 0x0F, 0x79), INS_FLAGS_None) // cvt packed doubles to packed unsigned quadword integers
INST3(vcvtps2udq, "cvtps2udq", IUM_WR, BAD_CODE, BAD_CODE, PACK2(0x0F, 0x79), INS_FLAGS_None) // cvt packed singles to packed unsigned doubleword integers


INST3(LAST_AVX512_INSTRUCTION, "LAST_AVX512_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)

// Scalar instructions in SSE4.2
INST3(crc32, "crc32", IUM_WR, BAD_CODE, BAD_CODE, PACK4(0xF2, 0x0F, 0x38, 0xF0), INS_FLAGS_None)

Expand Down
4 changes: 3 additions & 1 deletion src/coreclr/jit/lowerxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6370,7 +6370,9 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
(intrinsicId == NI_AVX2_BroadcastScalarToVector256) ||
(intrinsicId == NI_AVX2_ConvertToVector256Int16) ||
(intrinsicId == NI_AVX2_ConvertToVector256Int32) ||
(intrinsicId == NI_AVX2_ConvertToVector256Int64));
(intrinsicId == NI_AVX2_ConvertToVector256Int64) ||
(intrinsicId == NI_AVX512DQ_VL_ConvertToVector128Double) ||
(intrinsicId == NI_AVX512DQ_VL_ConvertToVector256Double));

ContainCheckHWIntrinsicAddr(node, op1);
}
Expand Down
Loading