From 237ecdc0b62cd0cc8e5990a4f6f67a4548c35b47 Mon Sep 17 00:00:00 2001 From: Pent Ploompuu Date: Sat, 14 Aug 2021 14:39:52 +0300 Subject: [PATCH 1/2] Fix ARM64 unsigned div by const perf regression --- src/coreclr/jit/codegenarm64.cpp | 11 ++++++++++- src/coreclr/jit/lower.cpp | 8 ++++++-- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 2428c33fee6f78..d75a0b37fd4c8e 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -1853,8 +1853,17 @@ void CodeGen::genCodeForBinary(GenTreeOp* treeNode) // The arithmetic node must be sitting in a register (since it's not contained) assert(targetReg != REG_NA); + emitAttr attr = emitActualTypeSize(treeNode); - regNumber r = emit->emitInsTernary(ins, emitActualTypeSize(treeNode), treeNode, op1, op2); + // UMULL/SMULL is twice as fast for 32*32->64bit MUL + if (oper == GT_MUL && EA_SIZE(attr) == EA_8BYTE && !varTypeIsFloating(treeNode->TypeGet()) && + EA_SIZE(emitActualTypeSize(op1)) == EA_4BYTE && EA_SIZE(emitActualTypeSize(op2)) == EA_4BYTE) + { + ins = (treeNode->gtFlags & GTF_UNSIGNED) ? INS_umull : INS_smull; + attr = EA_4BYTE; + } + + regNumber r = emit->emitInsTernary(ins, attr, treeNode, op1, op2); assert(r == targetReg); genProduceReg(treeNode); diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index a328206151a001..9a90d6a69f1401 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -5254,7 +5254,11 @@ bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) BlockRange().InsertBefore(divMod, preShiftBy, adjustedDividend); firstNode = preShiftBy; } - else if (type != TYP_I_IMPL) + else if (type != TYP_I_IMPL +#ifdef TARGET_ARM64 + && !simpleMul +#endif + ) { adjustedDividend = comp->gtNewCastNode(TYP_I_IMPL, adjustedDividend, true, TYP_U_IMPL); BlockRange().InsertBefore(divMod, adjustedDividend); @@ -5268,7 +5272,7 @@ bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) adjustedDividend->SetRegNum(REG_RAX); #endif - divisor->gtType = TYP_I_IMPL; + divisor->gtType = simpleMul ? TYP_INT : TYP_I_IMPL; divisor->AsIntCon()->SetIconValue(magic); if (isDiv && !postShift && type == TYP_I_IMPL) From 3ec691fc0758c433d52b83aa13e3bb24ba822039 Mon Sep 17 00:00:00 2001 From: Egor Chesakov Date: Tue, 17 Aug 2021 09:48:58 -0700 Subject: [PATCH 2/2] Address feedback by @SingleAccretion --- src/coreclr/jit/codegenarm64.cpp | 5 ++--- src/coreclr/jit/lower.cpp | 12 ++++++++++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index d75a0b37fd4c8e..33e7ec55a9ce4e 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -1856,10 +1856,9 @@ void CodeGen::genCodeForBinary(GenTreeOp* treeNode) emitAttr attr = emitActualTypeSize(treeNode); // UMULL/SMULL is twice as fast for 32*32->64bit MUL - if (oper == GT_MUL && EA_SIZE(attr) == EA_8BYTE && !varTypeIsFloating(treeNode->TypeGet()) && - EA_SIZE(emitActualTypeSize(op1)) == EA_4BYTE && EA_SIZE(emitActualTypeSize(op2)) == EA_4BYTE) + if ((oper == GT_MUL) && (targetType == TYP_LONG) && genActualTypeIsInt(op1) && genActualTypeIsInt(op2)) { - ins = (treeNode->gtFlags & GTF_UNSIGNED) ? INS_umull : INS_smull; + ins = treeNode->IsUnsigned() ? INS_umull : INS_smull; attr = EA_4BYTE; } diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 9a90d6a69f1401..08cbc8b0ad0d37 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -5256,7 +5256,7 @@ bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) } else if (type != TYP_I_IMPL #ifdef TARGET_ARM64 - && !simpleMul + && !simpleMul // On ARM64 we will use a 32x32->64 bit multiply as that's faster. #endif ) { @@ -5272,7 +5272,15 @@ bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) adjustedDividend->SetRegNum(REG_RAX); #endif - divisor->gtType = simpleMul ? TYP_INT : TYP_I_IMPL; + divisor->gtType = TYP_I_IMPL; + +#ifdef TARGET_ARM64 + if (simpleMul) + { + divisor->gtType = TYP_INT; + } +#endif + divisor->AsIntCon()->SetIconValue(magic); if (isDiv && !postShift && type == TYP_I_IMPL)