From 9d0142f768aa54f7a00161beb289c74167b70ed1 Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Wed, 15 May 2024 12:41:16 +0200 Subject: [PATCH] Fix ulong->double conversion on x64 (without avx512f) (#102179) --- src/coreclr/jit/codegen.h | 16 ---- src/coreclr/jit/codegencommon.cpp | 9 -- src/coreclr/jit/codegenxarch.cpp | 96 +++++++++---------- src/coreclr/jit/lsraxarch.cpp | 9 ++ src/coreclr/jit/simdcodegenxarch.cpp | 19 ++-- src/coreclr/jit/utils.cpp | 30 +----- .../JitBlue/Runtime_102149/Runtime_102149.cs | 34 +++++++ .../Runtime_102149/Runtime_102149.csproj | 8 ++ 8 files changed, 106 insertions(+), 115 deletions(-) create mode 100644 src/tests/JIT/Regression/JitBlue/Runtime_102149/Runtime_102149.cs create mode 100644 src/tests/JIT/Regression/JitBlue/Runtime_102149/Runtime_102149.csproj diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 077f6cad0e57cd..5441e899bac84b 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -47,22 +47,6 @@ class CodeGen final : public CodeGenInterface private: #if defined(TARGET_XARCH) - // Bit masks used in negating a float or double number. - // This is to avoid creating more than one data constant for these bitmasks when a - // method has more than one GT_NEG operation on floating point values. - CORINFO_FIELD_HANDLE negBitmaskFlt; - CORINFO_FIELD_HANDLE negBitmaskDbl; - - // Bit masks used in computing Math.Abs() of a float or double number. - CORINFO_FIELD_HANDLE absBitmaskFlt; - CORINFO_FIELD_HANDLE absBitmaskDbl; - - // Bit mask used in zeroing the 3rd element of a SIMD12 - CORINFO_FIELD_HANDLE zroSimd12Elm3; - - // Bit mask used in U8 -> double conversion to adjust the result. - CORINFO_FIELD_HANDLE u8ToDblBitmask; - // Generates SSE2 code for the given tree as "Operand BitWiseOp BitMask" void genSSE2BitwiseOp(GenTree* treeNode); diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 31e8f30a48aa6c..a4c4bb9b45e902 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -196,15 +196,6 @@ void CodeGenInterface::CopyRegisterInfo() CodeGen::CodeGen(Compiler* theCompiler) : CodeGenInterface(theCompiler) { -#if defined(TARGET_XARCH) - negBitmaskFlt = nullptr; - negBitmaskDbl = nullptr; - absBitmaskFlt = nullptr; - absBitmaskDbl = nullptr; - zroSimd12Elm3 = nullptr; - u8ToDblBitmask = nullptr; -#endif // defined(TARGET_XARCH) - #if defined(FEATURE_PUT_STRUCT_ARG_STK) && !defined(TARGET_X86) m_stkArgVarNum = BAD_VAR_NUM; #endif diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 3c652d7de7fea6..f3d9415d592aad 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -7318,47 +7318,53 @@ void CodeGen::genIntToFloatCast(GenTree* treeNode) // integral to floating-point conversions all have RMW semantics if VEX support // is not available - bool isRMW = !compiler->canUseVexEncoding(); - inst_RV_RV_TT(ins, emitTypeSize(srcType), targetReg, targetReg, op1, isRMW, INS_OPTS_NONE); + const bool isRMW = !compiler->canUseVexEncoding(); // Handle the case of srcType = TYP_ULONG. SSE2 conversion instruction // will interpret ULONG value as LONG. Hence we need to adjust the // result if sign-bit of srcType is set. if (srcType == TYP_ULONG) { - // The instruction sequence below is less accurate than what clang - // and gcc generate. However, we keep the current sequence for backward compatibility. - // If we change the instructions below, FloatingPointUtils::convertUInt64ToDobule - // should be also updated for consistent conversion result. assert(dstType == TYP_DOUBLE); assert(op1->isUsedFromReg()); - // Set the flags without modifying op1. - // test op1Reg, op1Reg - inst_RV_RV(INS_test, op1->GetRegNum(), op1->GetRegNum(), srcType); + // The following LONG->DOUBLE cast machinery is based on clang's implementation + // with "-ffp-model=strict" flag: + // + // mov tmp1, arg (8 byte) + // shr tmp1 + // mov tmp2, arg (4 byte) + // and tmp2, 1 + // or tmp2, tmp1 + // test arg, arg + // cmovns tmp2, arg + // cvtsi2sd xmm0, tmp2 + // jns .LABEL + // addsd xmm0, xmm0 + //.LABEL + // + regNumber argReg = treeNode->gtGetOp1()->GetRegNum(); + regNumber tmpReg1 = internalRegisters.Extract(treeNode); + regNumber tmpReg2 = internalRegisters.Extract(treeNode); + + inst_Mov(TYP_LONG, tmpReg1, argReg, /* canSkip */ false, EA_8BYTE); + inst_RV_SH(INS_shr, EA_8BYTE, tmpReg1, 1); + inst_Mov(TYP_INT, tmpReg2, argReg, /* canSkip */ false, EA_4BYTE); + GetEmitter()->emitIns_R_I(INS_and, EA_4BYTE, tmpReg2, 1); + GetEmitter()->emitIns_R_R(INS_or, EA_8BYTE, tmpReg2, tmpReg1); + GetEmitter()->emitIns_R_R(INS_test, EA_8BYTE, argReg, argReg); + GetEmitter()->emitIns_R_R(INS_cmovns, EA_8BYTE, tmpReg2, argReg); + GetEmitter()->emitIns_R_R(ins, EA_8BYTE, targetReg, tmpReg2); - // No need to adjust result if op1 >= 0 i.e. positive - // Jge label BasicBlock* label = genCreateTempLabel(); - inst_JMP(EJ_jge, label); - - // Adjust the result - // result = result + 0x43f00000 00000000 - // addsd resultReg, 0x43f00000 00000000 - CORINFO_FIELD_HANDLE* cns = &u8ToDblBitmask; - if (*cns == nullptr) - { - double d; - static_assert_no_msg(sizeof(double) == sizeof(__int64)); - *((__int64*)&d) = 0x43f0000000000000LL; - - *cns = GetEmitter()->emitFltOrDblConst(d, EA_8BYTE); - } - GetEmitter()->emitIns_SIMD_R_R_C(INS_addsd, EA_8BYTE, targetReg, targetReg, *cns, 0, INS_OPTS_NONE); - + inst_JMP(EJ_jns, label); + GetEmitter()->emitIns_R_R(INS_addsd, EA_8BYTE, targetReg, targetReg); genDefineTempLabel(label); } - + else + { + inst_RV_RV_TT(ins, emitTypeSize(srcType), targetReg, targetReg, op1, isRMW, INS_OPTS_NONE); + } genProduceReg(treeNode); } @@ -7685,23 +7691,21 @@ void CodeGen::genSSE2BitwiseOp(GenTree* treeNode) { regNumber targetReg = treeNode->GetRegNum(); regNumber operandReg = genConsumeReg(treeNode->gtGetOp1()); - emitAttr size = emitTypeSize(treeNode); assert(varTypeIsFloating(treeNode->TypeGet())); assert(treeNode->gtGetOp1()->isUsedFromReg()); - CORINFO_FIELD_HANDLE* maskFld = nullptr; - UINT64 mask = 0; - instruction ins = INS_invalid; + CORINFO_FIELD_HANDLE maskFld = NO_FIELD_HANDLE; + UINT64 mask = 0; + instruction ins = INS_invalid; if (treeNode->OperIs(GT_NEG)) { // Neg(x) = flip the sign bit. // Neg(f) = f ^ 0x80000000 x4 (packed) // Neg(d) = d ^ 0x8000000000000000 x2 (packed) - ins = INS_xorps; - mask = treeNode->TypeIs(TYP_FLOAT) ? 0x8000000080000000UL : 0x8000000000000000UL; - maskFld = treeNode->TypeIs(TYP_FLOAT) ? &negBitmaskFlt : &negBitmaskDbl; + ins = INS_xorps; + mask = treeNode->TypeIs(TYP_FLOAT) ? 0x8000000080000000UL : 0x8000000000000000UL; } else if (treeNode->OperIs(GT_INTRINSIC)) { @@ -7709,30 +7713,24 @@ void CodeGen::genSSE2BitwiseOp(GenTree* treeNode) // Abs(x) = set sign-bit to zero // Abs(f) = f & 0x7fffffff x4 (packed) // Abs(d) = d & 0x7fffffffffffffff x2 (packed) - ins = INS_andps; - mask = treeNode->TypeIs(TYP_FLOAT) ? 0x7FFFFFFF7FFFFFFFUL : 0x7FFFFFFFFFFFFFFFUL; - maskFld = treeNode->TypeIs(TYP_FLOAT) ? &absBitmaskFlt : &absBitmaskDbl; + ins = INS_andps; + mask = treeNode->TypeIs(TYP_FLOAT) ? 0x7FFFFFFF7FFFFFFFUL : 0x7FFFFFFFFFFFFFFFUL; } else { assert(!"genSSE2BitwiseOp: unsupported oper"); } - if (*maskFld == nullptr) - { - simd16_t constValue; - - constValue.u64[0] = mask; - constValue.u64[1] = mask; - + simd16_t constValue; + constValue.u64[0] = mask; + constValue.u64[1] = mask; #if defined(FEATURE_SIMD) - *maskFld = GetEmitter()->emitSimd16Const(constValue); + maskFld = GetEmitter()->emitSimd16Const(constValue); #else - *maskFld = GetEmitter()->emitBlkConst(&constValue, 16, 16, treeNode->TypeGet()); + maskFld = GetEmitter()->emitBlkConst(&constValue, 16, 16, treeNode->TypeGet()); #endif - } - GetEmitter()->emitIns_SIMD_R_R_C(ins, EA_16BYTE, targetReg, operandReg, *maskFld, 0, INS_OPTS_NONE); + GetEmitter()->emitIns_SIMD_R_R_C(ins, EA_16BYTE, targetReg, operandReg, maskFld, 0, INS_OPTS_NONE); } //----------------------------------------------------------------------------------------- diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 8d376cff4e725c..063dc02a04573d 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2827,6 +2827,15 @@ int LinearScan::BuildCast(GenTreeCast* cast) const var_types srcType = genActualType(src->TypeGet()); const var_types castType = cast->gtCastType; + if ((srcType == TYP_LONG) && (castType == TYP_DOUBLE) && + !compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F)) + { + // We need two extra temp regs for LONG->DOUBLE cast + // if we don't have AVX512F available. + buildInternalIntRegisterDefForNode(cast); + buildInternalIntRegisterDefForNode(cast); + } + regMaskTP candidates = RBM_NONE; #ifdef TARGET_X86 if (varTypeIsByte(castType)) diff --git a/src/coreclr/jit/simdcodegenxarch.cpp b/src/coreclr/jit/simdcodegenxarch.cpp index b344460a100abb..57fcb5d4688004 100644 --- a/src/coreclr/jit/simdcodegenxarch.cpp +++ b/src/coreclr/jit/simdcodegenxarch.cpp @@ -536,19 +536,12 @@ void CodeGen::genSimd12UpperClear(regNumber tgtReg) else { // Preserve element 0, 1, and 2; Zero element 3 - - if (zroSimd12Elm3 == NO_FIELD_HANDLE) - { - simd16_t constValue; - - constValue.u32[0] = 0xFFFFFFFF; - constValue.u32[1] = 0xFFFFFFFF; - constValue.u32[2] = 0xFFFFFFFF; - constValue.u32[3] = 0x00000000; - - zroSimd12Elm3 = GetEmitter()->emitSimd16Const(constValue); - } - + simd16_t constValue; + constValue.u32[0] = 0xFFFFFFFF; + constValue.u32[1] = 0xFFFFFFFF; + constValue.u32[2] = 0xFFFFFFFF; + constValue.u32[3] = 0x00000000; + CORINFO_FIELD_HANDLE zroSimd12Elm3 = GetEmitter()->emitSimd16Const(constValue); GetEmitter()->emitIns_SIMD_R_R_C(INS_andps, EA_16BYTE, tgtReg, tgtReg, zroSimd12Elm3, 0, INS_OPTS_NONE); } } diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp index cd5b97f0220b13..6a9196bc66d4a7 100644 --- a/src/coreclr/jit/utils.cpp +++ b/src/coreclr/jit/utils.cpp @@ -1,4 +1,4 @@ -// Licensed to the .NET Foundation under one or more agreements. +// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. // =================================================================================================== @@ -2230,33 +2230,7 @@ unsigned CountDigits(double num, unsigned base /* = 10 */) double FloatingPointUtils::convertUInt64ToDouble(unsigned __int64 uIntVal) { - __int64 s64 = uIntVal; - double d; - if (s64 < 0) - { -#if defined(TARGET_XARCH) - // RyuJIT codegen and clang (or gcc) may produce different results for casting uint64 to - // double, and the clang result is more accurate. For example, - // 1) (double)0x84595161401484A0UL --> 43e08b2a2c280290 (RyuJIT codegen or VC++) - // 2) (double)0x84595161401484A0UL --> 43e08b2a2c280291 (clang or gcc) - // If the folding optimization below is implemented by simple casting of (double)uint64_val - // and it is compiled by clang, casting result can be inconsistent, depending on whether - // the folding optimization is triggered or the codegen generates instructions for casting. // - // The current solution is to force the same math as the codegen does, so that casting - // result is always consistent. - - // d = (double)(int64_t)uint64 + 0x1p64 - uint64_t adjHex = 0x43F0000000000000UL; - d = (double)s64 + *(double*)&adjHex; -#else - d = (double)uIntVal; -#endif - } - else - { - d = (double)uIntVal; - } - return d; + return (double)uIntVal; } float FloatingPointUtils::convertUInt64ToFloat(unsigned __int64 u64) diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_102149/Runtime_102149.cs b/src/tests/JIT/Regression/JitBlue/Runtime_102149/Runtime_102149.cs new file mode 100644 index 00000000000000..038f61d1b14e5d --- /dev/null +++ b/src/tests/JIT/Regression/JitBlue/Runtime_102149/Runtime_102149.cs @@ -0,0 +1,34 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Runtime.CompilerServices; +using Xunit; + +public static class Runtime_102149 +{ + [Fact] + public static void Test() + { + if (BitConverter.DoubleToUInt64Bits(U8_to_R8(Const(9303915604039947368UL))) != 4890948523238023291UL) + throw new InvalidOperationException("Case 1 failed"); + + if (BitConverter.DoubleToUInt64Bits((double)Const(9303915604039947368UL)) != 4890948523238023291UL) + throw new InvalidOperationException("Case 2 failed"); + + if (NonConst(10648738977740919977d) != NonConst(10648738977740919977UL)) + throw new InvalidOperationException("Case 3 failed"); + + if (Const(10648738977740919977d) != Const(10648738977740919977UL)) + throw new InvalidOperationException("Case 4 failed"); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static T Const(T val) => val; + + [MethodImpl(MethodImplOptions.NoInlining)] + private static T NonConst(T val) => val; + + [MethodImpl(MethodImplOptions.NoInlining)] + private static double U8_to_R8(ulong x) => x; +} diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_102149/Runtime_102149.csproj b/src/tests/JIT/Regression/JitBlue/Runtime_102149/Runtime_102149.csproj new file mode 100644 index 00000000000000..de6d5e08882e86 --- /dev/null +++ b/src/tests/JIT/Regression/JitBlue/Runtime_102149/Runtime_102149.csproj @@ -0,0 +1,8 @@ + + + True + + + + +