Skip to content

Commit

Permalink
Fix ulong->double conversion on x64 (without avx512f) (#102179)
Browse files Browse the repository at this point in the history
  • Loading branch information
EgorBo authored May 15, 2024
1 parent 8698d3d commit 9d0142f
Show file tree
Hide file tree
Showing 8 changed files with 106 additions and 115 deletions.
16 changes: 0 additions & 16 deletions src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,22 +47,6 @@ class CodeGen final : public CodeGenInterface

private:
#if defined(TARGET_XARCH)
// Bit masks used in negating a float or double number.
// This is to avoid creating more than one data constant for these bitmasks when a
// method has more than one GT_NEG operation on floating point values.
CORINFO_FIELD_HANDLE negBitmaskFlt;
CORINFO_FIELD_HANDLE negBitmaskDbl;

// Bit masks used in computing Math.Abs() of a float or double number.
CORINFO_FIELD_HANDLE absBitmaskFlt;
CORINFO_FIELD_HANDLE absBitmaskDbl;

// Bit mask used in zeroing the 3rd element of a SIMD12
CORINFO_FIELD_HANDLE zroSimd12Elm3;

// Bit mask used in U8 -> double conversion to adjust the result.
CORINFO_FIELD_HANDLE u8ToDblBitmask;

// Generates SSE2 code for the given tree as "Operand BitWiseOp BitMask"
void genSSE2BitwiseOp(GenTree* treeNode);

Expand Down
9 changes: 0 additions & 9 deletions src/coreclr/jit/codegencommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -196,15 +196,6 @@ void CodeGenInterface::CopyRegisterInfo()
CodeGen::CodeGen(Compiler* theCompiler)
: CodeGenInterface(theCompiler)
{
#if defined(TARGET_XARCH)
negBitmaskFlt = nullptr;
negBitmaskDbl = nullptr;
absBitmaskFlt = nullptr;
absBitmaskDbl = nullptr;
zroSimd12Elm3 = nullptr;
u8ToDblBitmask = nullptr;
#endif // defined(TARGET_XARCH)

#if defined(FEATURE_PUT_STRUCT_ARG_STK) && !defined(TARGET_X86)
m_stkArgVarNum = BAD_VAR_NUM;
#endif
Expand Down
96 changes: 47 additions & 49 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7318,47 +7318,53 @@ void CodeGen::genIntToFloatCast(GenTree* treeNode)
// integral to floating-point conversions all have RMW semantics if VEX support
// is not available

bool isRMW = !compiler->canUseVexEncoding();
inst_RV_RV_TT(ins, emitTypeSize(srcType), targetReg, targetReg, op1, isRMW, INS_OPTS_NONE);
const bool isRMW = !compiler->canUseVexEncoding();

// Handle the case of srcType = TYP_ULONG. SSE2 conversion instruction
// will interpret ULONG value as LONG. Hence we need to adjust the
// result if sign-bit of srcType is set.
if (srcType == TYP_ULONG)
{
// The instruction sequence below is less accurate than what clang
// and gcc generate. However, we keep the current sequence for backward compatibility.
// If we change the instructions below, FloatingPointUtils::convertUInt64ToDobule
// should be also updated for consistent conversion result.
assert(dstType == TYP_DOUBLE);
assert(op1->isUsedFromReg());

// Set the flags without modifying op1.
// test op1Reg, op1Reg
inst_RV_RV(INS_test, op1->GetRegNum(), op1->GetRegNum(), srcType);
// The following LONG->DOUBLE cast machinery is based on clang's implementation
// with "-ffp-model=strict" flag:
//
// mov tmp1, arg (8 byte)
// shr tmp1
// mov tmp2, arg (4 byte)
// and tmp2, 1
// or tmp2, tmp1
// test arg, arg
// cmovns tmp2, arg
// cvtsi2sd xmm0, tmp2
// jns .LABEL
// addsd xmm0, xmm0
//.LABEL
//
regNumber argReg = treeNode->gtGetOp1()->GetRegNum();
regNumber tmpReg1 = internalRegisters.Extract(treeNode);
regNumber tmpReg2 = internalRegisters.Extract(treeNode);

inst_Mov(TYP_LONG, tmpReg1, argReg, /* canSkip */ false, EA_8BYTE);
inst_RV_SH(INS_shr, EA_8BYTE, tmpReg1, 1);
inst_Mov(TYP_INT, tmpReg2, argReg, /* canSkip */ false, EA_4BYTE);
GetEmitter()->emitIns_R_I(INS_and, EA_4BYTE, tmpReg2, 1);
GetEmitter()->emitIns_R_R(INS_or, EA_8BYTE, tmpReg2, tmpReg1);
GetEmitter()->emitIns_R_R(INS_test, EA_8BYTE, argReg, argReg);
GetEmitter()->emitIns_R_R(INS_cmovns, EA_8BYTE, tmpReg2, argReg);
GetEmitter()->emitIns_R_R(ins, EA_8BYTE, targetReg, tmpReg2);

// No need to adjust result if op1 >= 0 i.e. positive
// Jge label
BasicBlock* label = genCreateTempLabel();
inst_JMP(EJ_jge, label);

// Adjust the result
// result = result + 0x43f00000 00000000
// addsd resultReg, 0x43f00000 00000000
CORINFO_FIELD_HANDLE* cns = &u8ToDblBitmask;
if (*cns == nullptr)
{
double d;
static_assert_no_msg(sizeof(double) == sizeof(__int64));
*((__int64*)&d) = 0x43f0000000000000LL;

*cns = GetEmitter()->emitFltOrDblConst(d, EA_8BYTE);
}
GetEmitter()->emitIns_SIMD_R_R_C(INS_addsd, EA_8BYTE, targetReg, targetReg, *cns, 0, INS_OPTS_NONE);

inst_JMP(EJ_jns, label);
GetEmitter()->emitIns_R_R(INS_addsd, EA_8BYTE, targetReg, targetReg);
genDefineTempLabel(label);
}

else
{
inst_RV_RV_TT(ins, emitTypeSize(srcType), targetReg, targetReg, op1, isRMW, INS_OPTS_NONE);
}
genProduceReg(treeNode);
}

Expand Down Expand Up @@ -7685,54 +7691,46 @@ void CodeGen::genSSE2BitwiseOp(GenTree* treeNode)
{
regNumber targetReg = treeNode->GetRegNum();
regNumber operandReg = genConsumeReg(treeNode->gtGetOp1());
emitAttr size = emitTypeSize(treeNode);

assert(varTypeIsFloating(treeNode->TypeGet()));
assert(treeNode->gtGetOp1()->isUsedFromReg());

CORINFO_FIELD_HANDLE* maskFld = nullptr;
UINT64 mask = 0;
instruction ins = INS_invalid;
CORINFO_FIELD_HANDLE maskFld = NO_FIELD_HANDLE;
UINT64 mask = 0;
instruction ins = INS_invalid;

if (treeNode->OperIs(GT_NEG))
{
// Neg(x) = flip the sign bit.
// Neg(f) = f ^ 0x80000000 x4 (packed)
// Neg(d) = d ^ 0x8000000000000000 x2 (packed)
ins = INS_xorps;
mask = treeNode->TypeIs(TYP_FLOAT) ? 0x8000000080000000UL : 0x8000000000000000UL;
maskFld = treeNode->TypeIs(TYP_FLOAT) ? &negBitmaskFlt : &negBitmaskDbl;
ins = INS_xorps;
mask = treeNode->TypeIs(TYP_FLOAT) ? 0x8000000080000000UL : 0x8000000000000000UL;
}
else if (treeNode->OperIs(GT_INTRINSIC))
{
assert(treeNode->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Abs);
// Abs(x) = set sign-bit to zero
// Abs(f) = f & 0x7fffffff x4 (packed)
// Abs(d) = d & 0x7fffffffffffffff x2 (packed)
ins = INS_andps;
mask = treeNode->TypeIs(TYP_FLOAT) ? 0x7FFFFFFF7FFFFFFFUL : 0x7FFFFFFFFFFFFFFFUL;
maskFld = treeNode->TypeIs(TYP_FLOAT) ? &absBitmaskFlt : &absBitmaskDbl;
ins = INS_andps;
mask = treeNode->TypeIs(TYP_FLOAT) ? 0x7FFFFFFF7FFFFFFFUL : 0x7FFFFFFFFFFFFFFFUL;
}
else
{
assert(!"genSSE2BitwiseOp: unsupported oper");
}

if (*maskFld == nullptr)
{
simd16_t constValue;

constValue.u64[0] = mask;
constValue.u64[1] = mask;

simd16_t constValue;
constValue.u64[0] = mask;
constValue.u64[1] = mask;
#if defined(FEATURE_SIMD)
*maskFld = GetEmitter()->emitSimd16Const(constValue);
maskFld = GetEmitter()->emitSimd16Const(constValue);
#else
*maskFld = GetEmitter()->emitBlkConst(&constValue, 16, 16, treeNode->TypeGet());
maskFld = GetEmitter()->emitBlkConst(&constValue, 16, 16, treeNode->TypeGet());
#endif
}

GetEmitter()->emitIns_SIMD_R_R_C(ins, EA_16BYTE, targetReg, operandReg, *maskFld, 0, INS_OPTS_NONE);
GetEmitter()->emitIns_SIMD_R_R_C(ins, EA_16BYTE, targetReg, operandReg, maskFld, 0, INS_OPTS_NONE);
}

//-----------------------------------------------------------------------------------------
Expand Down
9 changes: 9 additions & 0 deletions src/coreclr/jit/lsraxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2827,6 +2827,15 @@ int LinearScan::BuildCast(GenTreeCast* cast)
const var_types srcType = genActualType(src->TypeGet());
const var_types castType = cast->gtCastType;

if ((srcType == TYP_LONG) && (castType == TYP_DOUBLE) &&
!compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F))
{
// We need two extra temp regs for LONG->DOUBLE cast
// if we don't have AVX512F available.
buildInternalIntRegisterDefForNode(cast);
buildInternalIntRegisterDefForNode(cast);
}

regMaskTP candidates = RBM_NONE;
#ifdef TARGET_X86
if (varTypeIsByte(castType))
Expand Down
19 changes: 6 additions & 13 deletions src/coreclr/jit/simdcodegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -536,19 +536,12 @@ void CodeGen::genSimd12UpperClear(regNumber tgtReg)
else
{
// Preserve element 0, 1, and 2; Zero element 3

if (zroSimd12Elm3 == NO_FIELD_HANDLE)
{
simd16_t constValue;

constValue.u32[0] = 0xFFFFFFFF;
constValue.u32[1] = 0xFFFFFFFF;
constValue.u32[2] = 0xFFFFFFFF;
constValue.u32[3] = 0x00000000;

zroSimd12Elm3 = GetEmitter()->emitSimd16Const(constValue);
}

simd16_t constValue;
constValue.u32[0] = 0xFFFFFFFF;
constValue.u32[1] = 0xFFFFFFFF;
constValue.u32[2] = 0xFFFFFFFF;
constValue.u32[3] = 0x00000000;
CORINFO_FIELD_HANDLE zroSimd12Elm3 = GetEmitter()->emitSimd16Const(constValue);
GetEmitter()->emitIns_SIMD_R_R_C(INS_andps, EA_16BYTE, tgtReg, tgtReg, zroSimd12Elm3, 0, INS_OPTS_NONE);
}
}
Expand Down
30 changes: 2 additions & 28 deletions src/coreclr/jit/utils.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Licensed to the .NET Foundation under one or more agreements.
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

// ===================================================================================================
Expand Down Expand Up @@ -2230,33 +2230,7 @@ unsigned CountDigits(double num, unsigned base /* = 10 */)

double FloatingPointUtils::convertUInt64ToDouble(unsigned __int64 uIntVal)
{
__int64 s64 = uIntVal;
double d;
if (s64 < 0)
{
#if defined(TARGET_XARCH)
// RyuJIT codegen and clang (or gcc) may produce different results for casting uint64 to
// double, and the clang result is more accurate. For example,
// 1) (double)0x84595161401484A0UL --> 43e08b2a2c280290 (RyuJIT codegen or VC++)
// 2) (double)0x84595161401484A0UL --> 43e08b2a2c280291 (clang or gcc)
// If the folding optimization below is implemented by simple casting of (double)uint64_val
// and it is compiled by clang, casting result can be inconsistent, depending on whether
// the folding optimization is triggered or the codegen generates instructions for casting. //
// The current solution is to force the same math as the codegen does, so that casting
// result is always consistent.

// d = (double)(int64_t)uint64 + 0x1p64
uint64_t adjHex = 0x43F0000000000000UL;
d = (double)s64 + *(double*)&adjHex;
#else
d = (double)uIntVal;
#endif
}
else
{
d = (double)uIntVal;
}
return d;
return (double)uIntVal;
}

float FloatingPointUtils::convertUInt64ToFloat(unsigned __int64 u64)
Expand Down
34 changes: 34 additions & 0 deletions src/tests/JIT/Regression/JitBlue/Runtime_102149/Runtime_102149.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Runtime.CompilerServices;
using Xunit;

public static class Runtime_102149
{
[Fact]
public static void Test()
{
if (BitConverter.DoubleToUInt64Bits(U8_to_R8(Const(9303915604039947368UL))) != 4890948523238023291UL)
throw new InvalidOperationException("Case 1 failed");

if (BitConverter.DoubleToUInt64Bits((double)Const(9303915604039947368UL)) != 4890948523238023291UL)
throw new InvalidOperationException("Case 2 failed");

if (NonConst(10648738977740919977d) != NonConst(10648738977740919977UL))
throw new InvalidOperationException("Case 3 failed");

if (Const(10648738977740919977d) != Const(10648738977740919977UL))
throw new InvalidOperationException("Case 4 failed");
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static T Const<T>(T val) => val;

[MethodImpl(MethodImplOptions.NoInlining)]
private static T NonConst<T>(T val) => val;

[MethodImpl(MethodImplOptions.NoInlining)]
private static double U8_to_R8(ulong x) => x;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<Optimize>True</Optimize>
</PropertyGroup>
<ItemGroup>
<Compile Include="$(MSBuildProjectName).cs" />
</ItemGroup>
</Project>

0 comments on commit 9d0142f

Please sign in to comment.