Skip to content

Commit

Permalink
Spill all nodes multi-reg morphing does not handle (#71127)
Browse files Browse the repository at this point in the history
* Spill all nodes multi-reg morphing does not handle

This (now) includes COMMAs.

* Add a test
  • Loading branch information
SingleAccretion authored Jun 23, 2022
1 parent 34d5286 commit b122149
Show file tree
Hide file tree
Showing 5 changed files with 112 additions and 56 deletions.
5 changes: 5 additions & 0 deletions src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -2136,6 +2136,11 @@ struct GenTree
gtFlags |= sourceFlags;
}

void AddAllEffectsFlags(GenTree* source)
{
AddAllEffectsFlags(source->gtFlags & GTF_ALL_EFFECT);
}

void AddAllEffectsFlags(GenTree* firstSource, GenTree* secondSource)
{
AddAllEffectsFlags((firstSource->gtFlags | secondSource->gtFlags) & GTF_ALL_EFFECT);
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/importer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1451,6 +1451,8 @@ GenTree* Compiler::impAssignStructPtr(GenTree* destAddr,
// Instead, we're going to sink the assignment below the COMMA.
src->AsOp()->gtOp2 =
impAssignStructPtr(destAddr, src->AsOp()->gtOp2, structHnd, curLevel, pAfterStmt, usedDI, block);
src->AddAllEffectsFlags(src->AsOp()->gtOp2);

return src;
}

Expand Down
114 changes: 58 additions & 56 deletions src/coreclr/jit/morph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -998,76 +998,78 @@ void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call)
bool isMultiRegArg = (arg.AbiInfo.NumRegs > 1);
#endif

if (varTypeIsStruct(argx->TypeGet()) && !arg.m_needTmp)
if (varTypeIsStruct(argx) && !arg.m_needTmp)
{
if (isMultiRegArg && ((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0))
if (isMultiRegArg)
{
// Spill multireg struct arguments that have Assignments or Calls embedded in them
SetNeedsTemp(&arg);
}
else
{
// We call gtPrepareCost to measure the cost of evaluating this tree
comp->gtPrepareCost(argx);

if (isMultiRegArg && (argx->GetCostEx() > (6 * IND_COST_EX)))
if ((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0)
{
// Spill multireg struct arguments that are expensive to evaluate twice
// Spill multireg struct arguments that have Assignments or Calls embedded in them.
SetNeedsTemp(&arg);
}
#if defined(FEATURE_SIMD) && defined(TARGET_ARM64)
else if (isMultiRegArg && varTypeIsSIMD(argx) && (argx->OperIsSimdOrHWintrinsic() || argx->IsCnsVec()))
else if (!argx->OperIsLocalRead() && !argx->OperIsIndir())
{
// Multi-reg morphing does not handle these SIMD nodes.
// TODO-CQ: handle HWI/SIMD/COMMA nodes in multi-reg morphing.
SetNeedsTemp(&arg);
}
#endif
#ifndef TARGET_ARM
// TODO-Arm: This optimization is not implemented for ARM32
// so we skip this for ARM32 until it is ported to use RyuJIT backend
//
else if (argx->OperGet() == GT_OBJ)
else
{
GenTreeObj* argObj = argx->AsObj();
unsigned structSize = argObj->GetLayout()->GetSize();
switch (structSize)
// Finally, we call gtPrepareCost to measure the cost of evaluating this tree.
comp->gtPrepareCost(argx);

if (argx->GetCostEx() > (6 * IND_COST_EX))
{
case 3:
case 5:
case 6:
case 7:
// If we have a stack based LclVar we can perform a wider read of 4 or 8 bytes
//
if (argObj->AsObj()->gtOp1->IsLocalAddrExpr() == nullptr) // Is the source not a LclVar?
{
// If we don't have a LclVar we need to read exactly 3,5,6 or 7 bytes
// For now we use a GT_CPBLK to copy the exact size into a GT_LCL_VAR temp.
//
SetNeedsTemp(&arg);
}
break;
case 11:
case 13:
case 14:
case 15:
// Spill any GT_OBJ multireg structs that are difficult to extract
//
// When we have a GT_OBJ of a struct with the above sizes we would need
// to use 3 or 4 load instructions to load the exact size of this struct.
// Instead we spill the GT_OBJ into a new GT_LCL_VAR temp and this sequence
// will use a GT_CPBLK to copy the exact size into the GT_LCL_VAR temp.
// Then we can just load all 16 bytes of the GT_LCL_VAR temp when passing
// the argument.
// Spill multireg struct arguments that are expensive to evaluate twice.
SetNeedsTemp(&arg);
}
}
}

#ifndef TARGET_ARM
// TODO-Arm: This optimization is not implemented for ARM32
// so we skip this for ARM32 until it is ported to use RyuJIT backend
//
if (argx->OperGet() == GT_OBJ)
{
GenTreeObj* argObj = argx->AsObj();
unsigned structSize = argObj->GetLayout()->GetSize();
switch (structSize)
{
case 3:
case 5:
case 6:
case 7:
// If we have a stack based LclVar we can perform a wider read of 4 or 8 bytes
//
if (argObj->AsObj()->gtOp1->IsLocalAddrExpr() == nullptr) // Is the source not a LclVar?
{
// If we don't have a LclVar we need to read exactly 3,5,6 or 7 bytes
// For now we use a GT_CPBLK to copy the exact size into a GT_LCL_VAR temp.
//
SetNeedsTemp(&arg);
break;
}
break;
case 11:
case 13:
case 14:
case 15:
// Spill any GT_OBJ multireg structs that are difficult to extract
//
// When we have a GT_OBJ of a struct with the above sizes we would need
// to use 3 or 4 load instructions to load the exact size of this struct.
// Instead we spill the GT_OBJ into a new GT_LCL_VAR temp and this sequence
// will use a GT_CPBLK to copy the exact size into the GT_LCL_VAR temp.
// Then we can just load all 16 bytes of the GT_LCL_VAR temp when passing
// the argument.
//
SetNeedsTemp(&arg);
break;

default:
break;
}
default:
break;
}
#endif // !TARGET_ARM
}
#endif // !TARGET_ARM
}
#endif // FEATURE_MULTIREG_ARGS
}
Expand Down Expand Up @@ -3754,7 +3756,7 @@ void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call)
GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg)
{
GenTree* argNode = arg->GetNode();
assert(varTypeIsStruct(argNode->TypeGet()));
assert(varTypeIsStruct(argNode));

#if !defined(TARGET_ARMARCH) && !defined(UNIX_AMD64_ABI) && !defined(TARGET_LOONGARCH64)
NYI("fgMorphMultiregStructArg requires implementation for this target");
Expand Down
28 changes: 28 additions & 0 deletions src/tests/JIT/Regression/JitBlue/Runtime_71118/Runtime_71118.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Numerics;
using System.Runtime.CompilerServices;

public class Runtime_71118
{
public static int Main()
{
return Problem(new ClassWithVtor4 { Vtor4FieldTwo = new Vector4(1, 2, 3, 4) }) ? 101 : 100;
}

[MethodImpl(MethodImplOptions.NoInlining)]
private static bool Problem(ClassWithVtor4 a)
{
return CallForVtor4(a.Vtor4FieldTwo) != a.Vtor4FieldTwo.X;
}

[MethodImpl(MethodImplOptions.NoInlining)]
private static float CallForVtor4(Vector4 vtor) => vtor.X;

class ClassWithVtor4
{
public Vector4 Vtor4FieldOne;
public Vector4 Vtor4FieldTwo;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<Optimize>True</Optimize>
<CLRTestBatchPreCommands><![CDATA[
$(CLRTestBatchPreCommands)
set DOTNET_JitStressModeNamesOnly=1
set DOTNET_JitStressModeNames=STRESS_NULL_OBJECT_CHECK
]]></CLRTestBatchPreCommands>
<BashCLRTestPreCommands><![CDATA[
$(BashCLRTestPreCommands)
export DOTNET_JitStressModeNamesOnly=1
export DOTNET_JitStressModeNames=STRESS_NULL_OBJECT_CHECK
]]></BashCLRTestPreCommands>
</PropertyGroup>
<ItemGroup>
<Compile Include="$(MSBuildProjectName).cs" />
</ItemGroup>
</Project>

0 comments on commit b122149

Please sign in to comment.