diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 2520864b707421..e7b2d366d2c031 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -1194,12 +1194,10 @@ class CodeGen final : public CodeGenInterface void genSetBlockSrc(GenTreeBlk* blkNode, regNumber srcReg); void genConsumeBlockOp(GenTreeBlk* blkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg); -#ifdef FEATURE_PUT_STRUCT_ARG_STK void genConsumePutStructArgStk(GenTreePutArgStk* putArgStkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg); -#endif // FEATURE_PUT_STRUCT_ARG_STK #if FEATURE_ARG_SPLIT void genConsumeArgSplitStruct(GenTreePutArgSplit* putArgNode); #endif // FEATURE_ARG_SPLIT @@ -1287,7 +1285,6 @@ class CodeGen final : public CodeGenInterface void genPutArgStkFieldList(GenTreePutArgStk* putArgStk, unsigned outArgVarNum); #endif // !TARGET_X86 -#ifdef FEATURE_PUT_STRUCT_ARG_STK #ifdef TARGET_X86 bool genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk); void genPushReg(var_types type, regNumber srcReg); @@ -1309,7 +1306,6 @@ class CodeGen final : public CodeGenInterface #else void genStructPutArgPartialRepMovs(GenTreePutArgStk* putArgStkNode); #endif -#endif // FEATURE_PUT_STRUCT_ARG_STK void genCodeForStoreBlk(GenTreeBlk* storeBlkNode); void genCodeForInitBlkLoop(GenTreeBlk* initBlkNode); @@ -1403,14 +1399,12 @@ class CodeGen final : public CodeGenInterface return compiler->lvaGetDesc(tree->AsLclVarCommon())->lvIsRegCandidate(); } -#ifdef FEATURE_PUT_STRUCT_ARG_STK #ifdef TARGET_X86 bool m_pushStkArg; #else // !TARGET_X86 unsigned m_stkArgVarNum; unsigned m_stkArgOffset; #endif // !TARGET_X86 -#endif // !FEATURE_PUT_STRUCT_ARG_STK #if defined(DEBUG) && defined(TARGET_XARCH) void genStackPointerCheck(bool doStackPointerCheck, diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 75c2cf474e6d4b..1639b86de810b8 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -201,7 +201,7 @@ void CodeGenInterface::CopyRegisterInfo() CodeGen::CodeGen(Compiler* theCompiler) : CodeGenInterface(theCompiler) { -#if defined(FEATURE_PUT_STRUCT_ARG_STK) && !defined(TARGET_X86) +#if !defined(TARGET_X86) m_stkArgVarNum = BAD_VAR_NUM; #endif diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index 945f468d86fa81..7e11a642329602 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -1741,7 +1741,6 @@ void CodeGen::genConsumeMultiOpOperands(GenTreeMultiOp* tree) } #endif // defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS) -#if FEATURE_PUT_STRUCT_ARG_STK //------------------------------------------------------------------------ // genConsumePutStructArgStk: Do liveness update for the operands of a PutArgStk node. // Also loads in the right register the addresses of the @@ -1824,7 +1823,6 @@ void CodeGen::genConsumePutStructArgStk(GenTreePutArgStk* putArgNode, inst_RV_IV(INS_mov, sizeReg, size, EA_PTRSIZE); } } -#endif // FEATURE_PUT_STRUCT_ARG_STK #if FEATURE_ARG_SPLIT //------------------------------------------------------------------------ @@ -1899,7 +1897,7 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk, unsigned outArg #if FEATURE_FASTTAILCALL if (putArgStk->gtCall->IsFastTailCall()) { - areaSize = compiler->info.compArgStackSize; + areaSize = compiler->lvaParameterStackSize; } #endif diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index e0da7904ecc688..de93862822b21a 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -3485,7 +3485,6 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) } } -#ifdef FEATURE_PUT_STRUCT_ARG_STK // Generate code for a load from some address + offset // base: tree node which can be either a local or an indir // offset: distance from the "base" location from which to load @@ -3502,7 +3501,6 @@ void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst GetEmitter()->emitIns_R_AR(ins, size, dst, base->AsIndir()->Addr()->GetRegNum(), offset); } } -#endif // FEATURE_PUT_STRUCT_ARG_STK //---------------------------------------------------------------------------------- // genCodeForCpBlkUnroll - Generate unrolled block copy code. @@ -3790,7 +3788,6 @@ void CodeGen::genCodeForCpBlkRepMovs(GenTreeBlk* cpBlkNode) instGen(INS_r_movsb); } -#ifdef FEATURE_PUT_STRUCT_ARG_STK //------------------------------------------------------------------------ // CodeGen::genMove8IfNeeded: Conditionally move 8 bytes of a struct to the argument area // @@ -4219,7 +4216,6 @@ void CodeGen::genClearStackVec3ArgUpperBits() } } #endif // defined(UNIX_AMD64_ABI) && defined(FEATURE_SIMD) -#endif // FEATURE_PUT_STRUCT_ARG_STK // // genCodeForCpObj - Generate code for CpObj nodes to copy structs that have interleaved @@ -5993,13 +5989,11 @@ void CodeGen::genCall(GenTreeCall* call) #ifdef DEBUG assert(argSize == arg.AbiInfo.ByteSize); -#ifdef FEATURE_PUT_STRUCT_ARG_STK if (source->TypeIs(TYP_STRUCT) && !source->OperIs(GT_FIELD_LIST)) { unsigned loadSize = source->GetLayout(compiler)->GetSize(); assert(argSize == roundUp(loadSize, TARGET_POINTER_SIZE)); } -#endif // FEATURE_PUT_STRUCT_ARG_STK #endif // DEBUG } } @@ -8392,8 +8386,6 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk) { unsigned baseVarNum = getBaseVarForPutArgStk(putArgStk); -#ifdef UNIX_AMD64_ABI - if (data->OperIs(GT_FIELD_LIST)) { genPutArgStkFieldList(putArgStk, baseVarNum); @@ -8407,7 +8399,6 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk) m_stkArgVarNum = BAD_VAR_NUM; return; } -#endif // UNIX_AMD64_ABI noway_assert(targetType != TYP_STRUCT); @@ -8509,7 +8500,6 @@ void CodeGen::genPushReg(var_types type, regNumber srcReg) } #endif // TARGET_X86 -#if defined(FEATURE_PUT_STRUCT_ARG_STK) // genStoreRegToStackArg: Store a register value into the stack argument area // // Arguments: @@ -8649,7 +8639,6 @@ void CodeGen::genPutStructArgStk(GenTreePutArgStk* putArgStk) unreached(); } } -#endif // defined(FEATURE_PUT_STRUCT_ARG_STK) /***************************************************************************** * diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index f8855bfc41ab2d..ae1b0da5d21f34 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6779,7 +6779,7 @@ class Compiler #endif // FEATURE_SIMD GenTree* fgMorphIndexAddr(GenTreeIndexAddr* tree); GenTree* fgMorphExpandCast(GenTreeCast* tree); - GenTreeFieldList* fgMorphLclArgToFieldlist(GenTreeLclVarCommon* lcl); + GenTreeFieldList* fgMorphLclArgToFieldList(GenTreeLclVarCommon* lcl); GenTreeCall* fgMorphArgs(GenTreeCall* call); void fgMakeOutgoingStructArgCopy(GenTreeCall* call, CallArg* arg); @@ -11802,8 +11802,7 @@ class Compiler const CORINFO_FPSTRUCT_LOWERING* GetFpStructLowering(CORINFO_CLASS_HANDLE structHandle); #endif // defined(UNIX_AMD64_ABI) - void fgMorphMultiregStructArgs(GenTreeCall* call); - GenTree* fgMorphMultiregStructArg(CallArg* arg); + bool fgTryMorphStructArg(CallArg* arg); bool killGCRefs(GenTree* tree); diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 46c57705853a1f..c90f4ee43ed9d5 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -261,14 +261,12 @@ void GenTree::InitNodeSize() GenTree::s_gtNodeSizes[GT_MOD] = TREE_NODE_SZ_LARGE; GenTree::s_gtNodeSizes[GT_UMOD] = TREE_NODE_SZ_LARGE; #endif -#ifdef FEATURE_PUT_STRUCT_ARG_STK // TODO-Throughput: This should not need to be a large node. The object info should be // obtained from the child node. GenTree::s_gtNodeSizes[GT_PUTARG_STK] = TREE_NODE_SZ_LARGE; #if FEATURE_ARG_SPLIT GenTree::s_gtNodeSizes[GT_PUTARG_SPLIT] = TREE_NODE_SZ_LARGE; #endif // FEATURE_ARG_SPLIT -#endif // FEATURE_PUT_STRUCT_ARG_STK // This list of assertions should come to contain all GenTree subtypes that are declared // "small". @@ -324,16 +322,12 @@ void GenTree::InitNodeSize() static_assert_no_msg(sizeof(GenTreeILOffset) <= TREE_NODE_SZ_SMALL); static_assert_no_msg(sizeof(GenTreePhiArg) <= TREE_NODE_SZ_SMALL); static_assert_no_msg(sizeof(GenTreeAllocObj) <= TREE_NODE_SZ_LARGE); // *** large node -#ifndef FEATURE_PUT_STRUCT_ARG_STK - static_assert_no_msg(sizeof(GenTreePutArgStk) <= TREE_NODE_SZ_SMALL); -#else // FEATURE_PUT_STRUCT_ARG_STK // TODO-Throughput: This should not need to be a large node. The object info should be // obtained from the child node. static_assert_no_msg(sizeof(GenTreePutArgStk) <= TREE_NODE_SZ_LARGE); #if FEATURE_ARG_SPLIT static_assert_no_msg(sizeof(GenTreePutArgSplit) <= TREE_NODE_SZ_LARGE); #endif // FEATURE_ARG_SPLIT -#endif // FEATURE_PUT_STRUCT_ARG_STK #ifdef FEATURE_HW_INTRINSICS static_assert_no_msg(sizeof(GenTreeHWIntrinsic) <= TREE_NODE_SZ_SMALL); @@ -1147,6 +1141,27 @@ void GenTreeFieldList::InsertFieldLIR( m_uses.InsertUse(insertAfter, new (compiler, CMK_ASTNode) Use(node, offset, type)); } +//--------------------------------------------------------------- +// SoleFieldOrThis: +// If this FIELD_LIST has only one field, then return it; otherwise return +// the field list. +// +// Returns: +// Sole field, or "this". +// +GenTree* GenTreeFieldList::SoleFieldOrThis() +{ + Use* head = m_uses.GetHead(); + assert(head != nullptr); + + if (head->GetNext() == nullptr) + { + return head->GetNode(); + } + + return this; +} + //--------------------------------------------------------------- // IsHfaArg: Is this arg considered a homogeneous floating-point aggregate? // @@ -2621,13 +2636,6 @@ void CallArgs::ResetFinalArgsAndABIInfo() m_abiInformationDetermined = false; } -#if !defined(FEATURE_PUT_STRUCT_ARG_STK) -unsigned GenTreePutArgStk::GetStackByteSize() const -{ - return genTypeSize(genActualType(gtOp1->gtType)); -} -#endif // !defined(FEATURE_PUT_STRUCT_ARG_STK) - /***************************************************************************** * * Returns non-zero if the two trees are identical. @@ -3048,6 +3056,25 @@ bool GenTree::Compare(GenTree* op1, GenTree* op2, bool swapOK) return false; } +//------------------------------------------------------------------------ +// EffectiveUse: Return the use pointer to the "effective val". +// +// Arguments: +// use - Use edge +// +// Return Value: +// Edge pointing to non-comma node. +// +GenTree** GenTree::EffectiveUse(GenTree** use) +{ + while ((*use)->OperIs(GT_COMMA)) + { + use = &(*use)->AsOp()->gtOp2; + } + + return use; +} + //------------------------------------------------------------------------ // gtHasRef: Find out whether the given tree contains a local. // @@ -12733,7 +12760,6 @@ void Compiler::gtDispTree(GenTree* tree, } } } -#if FEATURE_PUT_STRUCT_ARG_STK else if (tree->OperGet() == GT_PUTARG_STK) { const GenTreePutArgStk* putArg = tree->AsPutArgStk(); @@ -12766,7 +12792,6 @@ void Compiler::gtDispTree(GenTree* tree, printf(" (%d stackByteSize), (%d numRegs)", putArg->GetStackByteSize(), putArg->gtNumRegs); } #endif // FEATURE_ARG_SPLIT -#endif // FEATURE_PUT_STRUCT_ARG_STK if (tree->OperIs(GT_FIELD_ADDR)) { diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 2fb34c31a5cc7b..93578a3cd3d2ba 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -1794,7 +1794,6 @@ struct GenTree bool OperSupportsReverseOpEvalOrder(Compiler* comp) const; static bool RequiresNonNullOp2(genTreeOps oper); - bool IsValidCallArgument(); #endif // DEBUG inline bool IsIntegralConst(ssize_t constVal) const; @@ -1938,6 +1937,8 @@ struct GenTree //--------------------------------------------------------------------- + static GenTree** EffectiveUse(GenTree** use); + #if defined(DEBUG) || CALL_ARG_STATS || COUNT_BASIC_BLOCKS || COUNT_LOOPS || EMITTER_STATS || MEASURE_MEM_ALLOC || \ NODEBASH_STATS || MEASURE_NODE_SIZE || COUNT_AST_OPERS || DUMP_FLOWGRAPHS static const char* OpName(genTreeOps op); @@ -2829,6 +2830,8 @@ struct GenTreeFieldList : public GenTree // Insert a new field use after the specified use without updating side effect flags. void InsertFieldLIR(Compiler* compiler, Use* insertAfter, GenTree* node, unsigned offset, var_types type); + GenTree* SoleFieldOrThis(); + //-------------------------------------------------------------------------- // Equals: Check if 2 FIELD_LIST nodes are equal. // @@ -4948,8 +4951,6 @@ class CallArgs void SetNeedsTemp(CallArg* arg); bool IsNonStandard(Compiler* comp, GenTreeCall* call, CallArg* arg); - GenTree* MakeTmpArgNode(Compiler* comp, CallArg* arg, unsigned lclNum); - // clang-format off bool HasThisPointer() const { return m_hasThisPointer; } bool HasRetBuffer() const { return m_hasRetBuffer; } @@ -8654,9 +8655,7 @@ struct GenTreePutArgStk : public GenTreeUnOp { private: unsigned m_byteOffset; -#ifdef FEATURE_PUT_STRUCT_ARG_STK unsigned m_byteSize; // The number of bytes that this argument is occupying on the stack with padding. -#endif public: #if defined(UNIX_X86_ABI) @@ -8674,7 +8673,6 @@ struct GenTreePutArgStk : public GenTreeUnOp // In future if we need to add more such bool fields consider bit fields. #endif -#ifdef FEATURE_PUT_STRUCT_ARG_STK // Instruction selection: during codegen time, what code sequence we will be using // to encode this operation. // TODO-Throughput: The following information should be obtained from the child @@ -8694,23 +8692,18 @@ struct GenTreePutArgStk : public GenTreeUnOp private: uint8_t m_argLoadSizeDelta; #endif // TARGET_XARCH -#endif // FEATURE_PUT_STRUCT_ARG_STK public: - GenTreePutArgStk(genTreeOps oper, - var_types type, - GenTree* op1, - unsigned stackByteOffset, -#if defined(FEATURE_PUT_STRUCT_ARG_STK) - unsigned stackByteSize, -#endif + GenTreePutArgStk(genTreeOps oper, + var_types type, + GenTree* op1, + unsigned stackByteOffset, + unsigned stackByteSize, GenTreeCall* callNode, bool putInIncomingArgArea) : GenTreeUnOp(oper, type, op1 DEBUGARG(/*largeNode*/ false)) , m_byteOffset(stackByteOffset) -#if defined(FEATURE_PUT_STRUCT_ARG_STK) , m_byteSize(stackByteSize) -#endif #if defined(UNIX_X86_ABI) , gtPadAlign(0) #endif @@ -8720,12 +8713,10 @@ struct GenTreePutArgStk : public GenTreeUnOp #if FEATURE_FASTTAILCALL , gtPutInIncomingArgArea(putInIncomingArgArea) #endif // FEATURE_FASTTAILCALL -#if defined(FEATURE_PUT_STRUCT_ARG_STK) , gtPutArgStkKind(Kind::Invalid) #if defined(TARGET_XARCH) , m_argLoadSizeDelta(UINT8_MAX) #endif -#endif // FEATURE_PUT_STRUCT_ARG_STK { } @@ -8766,7 +8757,6 @@ struct GenTreePutArgStk : public GenTreeUnOp } #endif -#ifdef FEATURE_PUT_STRUCT_ARG_STK unsigned GetStackByteSize() const { return m_byteSize; @@ -8813,9 +8803,6 @@ struct GenTreePutArgStk : public GenTreeUnOp { return gtPutArgStkKind == Kind::Push; } -#else // !FEATURE_PUT_STRUCT_ARG_STK - unsigned GetStackByteSize() const; -#endif // !FEATURE_PUT_STRUCT_ARG_STK #if DEBUGGABLE_GENTREE GenTreePutArgStk() @@ -8831,11 +8818,9 @@ struct GenTreePutArgSplit : public GenTreePutArgStk { unsigned gtNumRegs; - GenTreePutArgSplit(GenTree* op1, - unsigned stackByteOffset, -#if defined(FEATURE_PUT_STRUCT_ARG_STK) - unsigned stackByteSize, -#endif + GenTreePutArgSplit(GenTree* op1, + unsigned stackByteOffset, + unsigned stackByteSize, unsigned numRegs, GenTreeCall* callNode, bool putIncomingArgArea) @@ -8843,9 +8828,7 @@ struct GenTreePutArgSplit : public GenTreePutArgStk TYP_STRUCT, op1, stackByteOffset, -#if defined(FEATURE_PUT_STRUCT_ARG_STK) stackByteSize, -#endif callNode, putIncomingArgArea) , gtNumRegs(numRegs) @@ -9927,50 +9910,6 @@ inline bool GenTree::IsBoxedValue() return (gtOper == GT_BOX) && (gtFlags & GTF_BOX_VALUE); } -#ifdef DEBUG -//------------------------------------------------------------------------ -// IsValidCallArgument: Given an GenTree node that represents an argument -// enforce (or don't enforce) the following invariant. -// -// Arguments: -// instance method for a GenTree node -// -// Return values: -// true: the GenTree node is accepted as a valid argument -// false: the GenTree node is not accepted as a valid argument -// -// Notes: -// For targets that don't support arguments as a list of fields, we do not support GT_FIELD_LIST. -// -// Currently for AMD64 UNIX we allow a limited case where a GT_FIELD_LIST is -// allowed but every element must be a GT_LCL_FLD. -// -// For the future targets that allow for Multireg args (and this includes the current ARM64 target), -// or that allow for passing promoted structs, we allow a GT_FIELD_LIST of arbitrary nodes. -// These would typically start out as GT_LCL_VARs or GT_LCL_FLDS or GT_INDs, -// but could be changed into constants or GT_COMMA trees by the later -// optimization phases. - -inline bool GenTree::IsValidCallArgument() -{ - if (OperIs(GT_FIELD_LIST)) - { -#if !FEATURE_MULTIREG_ARGS && !FEATURE_PUT_STRUCT_ARG_STK - - return false; - -#else // FEATURE_MULTIREG_ARGS or FEATURE_PUT_STRUCT_ARG_STK - - // We allow this GT_FIELD_LIST as an argument - return true; - -#endif // FEATURE_MULTIREG_ARGS or FEATURE_PUT_STRUCT_ARG_STK - } - // We don't have either kind of list, so it satisfies the invariant. - return true; -} -#endif // DEBUG - inline GenTree* GenTree::gtGetOp1() const { return AsOp()->gtOp1; diff --git a/src/coreclr/jit/jit.h b/src/coreclr/jit/jit.h index 983103b80cef12..837a15faa21b54 100644 --- a/src/coreclr/jit/jit.h +++ b/src/coreclr/jit/jit.h @@ -345,11 +345,6 @@ typedef ptrdiff_t ssize_t; #define UNIX_LOONGARCH64_ONLY(x) #endif // TARGET_LOONGARCH64 -#if defined(UNIX_AMD64_ABI) || !defined(TARGET_64BIT) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || \ - defined(TARGET_RISCV64) -#define FEATURE_PUT_STRUCT_ARG_STK 1 -#endif - #if defined(UNIX_AMD64_ABI) #define UNIX_AMD64_ABI_ONLY_ARG(x) , x #define UNIX_AMD64_ABI_ONLY(x) x diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index bd8997941de4b7..5f8bdd9f27b3f6 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -1693,10 +1693,7 @@ GenTree* Lowering::NewPutArg(GenTreeCall* call, GenTree* arg, CallArg* callArg, const bool putInIncomingArgArea = call->IsFastTailCall(); putArg = new (comp, GT_PUTARG_STK) GenTreePutArgStk(GT_PUTARG_STK, TYP_VOID, arg, stackSeg.GetStackOffset(), -#ifdef FEATURE_PUT_STRUCT_ARG_STK - stackSeg.GetStackSize(), -#endif - call, putInIncomingArgArea); + stackSeg.GetStackSize(), call, putInIncomingArgArea); } } diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index f3d6ad039bcf17..2f99e1a1423f5d 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -669,7 +669,6 @@ void Lowering::LowerPutArgStk(GenTreePutArgStk* putArgStk) return; } -#ifdef FEATURE_PUT_STRUCT_ARG_STK if (src->TypeIs(TYP_STRUCT)) { assert(src->OperIs(GT_BLK) || src->OperIsLocalRead()); @@ -756,7 +755,6 @@ void Lowering::LowerPutArgStk(GenTreePutArgStk* putArgStk) { return; } -#endif // FEATURE_PUT_STRUCT_ARG_STK assert(!src->TypeIs(TYP_STRUCT)); diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 294f67e612fbb3..0cd162a6c5a25c 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -536,11 +536,9 @@ int LinearScan::BuildNode(GenTree* tree) srcCount = 0; break; -#ifdef FEATURE_PUT_STRUCT_ARG_STK case GT_PUTARG_STK: srcCount = BuildPutArgStk(tree->AsPutArgStk()); break; -#endif // FEATURE_PUT_STRUCT_ARG_STK case GT_STORE_BLK: srcCount = BuildBlockStore(tree->AsBlk()); @@ -1679,7 +1677,6 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) return useCount; } -#ifdef FEATURE_PUT_STRUCT_ARG_STK //------------------------------------------------------------------------ // BuildPutArgStk: Set the NodeInfo for a GT_PUTARG_STK. // @@ -1853,7 +1850,6 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* putArgStk) return srcCount; } -#endif // FEATURE_PUT_STRUCT_ARG_STK //------------------------------------------------------------------------ // BuildLclHeap: Set the NodeInfo for a GT_LCLHEAP. diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 43b2d527c14795..24baa3519e073e 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -1030,64 +1030,6 @@ void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call) prevExceptionTree = argx; prevExceptionFlags = exceptionFlags; } - -#if FEATURE_MULTIREG_ARGS - // In "fgMorphMultiRegStructArg" we will expand the arg into a GT_FIELD_LIST with multiple indirections, so - // here we consider spilling it into a local. We also need to spill it in case we have a node that we do not - // currently handle in multi-reg morphing. - // This logic can be skipped when the arg is already in the right multireg arg shape. - // - if (varTypeIsStruct(argx) && !arg.m_needTmp && !argx->OperIs(GT_FIELD_LIST)) - { - if ((arg.AbiInfo.NumRegs > 0) && ((arg.AbiInfo.NumRegs + arg.AbiInfo.GetStackSlotsNumber()) > 1)) - { - if ((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0) - { - // Spill multireg struct arguments that have stores or calls embedded in them. - SetNeedsTemp(&arg); - } - else if (!argx->OperIsLocalRead() && !argx->OperIsLoad()) - { - // TODO-CQ: handle HWI/SIMD/COMMA nodes in multi-reg morphing. - SetNeedsTemp(&arg); - } - else if (comp->opts.OptimizationEnabled()) - { - // Finally, we call gtPrepareCost to measure the cost of evaluating this tree. - comp->gtPrepareCost(argx); - - if (argx->GetCostEx() > (6 * IND_COST_EX)) - { - // Spill multireg struct arguments that are expensive to evaluate twice. - SetNeedsTemp(&arg); - } - } - } - - // We are only able to expand certain "BLK"s into field lists, so here we spill all the - // "mis-sized" ones. We could in theory support them directly with some arithmetic and - // shifts, but these cases are rare enough that it is probably not worth the complexity. - // No need to do this for stack args as they are directly supported by codegen. - // - if (argx->OperIs(GT_BLK) && (arg.AbiInfo.GetRegNum() != REG_STK)) - { - GenTreeBlk* argObj = argx->AsBlk(); - unsigned structSize = argObj->Size(); - unsigned lastLoadSize = structSize % TARGET_POINTER_SIZE; - - if ((lastLoadSize != 0) && !isPow2(lastLoadSize)) - { -#ifdef TARGET_ARM - // On ARM we don't expand split args larger than 16 bytes into field lists. - if (!arg.AbiInfo.IsSplit() || (structSize <= 16)) -#endif // TARGET_ARM - { - SetNeedsTemp(&arg); - } - } - } - } -#endif // FEATURE_MULTIREG_ARGS } #if FEATURE_FIXED_OUT_ARGS @@ -1475,51 +1417,6 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) assert(argsRemaining == 0); } -//------------------------------------------------------------------------------ -// MakeTmpArgNode: -// Create a temp for an argument if needed. We usually need this to be done -// in order to enforce ordering of the evaluation of arguments. -// -// Return Value: -// the newly created temp var tree. -// -GenTree* CallArgs::MakeTmpArgNode(Compiler* comp, CallArg* arg, unsigned lclNum) -{ - LclVarDsc* varDsc = comp->lvaGetDesc(lclNum); - var_types argType = varDsc->TypeGet(); - assert(genActualType(argType) == genActualType(arg->GetSignatureType())); - - GenTree* argNode = nullptr; - - if (varTypeIsStruct(argType)) - { - if (arg->NewAbiInfo.IsPassedByReference()) - { - argNode = comp->gtNewLclVarAddrNode(lclNum); - comp->lvaSetVarAddrExposed(lclNum DEBUGARG(AddressExposedReason::ESCAPE_ADDRESS)); - } - // TODO-CQ: currently this mirrors the logic in "fgMorphArgs", but actually we only need - // this retyping for args passed in a single register: "(NumRegs == 1) && !IsSplit()". - else if (arg->AbiInfo.ArgType != TYP_STRUCT) - { - argNode = comp->gtNewLclFldNode(lclNum, arg->AbiInfo.ArgType, 0); - comp->lvaSetVarDoNotEnregister(lclNum DEBUGARG(DoNotEnregisterReason::SwizzleArg)); - } - else - { - // We are passing this struct by value in multiple registers and/or on stack. - argNode = comp->gtNewLclvNode(lclNum, argType); - } - } - else - { - assert(!arg->NewAbiInfo.IsPassedByReference()); - argNode = comp->gtNewLclvNode(lclNum, argType); - } - - return argNode; -} - //------------------------------------------------------------------------------ // EvalArgsToTemps: Handle arguments that were marked as requiring temps. // @@ -1601,29 +1498,36 @@ void CallArgs::EvalArgsToTemps(Compiler* comp, GenTreeCall* call) } #endif -#if defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI) - noway_assert(argx->gtType != TYP_STRUCT); -#endif - - if (argx->OperIs(GT_FIELD_LIST)) + GenTree* argxEffectiveVal = argx->gtEffectiveVal(); + if (argxEffectiveVal->OperIs(GT_FIELD_LIST)) { - GenTreeFieldList* fieldList = argx->AsFieldList(); + GenTreeFieldList* fieldList = argxEffectiveVal->AsFieldList(); fieldList->gtFlags &= ~GTF_ALL_EFFECT; - for (GenTreeFieldList::Use& use : fieldList->Uses()) - { - unsigned tmpVarNum = comp->lvaGrabTemp(true DEBUGARG("argument with side effect")); - GenTree* store = comp->gtNewTempStore(tmpVarNum, use.GetNode()); - store->SetMorphed(comp); + auto appendEffect = [=, &setupArg](GenTree* effect) { if (setupArg == nullptr) { - setupArg = store; + setupArg = effect; } else { - setupArg = comp->gtNewOperNode(GT_COMMA, TYP_VOID, setupArg, store); + setupArg = comp->gtNewOperNode(GT_COMMA, TYP_VOID, setupArg, effect); setupArg->SetMorphed(comp); } + }; + + for (GenTree* comma = argx; comma->OperIs(GT_COMMA); comma = comma->gtGetOp2()) + { + appendEffect(comma->gtGetOp1()); + } + + for (GenTreeFieldList::Use& use : fieldList->Uses()) + { + unsigned tmpVarNum = comp->lvaGrabTemp(true DEBUGARG("argument with side effect")); + GenTree* store = comp->gtNewTempStore(tmpVarNum, use.GetNode()); + store->SetMorphed(comp); + + appendEffect(store); GenTree* setupUse = comp->gtNewLclvNode(tmpVarNum, genActualType(use.GetNode())); setupUse->SetMorphed(comp); @@ -1662,6 +1566,8 @@ void CallArgs::EvalArgsToTemps(Compiler* comp, GenTreeCall* call) { // Create a GT_LCL_FLD using the wider type to go to the late argument list defArg = comp->gtNewLclFldNode(tmpVarNum, scalarType, 0); + + comp->lvaSetVarDoNotEnregister(tmpVarNum DEBUGARG(DoNotEnregisterReason::LocalField)); } else { @@ -1700,14 +1606,6 @@ void CallArgs::EvalArgsToTemps(Compiler* comp, GenTreeCall* call) defArg = argx; -#if defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI) - - // All structs are either passed (and retyped) as integral types, OR they - // are passed by reference. - noway_assert(argx->gtType != TYP_STRUCT); - -#endif // !(defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)) - #ifdef DEBUG if (comp->verbose) { @@ -2408,9 +2306,6 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) // information about late arguments in CallArgs. // This information is used later to construct the late args - // Note that this name a misnomer - it indicates that there are struct args - // that are passed by value in more than one register or on stack. - bool hasMultiregStructArgs = false; for (CallArg& arg : call->gtArgs.Args()) { GenTree** parentArgx = &arg.EarlyNodeRef(); @@ -2453,287 +2348,33 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) argx->gtType = TYP_I_IMPL; } - bool isStructArg = varTypeIsStruct(arg.GetSignatureType()); - GenTree* argObj = argx->gtEffectiveVal(); - bool makeOutArgCopy = false; - - if (argObj->OperIs(GT_FIELD_LIST)) + if (varTypeIsStruct(arg.GetSignatureType()) && !reMorphing) { - // FIELD_LISTs can be created directly by physical promotion. - // Physical promotion will create this shape even for single-reg - // arguments. We strip the field list here for that case as the - // rest of the JIT does not expect single-reg args to be wrapped - // like that. - if (arg.NewAbiInfo.HasExactlyOneRegisterSegment()) + bool makeOutArgCopy = false; + if (arg.NewAbiInfo.IsPassedByReference()) { - GenTreeFieldList* fieldList = argObj->AsFieldList(); - assert(fieldList->Uses().GetHead()->GetNext() == nullptr); - GenTree* node = fieldList->Uses().GetHead()->GetNode(); - - JITDUMP("Replacing single-field FIELD_LIST [%06u] by sole field [%06u]\n", dspTreeID(fieldList), - dspTreeID(node)); - - assert(varTypeUsesSameRegType(node, arg.AbiInfo.ArgType)); - GenTree** effectiveUse = parentArgx; - while ((*effectiveUse)->OperIs(GT_COMMA)) - { - effectiveUse = &(*effectiveUse)->AsOp()->gtOp2; - } - *effectiveUse = node; - - argx = *parentArgx; - argObj = node; + makeOutArgCopy = true; } - } - else if (isStructArg && !reMorphing) - { - unsigned originalSize; - if (argObj->TypeIs(TYP_STRUCT)) + else if (fgTryMorphStructArg(&arg)) { - assert(argObj->OperIs(GT_BLK, GT_LCL_VAR, GT_LCL_FLD)); - originalSize = argObj->GetLayout(this)->GetSize(); + argx = *parentArgx; } else { - originalSize = genTypeSize(argx); - } - - assert(argx->TypeGet() == arg.GetSignatureType()); - assert(originalSize == info.compCompHnd->getClassSize(arg.GetSignatureClassHandle())); - - // First, handle the case where the argument is passed by reference. - if (arg.NewAbiInfo.IsPassedByReference()) - { - assert(arg.AbiInfo.ByteSize == TARGET_POINTER_SIZE); makeOutArgCopy = true; -#ifdef UNIX_AMD64_ABI - assert(!"Structs are not passed by reference on x64/ux"); -#endif // UNIX_AMD64_ABI } - else // This is passed by value. - { - unsigned structSize = originalSize; - unsigned passingSize = originalSize; - - // Check to see if we can transform this struct load (GT_BLK) into a GT_IND of the appropriate size. - // When it can do this is platform-dependent: - // - In general, it can be done for power of 2 structs that fit in a single register. - // - For ARM and ARM64 it must also be a non-HFA struct, or have a single field. - // - This is irrelevant for X86, since structs are always passed by value on the stack. - // - var_types structBaseType = arg.AbiInfo.ArgType; - bool argIsLocal = argObj->OperIsLocalRead(); - bool canTransform = false; - if (structBaseType != TYP_STRUCT) - { - if (isPow2(passingSize)) - { - canTransform = - (!arg.AbiInfo.IsHfaArg() || (passingSize == genTypeSize(arg.AbiInfo.GetHfaType()))); - } - else - { - // We can pass non-power-of-2 structs in a register, but we can only transform in that - // case if the arg is a local. - canTransform = argIsLocal; - passingSize = genTypeSize(structBaseType); - } - } -#if !defined(TARGET_X86) - else - { - hasMultiregStructArgs = true; - } -#endif // !TARGET_X86 - - if (!canTransform) - { -#if defined(TARGET_AMD64) -#ifndef UNIX_AMD64_ABI - // On Windows structs are always copied and passed by reference (handled above) unless they are - // passed by value in a single register. - assert(arg.AbiInfo.GetStackSlotsNumber() == 1); - makeOutArgCopy = true; -#else // UNIX_AMD64_ABI - // On Unix, structs are always passed by value. - // We only need a copy if we have one of the following: - // - The sizes don't match for a non-lclVar argument. - // - We have a known struct type (e.g. SIMD) that requires multiple registers. - // TODO-Amd64-Unix-Throughput: We don't need to keep the structDesc in the argEntry if it's not - // actually passed in registers. - if (arg.AbiInfo.IsPassedInRegisters()) - { - if (argObj->OperIs(GT_BLK)) - { - if (passingSize != structSize) - { - makeOutArgCopy = true; - } - } - else if (!argIsLocal) - { - // This should only be the case of a value directly producing a known struct type. - assert(argObj->TypeGet() != TYP_STRUCT); - if (arg.AbiInfo.NumRegs > 1) - { - makeOutArgCopy = true; - } - } - } -#endif // UNIX_AMD64_ABI -#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - if ((passingSize != structSize) && !argIsLocal) - { - makeOutArgCopy = true; - } -#endif // defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - } - else if (argObj->TypeGet() != structBaseType) - { - // We have a struct argument that fits into a register, and it is either a power of 2, - // or a local. - // Change our argument, as needed, into a value of the appropriate type. - assert(structBaseType != TYP_STRUCT); - - // On RISC-V / LoongArch the passing size may be smaller than the original size if we pass a struct - // according to hardware FP calling convention and it has empty fields -#if !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) - assert(genTypeSize(structBaseType) >= originalSize); -#endif - - if (argObj->OperIsLoad()) - { - assert(argObj->AsIndir()->Size() == genTypeSize(structBaseType)); - argObj->SetOper(GT_IND); - // Use ChangeType over argx to update types in COMMAs as well - argx->ChangeType(structBaseType); - } - else if (argObj->OperIsLocalRead()) - { - unsigned lclNum = argObj->AsLclVarCommon()->GetLclNum(); - LclVarDsc* varDsc = lvaGetDesc(lclNum); - unsigned lclOffset = argObj->AsLclVarCommon()->GetLclOffs(); - unsigned argLclNum = BAD_VAR_NUM; - LclVarDsc* argVarDsc = nullptr; - - if (varDsc->lvPromoted) - { - argLclNum = lvaGetFieldLocal(varDsc, lclOffset); - } - else if (lclOffset == 0) - { - argLclNum = lclNum; - } - - // See if this local goes into the right register file. - // TODO-CQ: we could use a bitcast here, if it does not. - if (argLclNum != BAD_VAR_NUM) - { - argVarDsc = lvaGetDesc(argLclNum); - if ((genTypeSize(argVarDsc) != originalSize) || - !varTypeUsesSameRegType(argVarDsc, structBaseType)) - { - argLclNum = BAD_VAR_NUM; - } - } - - if (argLclNum != BAD_VAR_NUM) - { - argx->ChangeType(argVarDsc->TypeGet()); - argObj->SetOper(GT_LCL_VAR); - argObj->AsLclVar()->SetLclNum(argLclNum); - } - else if (varDsc->lvPromoted) - { - // Preserve independent promotion of "argObj" by decomposing the copy. - // TODO-CQ: condition this on the promotion actually being independent. - makeOutArgCopy = true; - } -#ifdef TARGET_AMD64 - else if (!argObj->OperIs(GT_LCL_VAR) || !argObj->TypeIs(TYP_SIMD8)) // Handled by lowering. -#else // !TARGET_ARM64 - else -#endif // !TARGET_ARM64 - { - // TODO-CQ: perform this transformation in lowering instead of here and - // avoid marking enregisterable structs DNER. - argx->ChangeType(structBaseType); - if (argObj->OperIs(GT_LCL_VAR)) - { - argObj->SetOper(GT_LCL_FLD); -#if defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64) - // Single-field structs passed according to floating-point calling convention may have - // padding in front of the field passed in a register - assert(arg.NewAbiInfo.NumSegments == 1); - // Single-slot structs passed according to integer calling convention also go through - // here but since they always have zero offset nothing should change - ABIPassingSegment seg = arg.NewAbiInfo.Segment(0); - assert((seg.IsPassedInRegister() && genIsValidFloatReg(seg.GetRegister())) || - (seg.Offset == 0)); - argObj->AsLclFld()->SetLclOffs(seg.Offset); -#endif - } - lvaSetVarDoNotEnregister(lclNum DEBUGARG(DoNotEnregisterReason::SwizzleArg)); - } - } + if (makeOutArgCopy) + { + fgMakeOutgoingStructArgCopy(call, &arg); - assert(varTypeIsEnregisterable(argObj) || - (makeOutArgCopy && varTypeIsEnregisterable(structBaseType))); - } - else if (argObj->OperIs(GT_LCL_VAR) && lvaGetDesc(argObj->AsLclVar())->lvPromoted) + if (arg.GetEarlyNode() != nullptr) { - // Set DNER to block independent promotion. - lvaSetVarDoNotEnregister(argObj->AsLclVar()->GetLclNum() - DEBUGARG(DoNotEnregisterReason::IsStructArg)); + flagsSummary |= arg.GetEarlyNode()->gtFlags; } } } - if (makeOutArgCopy) - { - fgMakeOutgoingStructArgCopy(call, &arg); - - if (arg.GetEarlyNode() != nullptr) - { - flagsSummary |= arg.GetEarlyNode()->gtFlags; - } - } - -#if FEATURE_MULTIREG_ARGS - if (!isStructArg) - { -#ifdef TARGET_ARM - if ((arg.AbiInfo.ArgType == TYP_LONG) || (arg.AbiInfo.ArgType == TYP_DOUBLE)) - { - assert((arg.AbiInfo.NumRegs == 2) || (arg.AbiInfo.GetStackSlotsNumber() == 2)); - } - else -#endif - { - // We must have exactly one register or slot. - assert(((arg.AbiInfo.NumRegs == 1) && (arg.AbiInfo.GetStackSlotsNumber() == 0)) || - ((arg.AbiInfo.NumRegs == 0) && (arg.AbiInfo.GetStackSlotsNumber() == 1))); - } - } -#endif - -#if defined(TARGET_X86) - if (isStructArg) - { - if (argx->OperIs(GT_LCL_VAR) && - (lvaGetPromotionType(argx->AsLclVar()->GetLclNum()) == PROMOTION_TYPE_INDEPENDENT)) - { - argx = fgMorphLclArgToFieldlist(argx->AsLclVar()); - arg.SetEarlyNode(argx); - } - else if (argx->OperIs(GT_LCL_FLD)) - { - lvaSetVarDoNotEnregister(argx->AsLclFld()->GetLclNum() DEBUGARG(DoNotEnregisterReason::LocalField)); - } - } -#endif // TARGET_X86 - flagsSummary |= arg.GetEarlyNode()->gtFlags; } // end foreach argument loop @@ -2779,11 +2420,6 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) call->gtArgs.EvalArgsToTemps(this, call); } - if (hasMultiregStructArgs) - { - fgMorphMultiregStructArgs(call); - } - #ifdef DEBUG if (verbose) { @@ -2802,83 +2438,29 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) #endif //----------------------------------------------------------------------------- -// fgMorphMultiregStructArgs: Locate the TYP_STRUCT arguments and -// call fgMorphMultiregStructArg on each of them. +// fgTryMorphStructArg: +// Given a varTypeIsStruct argument, try to morph it into a shape that the +// backend supports. // // Arguments: -// call : a GenTreeCall node that has one or more TYP_STRUCT arguments\. +// arg - The argument // -// Notes: -// We only call fgMorphMultiregStructArg for struct arguments that are not passed as simple types. -// It will ensure that the struct arguments are in the correct form. -// If this method fails to find any TYP_STRUCT arguments it will assert. -// -void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call) -{ - bool foundStructArg = false; - GenTreeFlags flagsSummary = GTF_EMPTY; - -#ifdef TARGET_X86 - assert(!"Logic error: no MultiregStructArgs for X86"); -#endif -#if defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI) - assert(!"Logic error: no MultiregStructArgs for Windows X64 ABI"); -#endif - - for (CallArg& arg : call->gtArgs.Args()) - { - if ((arg.AbiInfo.ArgType == TYP_STRUCT) && !arg.NewAbiInfo.IsPassedByReference()) - { - foundStructArg = true; - GenTree*& argx = arg.NodeRef(); - - if (!argx->OperIs(GT_FIELD_LIST)) - { - argx = fgMorphMultiregStructArg(&arg); - } - } - } - - // We should only call this method when we actually have one or more multireg struct args - assert(foundStructArg); - - // Update the flags - call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT); -} - -//----------------------------------------------------------------------------- -// fgMorphMultiregStructArg: Given a TYP_STRUCT arg from a call argument list, -// morph the argument as needed to be passed correctly. +// Returns: +// False if the argument cannot be put into a shape supported by the backend. // -// Arguments: -// arg - The argument containing a struct node. +// Remarks: +// The backend requires register-passed arguments to be of FIELD_LIST shape +// with one primitive node for each register. If the argument is passed in +// only one register, then the FIELD_LIST should be directly replaced by its +// operand. For stack-passed arguments the backend supports struct-typed +// arguments directly. // -// Notes: -// The arg node must be a GT_BLK or GT_LCL_VAR or GT_LCL_FLD of TYP_STRUCT. -// If arg node is a lclVar passed on the stack, we will ensure that any lclVars that must be on the -// stack are marked as doNotEnregister, and then we return. -// -// If it is passed by register, we mutate the argument into the GT_FIELD_LIST form -// which is only used for struct arguments. -// -// If arg is a LclVar we check if it is struct promoted and has the right number of fields -// and if they are at the appropriate offsets we will use the struct promted fields -// in the GT_FIELD_LIST nodes that we create. -// If we have a GT_LCL_VAR that isn't struct promoted or doesn't meet the requirements -// we will use a set of GT_LCL_FLDs nodes to access the various portions of the struct -// this also forces the struct to be stack allocated into the local frame. -// For the GT_BLK case will clone the address expression and generate two (or more) -// indirections. -// -GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) +bool Compiler::fgTryMorphStructArg(CallArg* arg) { - GenTree* argNode = arg->GetNode(); + GenTree** use = GenTree::EffectiveUse(&arg->NodeRef()); + GenTree* argNode = *use; assert(varTypeIsStruct(argNode)); -#if !defined(TARGET_ARMARCH) && !defined(UNIX_AMD64_ABI) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) - NYI("fgMorphMultiregStructArg requires implementation for this target"); -#endif - bool isSplit = arg->NewAbiInfo.IsSplitAcrossRegistersAndStack(); #ifdef TARGET_ARM if ((isSplit && (arg->NewAbiInfo.CountRegsAndStackSlots() > 4)) || @@ -2893,7 +2475,14 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) // TODO-Arm-CQ: support decomposing "large" promoted structs into field lists. if (!isSplit) { - argNode = fgMorphLclArgToFieldlist(argNode->AsLclVar()); + GenTreeFieldList* fieldList = fgMorphLclArgToFieldList(argNode->AsLclVar()); + // TODO-Cleanup: The containment/reg optionality for x86 is + // conservative in the "no field list" case. +#ifdef TARGET_X86 + *use = fieldList; +#else + *use = fieldList->SoleFieldOrThis(); +#endif } else { @@ -2905,39 +2494,39 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) { lvaSetVarDoNotEnregister(argNode->AsLclFld()->GetLclNum() DEBUGARG(DoNotEnregisterReason::LocalField)); } + else if (argNode->OperIs(GT_BLK)) + { + ClassLayout* layout = argNode->AsBlk()->GetLayout(); - return argNode; - } + var_types primitiveType = layout->GetRegisterType(); + if (primitiveType != TYP_UNDEF) + { + JITDUMP("Converting argument [%06u] to primitive indirection\n", dspTreeID(argNode)); -#if FEATURE_MULTIREG_ARGS - ClassLayout* layout = argNode->TypeIs(TYP_STRUCT) ? argNode->GetLayout(this) : nullptr; - unsigned structSize = argNode->TypeIs(TYP_STRUCT) ? layout->GetSize() : genTypeSize(argNode); + argNode->SetOper(GT_IND); + argNode->gtType = primitiveType; + } + } - if (layout != nullptr) - { - assert(ClassLayout::AreCompatible(typGetObjLayout(arg->GetSignatureClassHandle()), layout)); - } - else - { - assert(varTypeIsSIMD(argNode) && varTypeIsSIMD(arg->GetSignatureType())); + // Potentially update commas + arg->GetNode()->ChangeType((*use)->TypeGet()); + return true; } - // We should still have a TYP_STRUCT - assert(varTypeIsStruct(argNode)); - - GenTreeFieldList* newArg = nullptr; + GenTree* newArg = nullptr; - // Are we passing a struct LclVar? - // if (argNode->OperIs(GT_LCL_VAR)) { GenTreeLclVarCommon* lclNode = argNode->AsLclVarCommon(); unsigned lclNum = lclNode->GetLclNum(); LclVarDsc* varDsc = lvaGetDesc(lclNum); - varDsc->lvIsMultiRegArg = true; + if (!arg->NewAbiInfo.HasExactlyOneRegisterSegment()) + { + varDsc->lvIsMultiRegArg = true; + } - JITDUMP("Multireg struct argument V%02u : ", lclNum); + JITDUMP("Struct argument V%02u: ", lclNum); JITDUMPEXEC(arg->Dump(this)); // Try to see if we can use the promoted fields to pass this argument. @@ -2964,7 +2553,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) { // TODO-CQ: We should be able to tolerate mismatches by inserting GT_BITCAST in lowering. // - JITDUMP("Multireg struct V%02u will be passed using GT_LCL_FLD because of type mismatch: " + JITDUMP("Struct V%02u will be passed using GT_LCL_FLD because of type mismatch: " "register type is %s, field local V%02u's type is %s\n", lclNum, varTypeName(regType), fieldLclNum, varTypeName(fieldType)); fieldsMatch = false; @@ -2986,19 +2575,80 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) if (fieldsMatch) { - newArg = fgMorphLclArgToFieldlist(lclNode); + newArg = fgMorphLclArgToFieldList(lclNode)->SoleFieldOrThis(); } } } + else if (argNode->OperIsFieldList()) + { + // We can already see a field list here if physical promotion created it. + // Physical promotion will also create single-field field lists which + // the backend does not expect, so fix that here. + newArg = argNode->AsFieldList()->SoleFieldOrThis(); + if (newArg == argNode) + { + return true; + } + } // If we were not able to use the promoted fields... // if (newArg == nullptr) { - if (!arg->NewAbiInfo.HasAnyRegisterSegment()) + bool isUseOfIndependentlyPromotedStruct = + argNode->OperIs(GT_LCL_VAR) && + (lvaGetPromotionType(argNode->AsLclVarCommon()->GetLclNum()) == PROMOTION_TYPE_INDEPENDENT); + if (isUseOfIndependentlyPromotedStruct) + { + if (arg->NewAbiInfo.HasExactlyOneRegisterSegment()) + { + // We already tried to use the fields above, but that failed. + // Here we prefer to create a copy to avoid DNER'ing the local. + // That turns out to be profitable mostly for single-register arguments. + return false; + } + } + else if (!argNode->TypeIs(TYP_STRUCT) && arg->NewAbiInfo.HasExactlyOneRegisterSegment()) + { + // This can be treated primitively. Leave it alone. + return true; + } + + if (!argNode->OperIsLocalRead() && !argNode->OperIsLoad()) { - // We leave this stack passed argument alone. - return argNode; + // A node we do not know how to turn into multiple registers. + // Usually HWINTRINSIC. Bail. + return false; + } + + ClassLayout* layout = argNode->TypeIs(TYP_STRUCT) ? argNode->GetLayout(this) : nullptr; + unsigned structSize = argNode->TypeIs(TYP_STRUCT) ? layout->GetSize() : genTypeSize(argNode); + + if (layout != nullptr) + { + assert(ClassLayout::AreCompatible(typGetObjLayout(arg->GetSignatureClassHandle()), layout)); + } + else + { + assert(varTypeIsSIMD(argNode) && varTypeIsSIMD(arg->GetSignatureType())); + } + + if (argNode->OperIsLoad()) + { + unsigned lastLoadSize = structSize % TARGET_POINTER_SIZE; + if ((lastLoadSize != 0) && !isPow2(lastLoadSize)) + { + // Cannot read this size from a non-local. Bail. + return false; + } + + GenTree* indirAddr = argNode->AsIndir()->Addr(); + if (((indirAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0) && + (arg->NewAbiInfo.CountRegsAndStackSlots() > 1)) + { + // Cannot create multiple uses of the address. Bail. + return false; + } } auto createSlotAccess = [=](unsigned offset, var_types type) -> GenTree* { @@ -3006,16 +2656,10 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) if (type == TYP_UNDEF) { - if ((structSize - offset) < TARGET_POINTER_SIZE) + unsigned sizeLeft = structSize - offset; + if (sizeLeft < TARGET_POINTER_SIZE) { - // ArgsComplete has made it so that for loads from memory - // we will only see the easily handleable cases here, For - // locals we may see odd sizes, but for those we can load - // "too much" from the stack frame, and thus can just round - // up the size. - assert(isPow2(structSize - offset) || argNode->OperIsLocalRead()); - - switch (structSize - offset) + switch (sizeLeft) { case 1: type = TYP_UBYTE; @@ -3038,7 +2682,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) } #ifdef TARGET_ARM64 - if (argNode->OperIsLocalRead()) + if ((offset > 0) && argNode->OperIsLocalRead()) { // For arm64 it's beneficial to consider all tails to // be TYP_I_IMPL to allow more ldp's. @@ -3058,10 +2702,28 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) if (argNode->OperIsLocalRead()) { - GenTreeLclFld* lclFld = gtNewLclFldNode(argNode->AsLclVarCommon()->GetLclNum(), genActualType(type), - argNode->AsLclVarCommon()->GetLclOffs() + offset); - lclFld->SetMorphed(this); - return lclFld; + GenTreeLclVarCommon* lclVar = argNode->AsLclVarCommon(); + LclVarDsc* dsc = lvaGetDesc(lclVar); + GenTree* result; + // We sometimes end up with struct reinterpretations where the + // retyping into a primitive allows us to replace by a scalar + // local here, so make sure we do that if possible. + if ((lclVar->GetLclOffs() == 0) && (offset == 0) && (genTypeSize(type) == genTypeSize(dsc)) && + varTypeUsesSameRegType(type, dsc)) + { + result = gtNewLclVarNode(lclVar->GetLclNum()); + } + else + { + result = gtNewLclFldNode(lclVar->GetLclNum(), type, lclVar->GetLclOffs() + offset); + + if (!dsc->lvDoNotEnregister) + { + lvaSetVarDoNotEnregister(lclVar->GetLclNum() DEBUGARG(DoNotEnregisterReason::LocalField)); + } + } + result->SetMorphed(this); + return result; } else { @@ -3075,8 +2737,6 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) } else { - assert((indirAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0); - GenTree* indirAddrDup = gtCloneExpr(indirAddr); GenTree* offsetNode = gtNewIconNode(offset, TYP_I_IMPL); addr = gtNewOperNode(GT_ADD, indirAddr->TypeGet(), indirAddrDup, offsetNode); @@ -3100,7 +2760,8 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) // createSlotAccess get the type from the layout. var_types slotType = varTypeUsesFloatReg(regType) ? regType : TYP_UNDEF; GenTree* access = createSlotAccess(seg.Offset, slotType); - newArg->AddField(this, access, seg.Offset, access->TypeGet()); + + newArg->AsFieldList()->AddField(this, access, seg.Offset, access->TypeGet()); } else { @@ -3108,33 +2769,26 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) { unsigned layoutOffset = seg.Offset + slotOffset; GenTree* access = createSlotAccess(layoutOffset, TYP_UNDEF); - newArg->AddField(this, access, layoutOffset, access->TypeGet()); + + newArg->AsFieldList()->AddField(this, access, layoutOffset, access->TypeGet()); } } } - if (argNode->OperIsLocalRead()) - { - lvaSetVarDoNotEnregister(argNode->AsLclVarCommon()->GetLclNum() - DEBUGARG(DoNotEnregisterReason::LocalField)); - } + newArg = newArg->AsFieldList()->SoleFieldOrThis(); } - // If we reach here we should have set newArg to something - noway_assert(newArg != nullptr); - - JITDUMP("fgMorphMultiregStructArg created tree:\n"); + JITDUMP("fgTryMorphStructArg created tree:\n"); DISPTREE(newArg); - argNode = newArg; // consider calling fgMorphTree(newArg); - -#endif // FEATURE_MULTIREG_ARGS - - return argNode; + *use = newArg; + // Potentially update commas + arg->GetNode()->ChangeType((*use)->TypeGet()); + return true; } //------------------------------------------------------------------------ -// fgMorphLclArgToFieldlist: Morph a GT_LCL_VAR node to a GT_FIELD_LIST of its promoted fields +// fgMorphLclArgToFieldList: Morph a GT_LCL_VAR node to a GT_FIELD_LIST of its promoted fields // // Arguments: // lcl - The GT_LCL_VAR node we will transform @@ -3142,7 +2796,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) // Return value: // The new GT_FIELD_LIST that we have created. // -GenTreeFieldList* Compiler::fgMorphLclArgToFieldlist(GenTreeLclVarCommon* lcl) +GenTreeFieldList* Compiler::fgMorphLclArgToFieldList(GenTreeLclVarCommon* lcl) { LclVarDsc* varDsc = lvaGetDesc(lcl); assert(varDsc->lvPromoted); @@ -3150,6 +2804,8 @@ GenTreeFieldList* Compiler::fgMorphLclArgToFieldlist(GenTreeLclVarCommon* lcl) unsigned fieldLclNum = varDsc->lvFieldLclStart; GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList(); + fieldList->SetMorphed(this); + for (unsigned i = 0; i < fieldCount; i++) { LclVarDsc* fieldVarDsc = lvaGetDesc(fieldLclNum); @@ -3158,7 +2814,6 @@ GenTreeFieldList* Compiler::fgMorphLclArgToFieldlist(GenTreeLclVarCommon* lcl) fieldList->AddField(this, lclVar, fieldVarDsc->lvFldOffset, fieldVarDsc->TypeGet()); fieldLclNum++; } - fieldList->SetMorphed(this); return fieldList; } @@ -3322,29 +2977,42 @@ void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, CallArg* arg) GenTree* copyBlk = gtNewStoreLclVarNode(tmp, argx); copyBlk = fgMorphCopyBlock(copyBlk); + GenTree* argNode; + if (arg->NewAbiInfo.IsPassedByReference()) + { + argNode = gtNewLclVarAddrNode(tmp); + lvaSetVarAddrExposed(tmp DEBUGARG(AddressExposedReason::ESCAPE_ADDRESS)); + } + else + { + argNode = gtNewLclvNode(tmp, lvaGetDesc(tmp)->TypeGet()); + } + argNode->SetMorphed(this); + #if FEATURE_FIXED_OUT_ARGS // For fixed out args we create the setup node here; EvalArgsToTemps knows // to handle the case of "already have a setup node" properly. arg->SetEarlyNode(copyBlk); - GenTree* argNode = call->gtArgs.MakeTmpArgNode(this, arg, tmp); - argNode->SetMorphed(this); arg->SetLateNode(argNode); -#else // !FEATURE_FIXED_OUT_ARGS +#else // !FEATURE_FIXED_OUT_ARGS // Structs are always on the stack, and thus never need temps // so we have to put the copy and temp all into one expression. - GenTree* argNode = call->gtArgs.MakeTmpArgNode(this, arg, tmp); - argNode->SetMorphed(this); - // Change the expression to "(tmp=val),tmp" argNode = gtNewOperNode(GT_COMMA, argNode->TypeGet(), copyBlk, argNode); argNode->SetMorphed(this); arg->SetEarlyNode(argNode); - #endif // !FEATURE_FIXED_OUT_ARGS + + if (!arg->NewAbiInfo.IsPassedByReference()) + { + bool morphed = fgTryMorphStructArg(arg); + // Should always succeed for an unpromoted local. + assert(morphed); + } } /*****************************************************************************