Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix arm64 funclet frame type 5 #70922

Merged
merged 5 commits into from
Jun 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -642,6 +642,7 @@ class CodeGen final : public CodeGenInterface
virtual void SetSaveFpLrWithAllCalleeSavedRegisters(bool value);
virtual bool IsSaveFpLrWithAllCalleeSavedRegisters() const;
bool genSaveFpLrWithAllCalleeSavedRegisters;
bool genForceFuncletFrameType5;
#endif // TARGET_ARM64

//-------------------------------------------------------------------------
Expand Down
103 changes: 78 additions & 25 deletions src/coreclr/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,7 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog)
JITDUMP("Frame type 5 (save FP/LR at top). #outsz=%d; #framesz=%d; localloc? %s\n",
unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, dspBool(compiler->compLocallocUsed));

assert(genSaveFpLrWithAllCalleeSavedRegisters);
assert((calleeSaveSpOffset == 0) || (calleeSaveSpOffset == REGSIZE_BYTES));

// Restore sp from fp:
Expand Down Expand Up @@ -1077,10 +1078,14 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in
* |-----------------------|
* | incoming arguments |
* +=======================+ <---- Caller's SP
* | OSR padding | // If required
* |-----------------------|
* | Varargs regs space | // Only for varargs main functions; 64 bytes
* |-----------------------|
* |Callee saved registers | // multiple of 8 bytes
* |-----------------------|
* | MonitorAcquired | // 8 bytes; for synchronized methods
* |-----------------------|
* | PSP slot | // 8 bytes (omitted in NativeAOT ABI)
* |-----------------------|
* ~ alignment padding ~ // To make the whole frame 16 byte aligned.
Expand All @@ -1104,10 +1109,14 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in
* |-----------------------|
* | incoming arguments |
* +=======================+ <---- Caller's SP
* | OSR padding | // If required
* |-----------------------|
* | Varargs regs space | // Only for varargs main functions; 64 bytes
* |-----------------------|
* |Callee saved registers | // multiple of 8 bytes
* |-----------------------|
* | MonitorAcquired | // 8 bytes; for synchronized methods
* |-----------------------|
* | PSP slot | // 8 bytes (omitted in NativeAOT ABI)
* |-----------------------|
* ~ alignment padding ~ // To make the whole frame 16 byte aligned.
Expand All @@ -1134,20 +1143,24 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in
* |-----------------------|
* | incoming arguments |
* +=======================+ <---- Caller's SP
* | OSR padding | // If required
* |-----------------------|
* | Varargs regs space | // Only for varargs main functions; 64 bytes
* |-----------------------|
* |Callee saved registers | // multiple of 8 bytes
* |-----------------------|
* | MonitorAcquired | // 8 bytes; for synchronized methods
* |-----------------------|
* | PSP slot | // 8 bytes (omitted in NativeAOT ABI)
* |-----------------------|
* ~ alignment padding ~ // To make the first SP subtraction 16 byte aligned
* |-----------------------|
* | Saved FP, LR | // 16 bytes
* | Saved FP, LR | // 16 bytes <-- SP after first adjustment (points at saved FP)
* |-----------------------|
* ~ alignment padding ~ // To make the whole frame 16 byte aligned (specifically, to 16-byte align the outgoing argument space).
* |-----------------------|
* | Outgoing arg space | // multiple of 8 bytes
* |-----------------------| <---- Ambient SP
* |-----------------------| <---- Ambient SP (SP after second adjustment)
* | | |
* ~ | Stack grows ~
* | | downward |
Expand All @@ -1162,7 +1175,7 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in
* 8 float callee-saved registers v8-v15
* 8 saved integer argument registers x0-x7, if varargs function
* 1 PSP slot
* 1 alignment slot
* 1 alignment slot or monitor acquired slot
* == 30 slots * 8 bytes = 240 bytes.
*
* The outgoing argument size, however, can be very large, if we call a function that takes a large number of
Expand Down Expand Up @@ -1198,6 +1211,8 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in
* |-----------------------|
* | incoming arguments |
* +=======================+ <---- Caller's SP
* | OSR padding | // If required
* |-----------------------|
* | Varargs regs space | // Only for varargs main functions; 64 bytes
* |-----------------------|
* | Saved LR | // 8 bytes
Expand All @@ -1206,6 +1221,8 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in
* |-----------------------|
* |Callee saved registers | // multiple of 8 bytes
* |-----------------------|
* | MonitorAcquired | // 8 bytes; for synchronized methods
* |-----------------------|
* | PSP slot | // 8 bytes (omitted in NativeAOT ABI)
* |-----------------------|
* ~ alignment padding ~ // To make the whole frame 16 byte aligned.
Expand Down Expand Up @@ -1235,6 +1252,8 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in
* |-----------------------|
* | incoming arguments |
* +=======================+ <---- Caller's SP
* | OSR padding | // If required
* |-----------------------|
* | Varargs regs space | // Only for varargs main functions; 64 bytes
* |-----------------------|
* | Saved LR | // 8 bytes
Expand All @@ -1243,14 +1262,16 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in
* |-----------------------|
* |Callee saved registers | // multiple of 8 bytes
* |-----------------------|
* | MonitorAcquired | // 8 bytes; for synchronized methods
* |-----------------------|
* | PSP slot | // 8 bytes (omitted in NativeAOT ABI)
* |-----------------------|
* ~ alignment padding ~ // To make the first SP subtraction 16 byte aligned
* ~ alignment padding ~ // To make the first SP subtraction 16 byte aligned <-- SP after first adjustment (points at alignment padding or PSP slot)
* |-----------------------|
* ~ alignment padding ~ // To make the whole frame 16 byte aligned (specifically, to 16-byte align the outgoing argument space).
* |-----------------------|
* | Outgoing arg space | // multiple of 8 bytes
* |-----------------------| <---- Ambient SP
* |-----------------------| <---- Ambient SP (SP after second adjustment)
* | | |
* ~ | Stack grows ~
* | | downward |
Expand Down Expand Up @@ -1310,6 +1331,8 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in
* ldp fp,lr,[sp],#framesz
* ret lr
*
* See CodeGen::genPushCalleeSavedRegisters() for a description of the main function frame layout.
* See Compiler::lvaAssignVirtualFrameOffsetsToLocals() for calculation of main frame local variable offsets.
*/
// clang-format on

Expand Down Expand Up @@ -1431,7 +1454,9 @@ void CodeGen::genFuncletProlog(BasicBlock* block)
}
else
{
// Nothing to do here; the first SP adjustment will be done by saving the callee-saved registers.
assert(genFuncletInfo.fiSpDelta1 < 0);
assert(genFuncletInfo.fiSpDelta1 >= -240);
genStackPointerAdjustment(genFuncletInfo.fiSpDelta1, REG_NA, nullptr, /* reportUnwindData */ true);
}
}

Expand Down Expand Up @@ -1501,6 +1526,8 @@ void CodeGen::genFuncletProlog(BasicBlock* block)
/*****************************************************************************
*
* Generates code for an EH funclet epilog.
*
* See the description of frame shapes at genFuncletProlog().
*/

void CodeGen::genFuncletEpilog()
Expand Down Expand Up @@ -1675,6 +1702,9 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo()
if (compiler->opts.IsOSR() && (PSPSize > 0))
{
osrPad = compiler->info.compPatchpointInfo->TotalFrameSize();

// The OSR pad must already be a multiple of the stack alignment (STACK_ALIGN).
assert((osrPad % STACK_ALIGN) == 0);
}

genFuncletInfo.fiFunction_CallerSP_to_FP_delta = genCallerSPtoFPdelta() - osrPad;
Expand Down Expand Up @@ -1705,39 +1735,50 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo()
assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0);
unsigned const outgoingArgSpaceAligned = roundUp(compiler->lvaOutgoingArgSpaceSize, STACK_ALIGN);

unsigned const maxFuncletFrameSizeAligned = saveRegsPlusPSPSizeAligned + osrPad + outgoingArgSpaceAligned;
assert((maxFuncletFrameSizeAligned % STACK_ALIGN) == 0);
// If we do two SP adjustments, each one must be aligned. This represents the largest possible stack size,
// which occurs when two separate alignment slots are required.
unsigned const twoSpAdjustmentFuncletFrameSizeAligned =
osrPad + saveRegsPlusPSPSizeAligned + outgoingArgSpaceAligned;
assert((twoSpAdjustmentFuncletFrameSizeAligned % STACK_ALIGN) == 0);

int SP_to_FPLR_save_delta;
int SP_to_PSP_slot_delta;
int CallerSP_to_PSP_slot_delta;

unsigned const funcletFrameSize = saveRegsPlusPSPSize + osrPad + compiler->lvaOutgoingArgSpaceSize;
unsigned const funcletFrameSizeAligned = roundUp(funcletFrameSize, STACK_ALIGN);
assert(funcletFrameSizeAligned <= maxFuncletFrameSizeAligned);

unsigned const funcletFrameAlignmentPad = funcletFrameSizeAligned - funcletFrameSize;
assert((funcletFrameAlignmentPad == 0) || (funcletFrameAlignmentPad == REGSIZE_BYTES));
// Are we stressing frame type 5? Don't do it unless we have non-zero outgoing arg space.
const bool useFrameType5 =
genSaveFpLrWithAllCalleeSavedRegisters && genForceFuncletFrameType5 && (compiler->lvaOutgoingArgSpaceSize > 0);

if (maxFuncletFrameSizeAligned <= 512)
if ((twoSpAdjustmentFuncletFrameSizeAligned <= 512) && !useFrameType5)
{
unsigned const oneSpAdjustmentFuncletFrameSize =
osrPad + saveRegsPlusPSPSize + compiler->lvaOutgoingArgSpaceSize;
unsigned const oneSpAdjustmentFuncletFrameSizeAligned = roundUp(oneSpAdjustmentFuncletFrameSize, STACK_ALIGN);
assert(oneSpAdjustmentFuncletFrameSizeAligned <= twoSpAdjustmentFuncletFrameSizeAligned);

unsigned const oneSpAdjustmentFuncletFrameSizeAlignmentPad =
oneSpAdjustmentFuncletFrameSizeAligned - oneSpAdjustmentFuncletFrameSize;
assert((oneSpAdjustmentFuncletFrameSizeAlignmentPad == 0) ||
(oneSpAdjustmentFuncletFrameSizeAlignmentPad == REGSIZE_BYTES));

if (genSaveFpLrWithAllCalleeSavedRegisters)
{
SP_to_FPLR_save_delta = funcletFrameSizeAligned - (2 /* FP, LR */ * REGSIZE_BYTES);
SP_to_FPLR_save_delta = oneSpAdjustmentFuncletFrameSizeAligned - (2 /* FP, LR */ * REGSIZE_BYTES);
if (compiler->info.compIsVarArgs)
{
SP_to_FPLR_save_delta -= MAX_REG_ARG * REGSIZE_BYTES;
}

SP_to_PSP_slot_delta = compiler->lvaOutgoingArgSpaceSize + funcletFrameAlignmentPad + osrPad;
SP_to_PSP_slot_delta = compiler->lvaOutgoingArgSpaceSize + oneSpAdjustmentFuncletFrameSizeAlignmentPad;
CallerSP_to_PSP_slot_delta = -(int)(osrPad + saveRegsPlusPSPSize);

genFuncletInfo.fiFrameType = 4;
}
else
{
SP_to_FPLR_save_delta = compiler->lvaOutgoingArgSpaceSize;
SP_to_PSP_slot_delta = SP_to_FPLR_save_delta + 2 /* FP, LR */ * REGSIZE_BYTES + funcletFrameAlignmentPad;
SP_to_PSP_slot_delta =
SP_to_FPLR_save_delta + 2 /* FP, LR */ * REGSIZE_BYTES + oneSpAdjustmentFuncletFrameSizeAlignmentPad;
CallerSP_to_PSP_slot_delta = -(int)(osrPad + saveRegsPlusPSPSize - 2 /* FP, LR */ * REGSIZE_BYTES);

if (compiler->lvaOutgoingArgSpaceSize == 0)
Expand All @@ -1750,26 +1791,25 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo()
}
}

genFuncletInfo.fiSpDelta1 = -(int)funcletFrameSizeAligned;
genFuncletInfo.fiSpDelta1 = -(int)oneSpAdjustmentFuncletFrameSizeAligned;
genFuncletInfo.fiSpDelta2 = 0;

assert(genFuncletInfo.fiSpDelta1 + genFuncletInfo.fiSpDelta2 == -(int)funcletFrameSizeAligned);
assert(genFuncletInfo.fiSpDelta1 + genFuncletInfo.fiSpDelta2 == -(int)oneSpAdjustmentFuncletFrameSizeAligned);
}
else
{
unsigned saveRegsPlusPSPAlignmentPad = saveRegsPlusPSPSizeAligned - saveRegsPlusPSPSize;
unsigned const saveRegsPlusPSPAlignmentPad = saveRegsPlusPSPSizeAligned - saveRegsPlusPSPSize;
assert((saveRegsPlusPSPAlignmentPad == 0) || (saveRegsPlusPSPAlignmentPad == REGSIZE_BYTES));

if (genSaveFpLrWithAllCalleeSavedRegisters)
{
SP_to_FPLR_save_delta = funcletFrameSizeAligned - (2 /* FP, LR */ * REGSIZE_BYTES);
SP_to_FPLR_save_delta = twoSpAdjustmentFuncletFrameSizeAligned - (2 /* FP, LR */ * REGSIZE_BYTES);
if (compiler->info.compIsVarArgs)
{
SP_to_FPLR_save_delta -= MAX_REG_ARG * REGSIZE_BYTES;
}

SP_to_PSP_slot_delta =
compiler->lvaOutgoingArgSpaceSize + funcletFrameAlignmentPad + saveRegsPlusPSPAlignmentPad;
SP_to_PSP_slot_delta = outgoingArgSpaceAligned + saveRegsPlusPSPAlignmentPad;
CallerSP_to_PSP_slot_delta = -(int)(osrPad + saveRegsPlusPSPSize);

genFuncletInfo.fiFrameType = 5;
Expand All @@ -1787,7 +1827,7 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo()
genFuncletInfo.fiSpDelta1 = -(int)(osrPad + saveRegsPlusPSPSizeAligned);
genFuncletInfo.fiSpDelta2 = -(int)outgoingArgSpaceAligned;

assert(genFuncletInfo.fiSpDelta1 + genFuncletInfo.fiSpDelta2 == -(int)maxFuncletFrameSizeAligned);
assert(genFuncletInfo.fiSpDelta1 + genFuncletInfo.fiSpDelta2 == -(int)twoSpAdjustmentFuncletFrameSizeAligned);
}

/* Now save it for future use */
Expand Down Expand Up @@ -4528,6 +4568,19 @@ void CodeGen::SetSaveFpLrWithAllCalleeSavedRegisters(bool value)
{
JITDUMP("Setting genSaveFpLrWithAllCalleeSavedRegisters to %s\n", dspBool(value));
genSaveFpLrWithAllCalleeSavedRegisters = value;

if (genSaveFpLrWithAllCalleeSavedRegisters)
{
// We'll use frame type 4 or 5. Frame type 5 only occurs if there is a very large outgoing argument
// space. This is extremely rare, so under stress force using this frame type. However, frame type 5
// isn't used if there is no outgoing argument space; this is checked elsewhere.

if ((compiler->opts.compJitSaveFpLrWithCalleeSavedRegisters == 3) ||
compiler->compStressCompile(Compiler::STRESS_GENERIC_VARN, 50))
{
genForceFuncletFrameType5 = true;
}
}
}

//---------------------------------------------------------------------
Expand Down
4 changes: 2 additions & 2 deletions src/coreclr/jit/codegenarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4651,7 +4651,7 @@ void CodeGen::genPushCalleeSavedRegisters()
// |-----------------------|
// |Callee saved registers | // not including FP/LR; multiple of 8 bytes
// |-----------------------|
// | MonitorAcquired |
// | MonitorAcquired | // 8 bytes; for synchronized methods
// |-----------------------|
// | PSP slot | // 8 bytes (omitted in NativeAOT ABI)
// |-----------------------|
Expand Down Expand Up @@ -4684,7 +4684,7 @@ void CodeGen::genPushCalleeSavedRegisters()
// |-----------------------|
// |Callee saved registers | // not including FP/LR; multiple of 8 bytes
// |-----------------------|
// | MonitorAcquired |
// | MonitorAcquired | // 8 bytes; for synchronized methods
// |-----------------------|
// | PSP slot | // 8 bytes (omitted in NativeAOT ABI)
// |-----------------------|
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/codegencommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ CodeGen::CodeGen(Compiler* theCompiler) : CodeGenInterface(theCompiler)

#ifdef TARGET_ARM64
genSaveFpLrWithAllCalleeSavedRegisters = false;
genForceFuncletFrameType5 = false;
#endif // TARGET_ARM64
}

Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/jitconfigvalues.h
Original file line number Diff line number Diff line change
Expand Up @@ -603,6 +603,8 @@ CONFIG_STRING(JitFunctionFile, W("JitFunctionFile"))
// 1: disable frames that save FP/LR registers with the callee-saved registers (at the top of the frame)
// 2: force all frames to use the frame types that save FP/LR registers with the callee-saved registers (at the top
// of the frame)
// 3: force all frames to use the frame types that save FP/LR registers with the callee-saved registers (at the top
// of the frame) and also force using the large funclet frame variation (frame type 5) if possible.
CONFIG_INTEGER(JitSaveFpLrWithCalleeSavedRegisters, W("JitSaveFpLrWithCalleeSavedRegisters"), 0)
#endif // defined(TARGET_ARM64)

Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/lclvars.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6385,7 +6385,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
{
codeGen->SetSaveFpLrWithAllCalleeSavedRegisters(false); // Disable using new frames
}
else if (opts.compJitSaveFpLrWithCalleeSavedRegisters == 2)
else if ((opts.compJitSaveFpLrWithCalleeSavedRegisters == 2) || (opts.compJitSaveFpLrWithCalleeSavedRegisters == 3))
{
codeGen->SetSaveFpLrWithAllCalleeSavedRegisters(true); // Force using new frames
}
Expand Down
Loading