JIT: Reorder indirs on arm64 to make them amenable to ldp optimization (#92768)

Add an ARM64 optimization in lowering that tries to reorder indirections to be near each other, making them amenable to the ldp optimization done in codegen.
jakobbotsch authored Oct 6, 2023
1 parent cdd3255 commit 15af764
Showing 7 changed files with 451 additions and 25 deletions.
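The reordering pass added below looks backwards over a small window of previously recorded indirections for a non-volatile load off the same base local whose offset differs from the current one by exactly the access size; only such a pair covers a contiguous block that codegen can fold into a single ldp once the two loads sit next to each other in LIR. A minimal standalone sketch of that pairing condition, using invented names (LoadDesc, AreLdpCandidates) rather than real JIT types:

#include <cassert>
#include <cstdlib>

// Illustrative description of a candidate load; the real code works on
// GenTreeIndir nodes and local variable numbers.
struct LoadDesc
{
    unsigned baseLclNum; // local variable holding the base address
    long     offset;     // constant offset peeled off the address
    unsigned accessSize; // width of the load in bytes
    bool     isVolatile;
};

// Two loads can become one ldp once they are adjacent in LIR: same base local,
// same width, non-volatile, and offsets exactly one access-width apart.
bool AreLdpCandidates(const LoadDesc& prev, const LoadDesc& cur)
{
    return !prev.isVolatile && !cur.isVolatile &&
           (prev.baseLclNum == cur.baseLclNum) &&
           (prev.accessSize == cur.accessSize) &&
           (std::labs(cur.offset - prev.offset) == (long)cur.accessSize);
}

int main()
{
    LoadDesc a{1, 0, 8, false};
    LoadDesc b{1, 8, 8, false};
    LoadDesc c{1, 16, 8, false};
    assert(AreLdpCandidates(a, b));  // *(x+0) and *(x+8) pair up
    assert(!AreLdpCandidates(a, c)); // *(x+0) and *(x+16) are not contiguous
    return 0;
}

Offsets one access-width apart mean the two loads together read a contiguous block of twice the access width, which is the shape the ldp optimization in codegen can handle.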
2 changes: 1 addition & 1 deletion src/coreclr/jit/compiler.h
@@ -3046,7 +3046,7 @@ class Compiler
bool gtStoreDefinesField(
LclVarDsc* fieldVarDsc, ssize_t offset, unsigned size, ssize_t* pFieldStoreOffset, unsigned* pFieldStoreSize);

void gtPeelOffsets(GenTree** addr, target_ssize_t* offset, FieldSeq** fldSeq);
void gtPeelOffsets(GenTree** addr, target_ssize_t* offset, FieldSeq** fldSeq = nullptr);

// Return true if call is a recursive call; return false otherwise.
// Note when inlining, this looks for calls back to the root method.
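The one-line compiler.h change gives gtPeelOffsets a defaulted fldSeq parameter so that the new lowering code in lower.cpp can call comp->gtPeelOffsets(&addr, &offs) without collecting field sequences. The helper strips constant offsets off an address tree, leaving the underlying base and the accumulated byte offset. A rough standalone model of that behaviour, using a toy node type rather than the real GenTree API:

#include <cassert>

// Toy address tree: either a base leaf or base + constant.
struct AddrNode
{
    AddrNode* base;   // non-null for ADD(base, constant) nodes
    long      offset; // constant addend for ADD nodes; unused for leaves
};

// Walk ADD(addr, constant) nodes, accumulating the constants into *offs and
// leaving *addr pointing at the underlying base address.
void PeelOffsets(AddrNode** addr, long* offs)
{
    *offs = 0;
    while ((*addr)->base != nullptr)
    {
        *offs += (*addr)->offset;
        *addr = (*addr)->base;
    }
}

int main()
{
    AddrNode lcl{nullptr, 0};   // base local
    AddrNode plus8{&lcl, 8};    // lcl + 8
    AddrNode plus12{&plus8, 4}; // (lcl + 8) + 4

    AddrNode* addr = &plus12;
    long      offs = 0;
    PeelOffsets(&addr, &offs);
    assert(addr == &lcl && offs == 12);
    return 0;
}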
356 changes: 353 additions & 3 deletions src/coreclr/jit/lower.cpp
@@ -409,8 +409,9 @@ GenTree* Lowering::LowerNode(GenTree* node)
{
case GT_NULLCHECK:
case GT_IND:
LowerIndir(node->AsIndir());
break;
{
return LowerIndir(node->AsIndir());
}

case GT_STOREIND:
LowerStoreIndirCommon(node->AsStoreInd());
@@ -7354,6 +7355,9 @@ void Lowering::LowerBlock(BasicBlock* block)
assert(block->isEmpty() || block->IsLIR());

m_block = block;
#ifdef TARGET_ARM64
m_blockIndirs.Reset();
#endif

// NOTE: some of the lowering methods insert calls before the node being
// lowered (See e.g. InsertPInvoke{Method,Call}{Prolog,Epilog}). In
Expand Down Expand Up @@ -8297,8 +8301,10 @@ void Lowering::LowerStoreIndirCommon(GenTreeStoreInd* ind)
// Arguments:
// ind - the ind node we are lowering.
//
void Lowering::LowerIndir(GenTreeIndir* ind)
GenTree* Lowering::LowerIndir(GenTreeIndir* ind)
{
GenTree* next = ind->gtNext;

assert(ind->OperIs(GT_IND, GT_NULLCHECK));
// Process struct typed indirs separately unless they are unused;
// they only appear as the source of a block copy operation or a return node.
@@ -8345,8 +8351,352 @@ void Lowering::LowerIndir(GenTreeIndir* ind)
const bool isContainable = false;
TryCreateAddrMode(ind->Addr(), isContainable, ind);
}

#ifdef TARGET_ARM64
if (comp->opts.OptimizationEnabled() && ind->OperIs(GT_IND))
{
OptimizeForLdp(ind);
}
#endif

return next;
}

#ifdef TARGET_ARM64

// Max distance that we will try to move an indirection backwards to become
// adjacent to another indirection. As an empirical observation, increasing
// this number to 32 for the smoke_tests collection resulted in 3684 -> 3796
// cases passing the distance check, but 82 out of these 112 extra cases were
// then rejected due to interference. So 16 seems like a good number to balance
// the throughput costs.
const int LDP_REORDERING_MAX_DISTANCE = 16;

//------------------------------------------------------------------------
// OptimizeForLdp: Record information about an indirection, and try to optimize
// it by moving it to be adjacent with a previous indirection such that they
// can be transformed into 'ldp'.
//
// Arguments:
// ind - Indirection to record and to try to move.
//
// Returns:
// True if the optimization was successful.
//
bool Lowering::OptimizeForLdp(GenTreeIndir* ind)
{
if (!ind->TypeIs(TYP_INT, TYP_LONG, TYP_DOUBLE, TYP_SIMD8, TYP_SIMD16) || ind->IsVolatile())
{
return false;
}

target_ssize_t offs = 0;
GenTree* addr = ind->Addr();
comp->gtPeelOffsets(&addr, &offs);

if (!addr->OperIs(GT_LCL_VAR))
{
return false;
}

// Every indirection takes an expected 2+ nodes, so we only expect at most
// half the reordering distance to be candidates for the optimization.
int maxCount = min(m_blockIndirs.Height(), LDP_REORDERING_MAX_DISTANCE / 2);
for (int i = 0; i < maxCount; i++)
{
SavedIndir& prev = m_blockIndirs.TopRef(i);
if (prev.AddrBase->GetLclNum() != addr->AsLclVar()->GetLclNum())
{
continue;
}

GenTreeIndir* prevIndir = prev.Indir;
if ((prevIndir == nullptr) || (prevIndir->TypeGet() != ind->TypeGet()))
{
continue;
}

JITDUMP("[%06u] and [%06u] are indirs off the same base with offsets +%03u and +%03u\n",
Compiler::dspTreeID(ind), Compiler::dspTreeID(prevIndir), (unsigned)offs, (unsigned)prev.Offset);
if (abs(offs - prev.Offset) == genTypeSize(ind))
{
JITDUMP(" ..and they are amenable to ldp optimization\n");
if (TryMakeIndirsAdjacent(prevIndir, ind))
{
// Do not let the previous one participate in
// another instance; that can cause us to e.g. convert
// *(x+4), *(x+0), *(x+8), *(x+12) =>
// *(x+4), *(x+8), *(x+0), *(x+12)
prev.Indir = nullptr;
return true;
}
break;
}
else
{
JITDUMP(" ..but at non-adjacent offset\n");
}
}

m_blockIndirs.Emplace(ind, addr->AsLclVar(), offs);
return false;
}

//------------------------------------------------------------------------
// TryMakeIndirsAdjacent: Try to prove that it is legal to move an indirection
// to be adjacent to a previous indirection. If successful, perform the move.
//
// Arguments:
// prevIndir - Previous indirection
// indir - Indirection to try to move to be adjacent to 'prevIndir'
//
// Returns:
// True if the optimization was successful.
//
bool Lowering::TryMakeIndirsAdjacent(GenTreeIndir* prevIndir, GenTreeIndir* indir)
{
GenTree* cur = prevIndir;
for (int i = 0; i < LDP_REORDERING_MAX_DISTANCE; i++)
{
cur = cur->gtNext;
if (cur == indir)
break;

// We can reorder indirs with some calls, but introducing a LIR edge
// that spans a call can introduce spills (or callee-saves).
if (cur->IsCall() || (cur->OperIsStoreBlk() && (cur->AsBlk()->gtBlkOpKind == GenTreeBlk::BlkOpKindHelper)))
{
JITDUMP(" ..but they are separated by node [%06u] that kills registers\n", Compiler::dspTreeID(cur));
return false;
}
}

if (cur != indir)
{
JITDUMP(" ..but they are too far separated\n");
return false;
}

JITDUMP(
" ..and they are close. Trying to move the following range (where * are nodes part of the data flow):\n\n");
#ifdef DEBUG
bool isClosed;
GenTree* startDumpNode = BlockRange().GetTreeRange(prevIndir, &isClosed).FirstNode();
GenTree* endDumpNode = indir->gtNext;

auto dumpWithMarks = [=]() {
if (!comp->verbose)
{
return;
}

for (GenTree* node = startDumpNode; node != endDumpNode; node = node->gtNext)
{
const char* prefix;
if (node == prevIndir)
prefix = "1. ";
else if (node == indir)
prefix = "2. ";
else if ((node->gtLIRFlags & LIR::Flags::Mark) != 0)
prefix = "* ";
else
prefix = " ";

comp->gtDispLIRNode(node, prefix);
}
};

#endif

MarkTree(indir);

INDEBUG(dumpWithMarks());
JITDUMP("\n");

m_scratchSideEffects.Clear();

for (GenTree* cur = prevIndir->gtNext; cur != indir; cur = cur->gtNext)
{
if ((cur->gtLIRFlags & LIR::Flags::Mark) != 0)
{
// 'cur' is part of data flow of 'indir', so we will be moving the
// currently recorded effects past 'cur'.
if (m_scratchSideEffects.InterferesWith(comp, cur, true))
{
JITDUMP("Giving up due to interference with [%06u]\n", Compiler::dspTreeID(cur));
UnmarkTree(indir);
return false;
}
}
else
{
// Not part of dataflow; add its effects that will move past
// 'indir'.
m_scratchSideEffects.AddNode(comp, cur);
}
}

if (m_scratchSideEffects.InterferesWith(comp, indir, true))
{
// Try a bit harder, making use of the following facts:
//
// 1. We know the indir is non-faulting, so we do not need to worry
// about reordering exceptions
//
// 2. We can reorder with non-volatile INDs even if they have
// GTF_ORDER_SIDEEFF; these indirs only have GTF_ORDER_SIDEEFF due to
// being non-faulting
//
// 3. We can also reorder with non-volatile STOREINDs if we can prove
// no aliasing. We can do that for two common cases:
// * The addresses are based on the same local but at distinct offset ranges
// * The addresses are based off TYP_REF bases at distinct offset ranges

JITDUMP("Have conservative interference with last indir. Trying a smarter interference check...\n");

GenTree* indirAddr = indir->Addr();
target_ssize_t offs = 0;
comp->gtPeelOffsets(&indirAddr, &offs);

bool checkLocal = indirAddr->OperIsLocal();
if (checkLocal)
{
unsigned lclNum = indirAddr->AsLclVarCommon()->GetLclNum();
checkLocal = !comp->lvaGetDesc(lclNum)->IsAddressExposed() && !m_scratchSideEffects.WritesLocal(lclNum);
}

// Helper lambda to check if a single node interferes with 'indir'.
auto interferes = [=](GenTree* node) {
if (((node->gtFlags & GTF_ORDER_SIDEEFF) != 0) && node->OperSupportsOrderingSideEffect())
{
// Cannot normally reorder GTF_ORDER_SIDEEFF and GTF_GLOB_REF,
// except for some of the known cases described above.
if (!node->OperIs(GT_IND, GT_BLK, GT_STOREIND, GT_STORE_BLK) || node->AsIndir()->IsVolatile())
{
return true;
}
}

AliasSet::NodeInfo nodeInfo(comp, node);

if (nodeInfo.WritesAddressableLocation())
{
if (!node->OperIs(GT_STOREIND, GT_STORE_BLK))
{
return true;
}

GenTreeIndir* store = node->AsIndir();
GenTree* storeAddr = store->Addr();
target_ssize_t storeOffs = 0;
comp->gtPeelOffsets(&storeAddr, &storeOffs);

bool distinct = (storeOffs + (target_ssize_t)store->Size() <= offs) ||
(offs + (target_ssize_t)indir->Size() <= storeOffs);

if (checkLocal && GenTree::Compare(indirAddr, storeAddr) && distinct)
{
JITDUMP("Cannot interfere with [%06u] since they are off the same local V%02u and indir range "
"[%03u..%03u) does not interfere with store range [%03u..%03u)\n",
Compiler::dspTreeID(node), indirAddr->AsLclVarCommon()->GetLclNum(), (unsigned)offs,
(unsigned)offs + indir->Size(), (unsigned)storeOffs, (unsigned)storeOffs + store->Size());
}
// Two indirs off of TYP_REFs cannot overlap if their offset ranges are distinct.
else if (indirAddr->TypeIs(TYP_REF) && storeAddr->TypeIs(TYP_REF) && distinct)
{
JITDUMP("Cannot interfere with [%06u] since they are both off TYP_REF bases and indir range "
"[%03u..%03u) does not interfere with store range [%03u..%03u)\n",
Compiler::dspTreeID(node), (unsigned)offs, (unsigned)offs + indir->Size(),
(unsigned)storeOffs, (unsigned)storeOffs + store->Size());
}
else
{
return true;
}
}

return false;
};

for (GenTree* cur = indir->gtPrev; cur != prevIndir; cur = cur->gtPrev)
{
if ((cur->gtLIRFlags & LIR::Flags::Mark) != 0)
{
continue;
}

if (interferes(cur))
{
JITDUMP("Indir [%06u] interferes with [%06u]\n", Compiler::dspTreeID(indir), Compiler::dspTreeID(cur));
UnmarkTree(indir);
return false;
}
}
}

JITDUMP("Interference checks passed. Moving nodes that are not part of data flow of [%06u]\n\n",
Compiler::dspTreeID(indir));

GenTree* previous = prevIndir;
for (GenTree* node = prevIndir->gtNext;;)
{
GenTree* next = node->gtNext;

if ((node->gtLIRFlags & LIR::Flags::Mark) != 0)
{
// Part of data flow. Move it to happen right after 'previous'.
BlockRange().Remove(node);
BlockRange().InsertAfter(previous, node);
previous = node;
}

if (node == indir)
{
break;
}

node = next;
}

JITDUMP("Result:\n\n");
INDEBUG(dumpWithMarks());
JITDUMP("\n");
UnmarkTree(indir);
return true;
}

//------------------------------------------------------------------------
// MarkTree: Mark trees involved in the computation of 'node' recursively.
//
// Arguments:
// node - Root node.
//
void Lowering::MarkTree(GenTree* node)
{
node->gtLIRFlags |= LIR::Flags::Mark;
node->VisitOperands([=](GenTree* op) {
MarkTree(op);
return GenTree::VisitResult::Continue;
});
}

//------------------------------------------------------------------------
// UnmarkTree: Unmark trees involved in the computation of 'node' recursively.
//
// Arguments:
// node - Root node.
//
void Lowering::UnmarkTree(GenTree* node)
{
node->gtLIRFlags &= ~LIR::Flags::Mark;
node->VisitOperands([=](GenTree* op) {
UnmarkTree(op);
return GenTree::VisitResult::Continue;
});
}

#endif // TARGET_ARM64

//------------------------------------------------------------------------
// TransformUnusedIndirection: change the opcode and the type of the unused indirection.
//
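TryMakeIndirsAdjacent above works in two phases: it first accumulates the side effects of every node between the two indirections that is not part of the second indirection's data flow and checks that the data flow can be moved past them (falling back to the smarter per-node check when the conservative one fails), and it then splices the marked data-flow nodes out of the LIR range and reinserts them immediately after the first indirection. Below is a simplified standalone model of that final splice over a plain doubly linked list; the real code uses BlockRange().Remove/InsertAfter and the LIR::Flags::Mark bit shown in the diff, and the Node type here is invented for illustration.

#include <cassert>

// Minimal stand-in for a LIR node: a doubly linked list entry with a mark bit.
struct Node
{
    int   id;
    bool  marked; // part of the data flow of the second indirection
    Node* prev;
    Node* next;
};

static void Remove(Node* n)
{
    n->prev->next = n->next;
    if (n->next != nullptr)
        n->next->prev = n->prev;
}

static void InsertAfter(Node* after, Node* n)
{
    n->prev = after;
    n->next = after->next;
    if (after->next != nullptr)
        after->next->prev = n;
    after->next = n;
}

// Move every marked node in (prevIndir, indir] to sit directly after
// prevIndir, preserving their relative order -- the same shape as the loop at
// the end of TryMakeIndirsAdjacent.
static void SpliceMarkedAfter(Node* prevIndir, Node* indir)
{
    Node* previous = prevIndir;
    for (Node* node = prevIndir->next;;)
    {
        Node* next = node->next;
        if (node->marked)
        {
            Remove(node);
            InsertAfter(previous, node);
            previous = node;
        }
        if (node == indir)
            break;
        node = next;
    }
}

int main()
{
    // prevIndir -> other -> addr -> indir, where addr and indir form the data
    // flow that should end up adjacent to prevIndir.
    Node prevIndir{1, false, nullptr, nullptr};
    Node other{2, false, nullptr, nullptr};
    Node addr{3, true, nullptr, nullptr};
    Node indir{4, true, nullptr, nullptr};

    Node* order[] = {&prevIndir, &other, &addr, &indir};
    for (int i = 0; i < 3; i++)
    {
        order[i]->next     = order[i + 1];
        order[i + 1]->prev = order[i];
    }

    SpliceMarkedAfter(&prevIndir, &indir);

    // Result: prevIndir, addr, indir, other -- the two indirections are now
    // adjacent and the unrelated node has moved past them.
    assert(prevIndir.next == &addr);
    assert(addr.next == &indir);
    assert(indir.next == &other);
    return 0;
}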

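The smarter interference check in the interferes lambda treats a load and a store as non-aliasing when they are off the same address-unexposed local (or both off TYP_REF bases) and their byte ranges are disjoint: the store range [storeOffs, storeOffs + storeSize) must end at or before the load range [offs, offs + indirSize) begins, or vice versa. A small self-contained version of that interval test:

#include <cassert>

// Byte ranges [aOffs, aOffs + aSize) and [bOffs, bOffs + bSize) off the same
// base cannot overlap when one ends at or before the other starts -- the
// 'distinct' condition in the store/indir aliasing check.
bool RangesDistinct(long aOffs, unsigned aSize, long bOffs, unsigned bSize)
{
    return (aOffs + (long)aSize <= bOffs) || (bOffs + (long)bSize <= aOffs);
}

int main()
{
    assert(RangesDistinct(0, 8, 8, 8));   // [0..8) and [8..16): no overlap
    assert(!RangesDistinct(0, 8, 4, 8));  // [0..8) and [4..12): overlap
    assert(!RangesDistinct(0, 16, 8, 4)); // [0..16) contains [8..12)
    return 0;
}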