Skip to content

Commit e2b0d5d

Browse files
authored
[SandboxVec][Scheduler] Enforce scheduling SchedBundle instrs back-to-back (llvm#128092)
This patch fixes the behavior of the scheduler by making sure the instrs that are part of a SchedBundle are scheduled back-to-back.
1 parent 403b7b6 commit e2b0d5d

File tree

5 files changed

+127
-31
lines changed

5 files changed

+127
-31
lines changed

llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h

+4
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,10 @@ class SchedBundle {
150150
DGNode *getBot() const;
151151
/// Move all bundle instructions to \p Where back-to-back.
152152
void cluster(BasicBlock::iterator Where);
153+
/// \Returns true if all nodes in the bundle are ready.
154+
bool ready() const {
155+
return all_of(Nodes, [](const auto *N) { return N->ready(); });
156+
}
153157
#ifndef NDEBUG
154158
void dump(raw_ostream &OS) const;
155159
LLVM_DUMP_METHOD void dump() const;

llvm/lib/Transforms/Vectorize/SandboxVectorizer/Scheduler.cpp

+68-23
Original file line numberDiff line numberDiff line change
@@ -125,36 +125,80 @@ SchedBundle *Scheduler::createBundle(ArrayRef<Instruction *> Instrs) {
125125
void Scheduler::eraseBundle(SchedBundle *SB) { Bndls.erase(SB); }
126126

127127
bool Scheduler::tryScheduleUntil(ArrayRef<Instruction *> Instrs) {
128-
// Use a set of instructions, instead of `Instrs` for fast lookups.
129-
DenseSet<Instruction *> InstrsToDefer(Instrs.begin(), Instrs.end());
130-
// This collects the nodes that correspond to instructions found in `Instrs`
131-
// that have just become ready. These nodes won't be scheduled right away.
132-
SmallVector<DGNode *, 8> DeferredNodes;
133-
128+
// Create a bundle for Instrs. If it turns out the schedule is infeasible we
129+
// will dismantle it.
130+
auto *InstrsSB = createBundle(Instrs);
134131
// Keep scheduling ready nodes until we either run out of ready nodes (i.e.,
135132
// ReadyList is empty), or all nodes that correspond to `Instrs` (the nodes of
136133
// which make up the InstrsSB bundle) are all ready to schedule.
137-
while (!ReadyList.empty()) {
138-
auto *ReadyN = ReadyList.pop();
139-
if (InstrsToDefer.contains(ReadyN->getInstruction())) {
140-
// If the ready instruction is one of those in `Instrs`, then we don't
141-
// schedule it right away. Instead we defer it until we can schedule it
142-
// along with the rest of the instructions in `Instrs`, at the same
143-
// time in a single scheduling bundle.
144-
DeferredNodes.push_back(ReadyN);
145-
bool ReadyToScheduleDeferred = DeferredNodes.size() == Instrs.size();
146-
if (ReadyToScheduleDeferred) {
147-
scheduleAndUpdateReadyList(*createBundle(Instrs));
134+
SmallVector<DGNode *> Retry;
135+
bool KeepScheduling = true;
136+
while (KeepScheduling) {
137+
enum class TryScheduleRes {
138+
Success, ///< We successfully scheduled the bundle.
139+
Failure, ///< We failed to schedule the bundle.
140+
Finished, ///< We successfully scheduled the bundle and it is the last
141+
///< bundle to be scheduled.
142+
};
143+
/// TryScheduleBndl() attempts to schedule all DAG nodes in the bundle that
144+
/// ReadyN is in. If it's not in a bundle it will create a singleton bundle
145+
/// and will try to schedule it.
146+
auto TryScheduleBndl = [this, InstrsSB](DGNode *ReadyN) -> TryScheduleRes {
147+
auto *SB = ReadyN->getSchedBundle();
148+
if (SB == nullptr) {
149+
// If ReadyN does not belong to a bundle, create a singleton bundle
150+
// and schedule it.
151+
auto *SingletonSB = createBundle({ReadyN->getInstruction()});
152+
scheduleAndUpdateReadyList(*SingletonSB);
153+
return TryScheduleRes::Success;
154+
}
155+
if (SB->ready()) {
156+
// Remove the rest of the bundle from the ready list.
157+
// TODO: Perhaps change the Scheduler + ReadyList to operate on
158+
// SchedBundles instead of DGNodes.
159+
for (auto *N : *SB) {
160+
if (N != ReadyN)
161+
ReadyList.remove(N);
162+
}
163+
// All nodes in the bundle are ready, so schedule the whole bundle at once.
164+
scheduleAndUpdateReadyList(*SB);
165+
if (SB == InstrsSB)
166+
// We just scheduled InstrsSB bundle, so we are done scheduling.
167+
return TryScheduleRes::Finished;
168+
return TryScheduleRes::Success;
169+
}
170+
return TryScheduleRes::Failure;
171+
};
172+
while (!ReadyList.empty()) {
173+
auto *ReadyN = ReadyList.pop();
174+
auto Res = TryScheduleBndl(ReadyN);
175+
switch (Res) {
176+
case TryScheduleRes::Success:
177+
// We successfully scheduled ReadyN, keep scheduling.
178+
continue;
179+
case TryScheduleRes::Failure:
180+
// We failed to schedule ReadyN, defer it to later and keep scheduling
181+
// other ready instructions.
182+
Retry.push_back(ReadyN);
183+
continue;
184+
case TryScheduleRes::Finished:
185+
// We successfully scheduled the instruction bundle, so we are done.
148186
return true;
149187
}
150-
} else {
151-
// If the ready instruction is not found in `Instrs`, then we wrap it in a
152-
// scheduling bundle and schedule it right away.
153-
scheduleAndUpdateReadyList(*createBundle({ReadyN->getInstruction()}));
188+
llvm_unreachable("Unhandled TrySchedule() result");
189+
}
190+
// Try to schedule nodes from the Retry list.
191+
KeepScheduling = false;
192+
for (auto *N : make_early_inc_range(Retry)) {
193+
auto Res = TryScheduleBndl(N);
194+
if (Res == TryScheduleRes::Success) {
195+
Retry.erase(find(Retry, N));
196+
KeepScheduling = true;
197+
}
154198
}
155199
}
156-
assert(DeferredNodes.size() != Instrs.size() &&
157-
"We should have succesfully scheduled and early-returned!");
200+
201+
eraseBundle(InstrsSB);
158202
return false;
159203
}
160204

@@ -275,6 +319,7 @@ bool Scheduler::trySchedule(ArrayRef<Instruction *> Instrs) {
275319
// If one or more instrs are already scheduled we need to destroy the
276320
// top-most part of the schedule that includes the instrs in the bundle and
277321
// re-schedule.
322+
DAG.extend(Instrs);
278323
trimSchedule(Instrs);
279324
ScheduleTopItOpt = std::next(VecUtils::getLowest(Instrs)->getIterator());
280325
return tryScheduleUntil(Instrs);

llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -360,8 +360,8 @@ define void @vecInstrsPlacement(ptr %ptr0) {
360360
; CHECK-SAME: ptr [[PTR0:%.*]]) {
361361
; CHECK-NEXT: [[VECL2:%.*]] = load <2 x double>, ptr [[PTR0]], align 8
362362
; CHECK-NEXT: [[VECL:%.*]] = load <2 x double>, ptr [[PTR0]], align 8
363-
; CHECK-NEXT: [[VEC2:%.*]] = fmul <2 x double> [[VECL]], [[VECL2]]
364-
; CHECK-NEXT: [[VEC:%.*]] = fmul <2 x double> [[VECL]], [[VECL2]]
363+
; CHECK-NEXT: [[VEC2:%.*]] = fmul <2 x double> [[VECL2]], [[VECL]]
364+
; CHECK-NEXT: [[VEC:%.*]] = fmul <2 x double> [[VECL2]], [[VECL]]
365365
; CHECK-NEXT: [[VEC5:%.*]] = fadd <2 x double> [[VEC]], [[VEC2]]
366366
; CHECK-NEXT: store <2 x double> [[VEC5]], ptr [[PTR0]], align 8
367367
; CHECK-NEXT: ret void

llvm/test/Transforms/SandboxVectorizer/scheduler.ll

+6-6
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,17 @@ define void @check_dag_scheduler_update(ptr noalias %p, ptr noalias %p1) {
77
; CHECK-LABEL: define void @check_dag_scheduler_update(
88
; CHECK-SAME: ptr noalias [[P:%.*]], ptr noalias [[P1:%.*]]) {
99
; CHECK-NEXT: [[I:%.*]] = load i32, ptr [[P]], align 4
10-
; CHECK-NEXT: [[VECL:%.*]] = load <4 x i32>, ptr [[P]], align 4
11-
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr i32, ptr [[P]], i64 34
10+
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr i32, ptr [[P]], i64 32
1211
; CHECK-NEXT: [[I2:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4
1312
; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr i32, ptr [[P]], i64 33
1413
; CHECK-NEXT: [[I4:%.*]] = load i32, ptr [[ARRAYIDX11]], align 4
15-
; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr i32, ptr [[P]], i64 32
14+
; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr i32, ptr [[P]], i64 34
1615
; CHECK-NEXT: [[I6:%.*]] = load i32, ptr [[ARRAYIDX18]], align 4
1716
; CHECK-NEXT: [[PACK:%.*]] = insertelement <4 x i32> poison, i32 [[I]], i32 0
18-
; CHECK-NEXT: [[PACK1:%.*]] = insertelement <4 x i32> [[PACK]], i32 [[I6]], i32 1
17+
; CHECK-NEXT: [[PACK1:%.*]] = insertelement <4 x i32> [[PACK]], i32 [[I2]], i32 1
1918
; CHECK-NEXT: [[PACK2:%.*]] = insertelement <4 x i32> [[PACK1]], i32 [[I4]], i32 2
20-
; CHECK-NEXT: [[PACK3:%.*]] = insertelement <4 x i32> [[PACK2]], i32 [[I2]], i32 3
19+
; CHECK-NEXT: [[PACK3:%.*]] = insertelement <4 x i32> [[PACK2]], i32 [[I6]], i32 3
20+
; CHECK-NEXT: [[VECL:%.*]] = load <4 x i32>, ptr [[P]], align 4
2121
; CHECK-NEXT: [[VEC:%.*]] = add nsw <4 x i32> [[PACK3]], [[VECL]]
2222
; CHECK-NEXT: store <4 x i32> [[VEC]], ptr [[P1]], align 4
2323
; CHECK-NEXT: ret void
@@ -56,9 +56,9 @@ define <4 x float> @check_top_of_schedule(ptr %0) {
5656
; CHECK-LABEL: define <4 x float> @check_top_of_schedule(
5757
; CHECK-SAME: ptr [[TMP0:%.*]]) {
5858
; CHECK-NEXT: [[INS_1:%.*]] = insertelement <4 x float> zeroinitializer, float poison, i64 0
59+
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr double, ptr [[TMP0]], i64 1
5960
; CHECK-NEXT: [[TRUNC_1:%.*]] = fptrunc double 0.000000e+00 to float
6061
; CHECK-NEXT: [[INS_2:%.*]] = insertelement <4 x float> [[INS_1]], float [[TRUNC_1]], i64 0
61-
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr double, ptr [[TMP0]], i64 1
6262
; CHECK-NEXT: store <2 x double> <double 0.000000e+00, double 1.000000e+00>, ptr [[GEP_1]], align 8
6363
; CHECK-NEXT: ret <4 x float> [[INS_2]]
6464
;

llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SchedulerTest.cpp

+47
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,53 @@ define void @foo(ptr noalias %ptr0, ptr noalias %ptr1, i8 %arg) {
324324
EXPECT_TRUE(Sched.trySchedule({L0, L1}));
325325
}
326326

327+
// Make sure that instructions in SchedBundles are always scheduled
328+
// back-to-back
329+
TEST_F(SchedulerTest, SchedBundleBackToBack) {
330+
parseIR(C, R"IR(
331+
define void @foo(ptr %ptr, i16 %arg) {
332+
%gep0 = getelementptr i32, ptr %ptr, i64 0
333+
%gep1 = getelementptr i32, ptr %ptr, i64 1
334+
%zextX = zext i16 0 to i32
335+
%zext1 = zext i16 0 to i32
336+
%zext0 = zext i16 %arg to i32
337+
%shl1 = shl i32 %zextX, 0
338+
%shl0 = shl i32 %zext1, 0
339+
%sub1 = sub i32 %zext1, %shl1
340+
%sub0 = sub i32 %zext0, %shl0
341+
store i32 %sub1, ptr %gep1
342+
store i32 %sub0, ptr %gep0
343+
ret void
344+
})IR");
345+
llvm::Function *LLVMF = &*M->getFunction("foo");
346+
sandboxir::Context Ctx(C);
347+
auto *F = Ctx.createFunction(LLVMF);
348+
auto *BB = &*F->begin();
349+
auto It = BB->begin();
350+
auto *Gep0 = cast<sandboxir::GetElementPtrInst>(&*It++);
351+
auto *Gep1 = cast<sandboxir::GetElementPtrInst>(&*It++);
352+
auto *ZextX = cast<sandboxir::CastInst>(&*It++);
353+
auto *Zext1 = cast<sandboxir::CastInst>(&*It++);
354+
auto *Zext0 = cast<sandboxir::CastInst>(&*It++);
355+
auto *Shl1 = cast<sandboxir::BinaryOperator>(&*It++);
356+
auto *Shl0 = cast<sandboxir::BinaryOperator>(&*It++);
357+
auto *Sub1 = cast<sandboxir::BinaryOperator>(&*It++);
358+
auto *Sub0 = cast<sandboxir::BinaryOperator>(&*It++);
359+
auto *S0 = cast<sandboxir::StoreInst>(&*It++);
360+
auto *S1 = cast<sandboxir::StoreInst>(&*It++);
361+
362+
sandboxir::Scheduler Sched(getAA(*LLVMF), Ctx);
363+
EXPECT_TRUE(Sched.trySchedule({S0, S1}));
364+
EXPECT_TRUE(Sched.trySchedule({Zext0, Zext1}));
365+
EXPECT_TRUE(Sched.trySchedule({Shl0, Shl1}));
366+
auto BackToBack = [](sandboxir::Instruction *I1, sandboxir::Instruction *I2) {
367+
return I1->getNextNode() == I2 || I2->getNextNode() == I1;
368+
};
369+
EXPECT_TRUE(BackToBack(S0, S1));
370+
EXPECT_TRUE(BackToBack(Zext0, Zext1));
371+
EXPECT_TRUE(BackToBack(Shl0, Shl1));
372+
}
373+
327374
// Test that an instruction can't belong in two bundles!
328375
TEST_F(SchedulerTest, CheckBundles) {
329376
parseIR(C, R"IR(

0 commit comments

Comments
 (0)