Skip to content

Commit 62994c3

Browse files
committed
[VPlan] Also introduce explicit broadcasts for values from entry VPBB.
Update and generalize materializeBroadcasts to also introduce explicit broadcasts for VPValues defined in the Plan's entry block. This fixes a crash when trying to insert the broadcasts generated by VPTransformState::get directly after the generating instruction, which isn't possible when that instruction is an invoke (a block terminator). Fixes llvm#128838.
1 parent 9c65e6a commit 62994c3

10 files changed

+139
-49
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -7691,11 +7691,11 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
76917691
((VectorizingEpilogue && ExpandedSCEVs) ||
76927692
(!VectorizingEpilogue && !ExpandedSCEVs)) &&
76937693
"expanded SCEVs to reuse can only be used during epilogue vectorization");
7694-
VPlanTransforms::materializeLiveInBroadcasts(BestVPlan);
76957694
// TODO: Move to VPlan transform stage once the transition to the VPlan-based
76967695
// cost model is complete for better cost estimates.
76977696
VPlanTransforms::runPass(VPlanTransforms::unrollByUF, BestVPlan, BestUF,
76987697
OrigLoop->getHeader()->getContext());
7698+
VPlanTransforms::materializeBroadcasts(BestVPlan);
76997699
VPlanTransforms::optimizeForVFAndUF(BestVPlan, BestVF, BestUF, PSE);
77007700
VPlanTransforms::simplifyRecipes(BestVPlan, *Legal->getWidestInductionType());
77017701
VPlanTransforms::removeDeadRecipes(BestVPlan);

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

+19-12
Original file line numberDiff line numberDiff line change
@@ -2180,7 +2180,7 @@ void VPlanTransforms::handleUncountableEarlyExit(
21802180
LatchExitingBranch->eraseFromParent();
21812181
}
21822182

2183-
void VPlanTransforms::materializeLiveInBroadcasts(VPlan &Plan) {
2183+
void VPlanTransforms::materializeBroadcasts(VPlan &Plan) {
21842184
if (Plan.hasScalarVFOnly())
21852185
return;
21862186

@@ -2189,18 +2189,25 @@ void VPlanTransforms::materializeLiveInBroadcasts(VPlan &Plan) {
21892189
VPDT.recalculate(Plan);
21902190
#endif
21912191

2192+
SmallVector<VPValue *> VPValues;
2193+
append_range(VPValues, Plan.getLiveIns());
2194+
for (VPRecipeBase &R : *Plan.getEntry())
2195+
append_range(VPValues, R.definedValues());
2196+
21922197
auto *VectorPreheader = Plan.getVectorPreheader();
2193-
for (VPValue *LiveIn : Plan.getLiveIns()) {
2194-
if (all_of(LiveIn->users(),
2195-
[LiveIn](VPUser *U) { return U->usesScalars(LiveIn); }) ||
2196-
!LiveIn->getLiveInIRValue() ||
2197-
isa<Constant>(LiveIn->getLiveInIRValue()))
2198+
for (VPValue *VPV : VPValues) {
2199+
if (all_of(VPV->users(),
2200+
[VPV](VPUser *U) { return U->usesScalars(VPV); }) ||
2201+
(VPV->isLiveIn() &&
2202+
(!VPV->getLiveInIRValue() || isa<Constant>(VPV->getLiveInIRValue()))))
21982203
continue;
21992204

22002205
// Add explicit broadcast at the insert point that dominates all users.
22012206
VPBasicBlock *HoistBlock = VectorPreheader;
22022207
VPBasicBlock::iterator HoistPoint = VectorPreheader->end();
2203-
for (VPUser *User : LiveIn->users()) {
2208+
for (VPUser *User : VPV->users()) {
2209+
if (User->usesScalars(VPV))
2210+
continue;
22042211
if (cast<VPRecipeBase>(User)->getParent() == VectorPreheader)
22052212
HoistPoint = HoistBlock->begin();
22062213
else
@@ -2210,10 +2217,10 @@ void VPlanTransforms::materializeLiveInBroadcasts(VPlan &Plan) {
22102217
}
22112218

22122219
VPBuilder Builder(cast<VPBasicBlock>(HoistBlock), HoistPoint);
2213-
auto *Broadcast = Builder.createNaryOp(VPInstruction::Broadcast, {LiveIn});
2214-
LiveIn->replaceUsesWithIf(
2215-
Broadcast, [LiveIn, Broadcast](VPUser &U, unsigned Idx) {
2216-
return Broadcast != &U && !U.usesScalars(LiveIn);
2217-
});
2220+
auto *Broadcast = Builder.createNaryOp(VPInstruction::Broadcast, {VPV});
2221+
VPV->replaceUsesWithIf(Broadcast,
2222+
[VPV, Broadcast](VPUser &U, unsigned Idx) {
2223+
return Broadcast != &U && !U.usesScalars(VPV);
2224+
});
22182225
}
22192226
}

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -189,8 +189,8 @@ struct VPlanTransforms {
189189
optimizeInductionExitUsers(VPlan &Plan,
190190
DenseMap<VPValue *, VPValue *> &EndValues);
191191

192-
/// Add explicit broadcasts for live-ins used as vectors.
193-
static void materializeLiveInBroadcasts(VPlan &Plan);
192+
/// Add explicit broadcasts for live-ins and VPValues defined in \p Plan's entry block if they are used as vectors.
193+
static void materializeBroadcasts(VPlan &Plan);
194194
};
195195

196196
} // namespace llvm

llvm/test/Transforms/LoopVectorize/RISCV/illegal-type.ll

+3-3
Original file line numberDiff line numberDiff line change
@@ -107,10 +107,10 @@ define void @uniform_store_i1(ptr noalias %dst, ptr noalias %start, i64 %N) {
107107
; CHECK: vector.ph:
108108
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 32
109109
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
110-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <32 x ptr> poison, ptr [[START:%.*]], i64 0
111-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <32 x ptr> [[BROADCAST_SPLATINSERT]], <32 x ptr> poison, <32 x i32> zeroinitializer
112110
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[N_VEC]], 8
113-
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP1]]
111+
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[TMP1]]
112+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <32 x ptr> poison, ptr [[START]], i64 0
113+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <32 x ptr> [[BROADCAST_SPLATINSERT]], <32 x ptr> poison, <32 x i32> zeroinitializer
114114
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
115115
; CHECK: vector.body:
116116
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]

llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll

+8-8
Original file line numberDiff line numberDiff line change
@@ -145,13 +145,13 @@ define void @test_induction_step_needs_expansion(ptr noalias %j, ptr %k, i64 %l,
145145
; CHECK: vector.ph:
146146
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[L]], 64
147147
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[L]], [[N_MOD_VF]]
148+
; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <16 x i16> poison, i16 [[TMP0]], i64 0
149+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[DOTSPLATINSERT2]], <16 x i16> poison, <16 x i32> zeroinitializer
148150
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i16
149151
; CHECK-NEXT: [[IND_END:%.*]] = mul i16 [[DOTCAST]], [[TMP0]]
150-
; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <16 x i16> poison, i16 [[OFF]], i64 0
151-
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[DOTSPLATINSERT2]], <16 x i16> poison, <16 x i32> zeroinitializer
152-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <16 x i16> poison, i16 [[TMP0]], i64 0
152+
; CHECK-NEXT: [[TMP1:%.*]] = mul <16 x i16> splat (i16 16), [[TMP2]]
153+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <16 x i16> poison, i16 [[OFF]], i64 0
153154
; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <16 x i16> [[BROADCAST_SPLATINSERT2]], <16 x i16> poison, <16 x i32> zeroinitializer
154-
; CHECK-NEXT: [[TMP1:%.*]] = mul <16 x i16> splat (i16 16), [[BROADCAST_SPLAT3]]
155155
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[TMP0]], i64 0
156156
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i16> [[DOTSPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
157157
; CHECK-NEXT: [[TMP11:%.*]] = mul <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, [[DOTSPLAT]]
@@ -164,10 +164,10 @@ define void @test_induction_step_needs_expansion(ptr noalias %j, ptr %k, i64 %l,
164164
; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add <16 x i16> [[STEP_ADD]], [[TMP1]]
165165
; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add <16 x i16> [[STEP_ADD_2]], [[TMP1]]
166166
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
167-
; CHECK-NEXT: [[TMP4:%.*]] = sub <16 x i16> [[VEC_IND]], [[TMP2]]
168-
; CHECK-NEXT: [[TMP5:%.*]] = sub <16 x i16> [[STEP_ADD]], [[TMP2]]
169-
; CHECK-NEXT: [[TMP6:%.*]] = sub <16 x i16> [[STEP_ADD_2]], [[TMP2]]
170-
; CHECK-NEXT: [[TMP7:%.*]] = sub <16 x i16> [[STEP_ADD_3]], [[TMP2]]
167+
; CHECK-NEXT: [[TMP4:%.*]] = sub <16 x i16> [[VEC_IND]], [[BROADCAST_SPLAT3]]
168+
; CHECK-NEXT: [[TMP5:%.*]] = sub <16 x i16> [[STEP_ADD]], [[BROADCAST_SPLAT3]]
169+
; CHECK-NEXT: [[TMP6:%.*]] = sub <16 x i16> [[STEP_ADD_2]], [[BROADCAST_SPLAT3]]
170+
; CHECK-NEXT: [[TMP7:%.*]] = sub <16 x i16> [[STEP_ADD_3]], [[BROADCAST_SPLAT3]]
171171
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[K:%.*]], i64 [[TMP3]]
172172
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i32 0
173173
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i32 16

llvm/test/Transforms/LoopVectorize/X86/induction-step.ll

+8-8
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,10 @@ define i16 @wide_add_induction_step_live_in(ptr %dst, i64 %N, i16 %off) {
1212
; CHECK: vector.ph:
1313
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 8
1414
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
15-
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i16
16-
; CHECK-NEXT: [[TMP0:%.*]] = mul i16 [[DOTCAST]], [[O_1]]
1715
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[O_1]], i64 0
1816
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
17+
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i16
18+
; CHECK-NEXT: [[TMP0:%.*]] = mul i16 [[DOTCAST]], [[O_1]]
1919
; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i16> splat (i16 4), [[BROADCAST_SPLAT]]
2020
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[O_1]], i64 0
2121
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i16> [[DOTSPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
@@ -86,13 +86,13 @@ define i16 @wide_sub_induction_step_live_in(ptr %dst, i64 %N, i16 %off) {
8686
; CHECK: vector.ph:
8787
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 8
8888
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
89+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i64 0
90+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
8991
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i16
9092
; CHECK-NEXT: [[TMP1:%.*]] = mul i16 [[DOTCAST]], [[TMP0]]
91-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[O_1]], i64 0
92-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
93-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i64 0
93+
; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i16> splat (i16 4), [[BROADCAST_SPLAT]]
94+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i16> poison, i16 [[O_1]], i64 0
9495
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT1]], <4 x i16> poison, <4 x i32> zeroinitializer
95-
; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i16> splat (i16 4), [[BROADCAST_SPLAT2]]
9696
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i64 0
9797
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i16> [[DOTSPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
9898
; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i16> <i16 0, i16 1, i16 2, i16 3>, [[DOTSPLAT]]
@@ -103,8 +103,8 @@ define i16 @wide_sub_induction_step_live_in(ptr %dst, i64 %N, i16 %off) {
103103
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
104104
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], [[TMP2]]
105105
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
106-
; CHECK-NEXT: [[TMP5:%.*]] = sub <4 x i16> [[VEC_IND]], [[BROADCAST_SPLAT]]
107-
; CHECK-NEXT: [[TMP10:%.*]] = sub <4 x i16> [[STEP_ADD]], [[BROADCAST_SPLAT]]
106+
; CHECK-NEXT: [[TMP5:%.*]] = sub <4 x i16> [[VEC_IND]], [[BROADCAST_SPLAT2]]
107+
; CHECK-NEXT: [[TMP10:%.*]] = sub <4 x i16> [[STEP_ADD]], [[BROADCAST_SPLAT2]]
108108
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[DST:%.*]], i64 [[TMP4]]
109109
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 0
110110
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
2+
; RUN: opt -p loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -S %s | FileCheck %s
3+
4+
declare i32 @foo()
5+
6+
; Test case for https://github.com/llvm/llvm-project/issues/128838. Make sure
7+
; we do not crash when expanding %step.
8+
define void @test(ptr %dst) personality ptr null {
9+
; CHECK-LABEL: define void @test(
10+
; CHECK-SAME: ptr [[DST:%.*]]) personality ptr null {
11+
; CHECK-NEXT: [[ENTRY:.*:]]
12+
; CHECK-NEXT: [[STEP:%.*]] = invoke i32 @foo()
13+
; CHECK-NEXT: to label %[[LOOP_PREHEADER:.*]] unwind label %[[LPAD:.*]]
14+
; CHECK: [[LOOP_PREHEADER]]:
15+
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
16+
; CHECK: [[VECTOR_PH]]:
17+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[STEP]], i64 0
18+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
19+
; CHECK-NEXT: [[TMP0:%.*]] = mul i32 160, [[STEP]]
20+
; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i32> splat (i32 4), [[BROADCAST_SPLAT]]
21+
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[STEP]], i64 0
22+
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
23+
; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[DOTSPLAT]]
24+
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> zeroinitializer, [[TMP2]]
25+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
26+
; CHECK: [[VECTOR_BODY]]:
27+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
28+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
29+
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], [[TMP1]]
30+
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
31+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP3]]
32+
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0
33+
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 4
34+
; CHECK-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP5]], align 8
35+
; CHECK-NEXT: store <4 x i32> [[STEP_ADD]], ptr [[TMP6]], align 8
36+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
37+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], [[TMP1]]
38+
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 160
39+
; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
40+
; CHECK: [[MIDDLE_BLOCK]]:
41+
; CHECK-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]]
42+
; CHECK: [[SCALAR_PH]]:
43+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 160, %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ]
44+
; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[TMP0]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ]
45+
; CHECK-NEXT: br label %[[LOOP:.*]]
46+
; CHECK: [[LOOP]]:
47+
; CHECK-NEXT: [[IV_1:%.*]] = phi i64 [ [[ADD:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
48+
; CHECK-NEXT: [[IV_2:%.*]] = phi i32 [ [[IV_2_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ]
49+
; CHECK-NEXT: [[ADD]] = add i64 [[IV_1]], 1
50+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IV_1]]
51+
; CHECK-NEXT: store i32 [[IV_2]], ptr [[GEP]], align 8
52+
; CHECK-NEXT: [[IV_2_NEXT]] = add i32 [[IV_2]], [[STEP]]
53+
; CHECK-NEXT: [[EC:%.*]] = icmp ult i64 [[IV_1]], 161
54+
; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
55+
; CHECK: [[EXIT]]:
56+
; CHECK-NEXT: ret void
57+
; CHECK: [[LPAD]]:
58+
; CHECK-NEXT: [[LANDINGPAD:%.*]] = landingpad { ptr, i32 }
59+
; CHECK-NEXT: cleanup
60+
; CHECK-NEXT: ret void
61+
;
62+
entry:
63+
%step = invoke i32 @foo()
64+
to label %loop unwind label %lpad
65+
66+
loop:
67+
%iv.1 = phi i64 [ 0, %entry ], [ %add, %loop ]
68+
%iv.2 = phi i32 [ 0, %entry ], [ %iv.2.next, %loop ]
69+
%add = add i64 %iv.1, 1
70+
%gep = getelementptr inbounds i32, ptr %dst, i64 %iv.1
71+
store i32 %iv.2, ptr %gep, align 8
72+
%iv.2.next = add i32 %iv.2, %step
73+
%ec = icmp ult i64 %iv.1, 161
74+
br i1 %ec, label %loop, label %exit
75+
76+
exit:
77+
ret void
78+
79+
lpad:
80+
%landingpad = landingpad { ptr, i32 }
81+
cleanup
82+
ret void
83+
}

llvm/test/Transforms/LoopVectorize/float-induction.ll

+5-5
Original file line numberDiff line numberDiff line change
@@ -84,10 +84,10 @@ define void @fp_iv_loop1_fast_FMF(float %init, ptr noalias nocapture %A, i32 %N)
8484
; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
8585
; VEC4_INTERL2: vector.ph:
8686
; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483640
87+
; VEC4_INTERL2-NEXT: [[FPINC_INS:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0
8788
; VEC4_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float
8889
; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]]
8990
; VEC4_INTERL2-NEXT: [[IND_END:%.*]] = fsub fast float [[INIT:%.*]], [[TMP1]]
90-
; VEC4_INTERL2-NEXT: [[FPINC_INS:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0
9191
; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[FPINC_INS]], <float 4.000000e+00, float poison, float poison, float poison>
9292
; VEC4_INTERL2-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <4 x i32> zeroinitializer
9393
; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0
@@ -331,10 +331,10 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, ptr noalias nocapture %A, i32
331331
; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
332332
; VEC4_INTERL2: vector.ph:
333333
; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483640
334+
; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0
334335
; VEC4_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float
335336
; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = fmul reassoc float [[FPINC]], [[DOTCAST]]
336337
; VEC4_INTERL2-NEXT: [[IND_END:%.*]] = fsub reassoc float [[INIT:%.*]], [[TMP1]]
337-
; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0
338338
; VEC4_INTERL2-NEXT: [[MUL:%.*]] = fmul reassoc <4 x float> [[DOTSPLATINSERT2]], <float 4.000000e+00, float poison, float poison, float poison>
339339
; VEC4_INTERL2-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <4 x float> [[MUL]], <4 x float> poison, <4 x i32> zeroinitializer
340340
; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0
@@ -835,17 +835,17 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca
835835
; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
836836
; VEC4_INTERL2: vector.ph:
837837
; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP1]], 2147483640
838+
; VEC4_INTERL2-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0
838839
; VEC4_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float
839840
; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = fmul fast float [[DOTCAST]], -5.000000e-01
840841
; VEC4_INTERL2-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP2]], 0x3FB99999A0000000
841842
; VEC4_INTERL2-NEXT: [[DOTCAST2:%.*]] = uitofp nneg i64 [[N_VEC]] to float
842843
; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[TMP0]], [[DOTCAST2]]
843844
; VEC4_INTERL2-NEXT: [[IND_END3:%.*]] = fadd fast float [[INIT:%.*]], [[TMP3]]
844-
; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0
845-
; VEC4_INTERL2-NEXT: [[BROADCAST:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer
846-
; VEC4_INTERL2-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0
847845
; VEC4_INTERL2-NEXT: [[TMP19:%.*]] = fmul fast <4 x float> [[BROADCAST_SPLATINSERT2]], <float 4.000000e+00, float poison, float poison, float poison>
848846
; VEC4_INTERL2-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <4 x float> [[TMP19]], <4 x float> poison, <4 x i32> zeroinitializer
847+
; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0
848+
; VEC4_INTERL2-NEXT: [[BROADCAST:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer
849849
; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0
850850
; VEC4_INTERL2-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
851851
; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT6:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0

0 commit comments

Comments
 (0)