Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PeepholeOpt: Fix looking for def of current copy to coalesce #125533

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 32 additions & 14 deletions llvm/lib/CodeGen/PeepholeOptimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -465,7 +465,8 @@ class PeepholeOptimizer : private MachineFunction::Delegate {
bool optimizeUncoalescableCopy(MachineInstr &MI,
SmallPtrSetImpl<MachineInstr *> &LocalMIs);
bool optimizeRecurrence(MachineInstr &PHI);
bool findNextSource(RegSubRegPair RegSubReg, RewriteMapTy &RewriteMap);
bool findNextSource(const TargetRegisterClass *DefRC, unsigned DefSubReg,
RegSubRegPair RegSubReg, RewriteMapTy &RewriteMap);
bool isMoveImmediate(MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
DenseMap<Register, MachineInstr *> &ImmDefMIs);
bool foldImmediate(MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
Expand Down Expand Up @@ -991,8 +992,10 @@ bool PeepholeOptimizer::optimizeCondBranch(MachineInstr &MI) {
return TII->optimizeCondBranch(MI);
}

/// Try to find the next source that share the same register file
/// for the value defined by \p Reg and \p SubReg.
/// Try to find a better source value that shares the same register file to
/// replace \p RegSubReg in an instruction like
/// `DefRC.DefSubReg = COPY RegSubReg`
///
/// When true is returned, the \p RewriteMap can be used by the client to
/// retrieve all Def -> Use along the way up to the next source. Any found
/// Use that is not itself a key for another entry, is the next source to
Expand All @@ -1002,17 +1005,15 @@ bool PeepholeOptimizer::optimizeCondBranch(MachineInstr &MI) {
/// share the same register file as \p Reg and \p SubReg. The client should
/// then be capable to rewrite all intermediate PHIs to get the next source.
/// \return False if no alternative sources are available. True otherwise.
bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg,
bool PeepholeOptimizer::findNextSource(const TargetRegisterClass *DefRC,
unsigned DefSubReg,
RegSubRegPair RegSubReg,
RewriteMapTy &RewriteMap) {
// Do not try to find a new source for a physical register.
// So far we do not have any motivating example for doing that.
// Thus, instead of maintaining untested code, we will revisit that if
// that changes at some point.
Register Reg = RegSubReg.Reg;
if (Reg.isPhysical())
return false;
const TargetRegisterClass *DefRC = MRI->getRegClass(Reg);

SmallVector<RegSubRegPair, 4> SrcToLook;
RegSubRegPair CurSrcPair = RegSubReg;
SrcToLook.push_back(CurSrcPair);
Expand Down Expand Up @@ -1076,7 +1077,7 @@ bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg,

// Keep following the chain if the value isn't any better yet.
const TargetRegisterClass *SrcRC = MRI->getRegClass(CurSrcPair.Reg);
if (!TRI->shouldRewriteCopySrc(DefRC, RegSubReg.SubReg, SrcRC,
if (!TRI->shouldRewriteCopySrc(DefRC, DefSubReg, SrcRC,
CurSrcPair.SubReg))
continue;

Expand Down Expand Up @@ -1184,21 +1185,33 @@ bool PeepholeOptimizer::optimizeCoalescableCopyImpl(Rewriter &&CpyRewriter) {
bool Changed = false;
// Get the right rewriter for the current copy.
// Rewrite each rewritable source.
RegSubRegPair Src;
RegSubRegPair Dst;
RegSubRegPair TrackPair;
while (CpyRewriter.getNextRewritableSource(Src, TrackPair)) {
while (CpyRewriter.getNextRewritableSource(TrackPair, Dst)) {
if (Dst.Reg.isPhysical()) {
// Do not try to find a new source for a physical register.
// So far we do not have any motivating example for doing that.
// Thus, instead of maintaining untested code, we will revisit that if
// that changes at some point.
continue;
}

const TargetRegisterClass *DefRC = MRI->getRegClass(Dst.Reg);

// Keep track of PHI nodes and its incoming edges when looking for sources.
RewriteMapTy RewriteMap;
// Try to find a more suitable source. If we failed to do so, or get the
// actual source, move to the next source.
if (!findNextSource(TrackPair, RewriteMap))
if (!findNextSource(DefRC, Dst.SubReg, TrackPair, RewriteMap))
continue;

// Get the new source to rewrite. TODO: Only enable handling of multiple
// sources (PHIs) once we have a motivating example and testcases for it.
RegSubRegPair NewSrc = getNewSource(MRI, TII, TrackPair, RewriteMap,
/*HandleMultipleSources=*/false);
if (Src.Reg == NewSrc.Reg || NewSrc.Reg == 0)
assert(TrackPair.Reg != NewSrc.Reg &&
"should not rewrite source to original value");
if (!NewSrc.Reg)
continue;

// Rewrite source.
Expand Down Expand Up @@ -1325,9 +1338,14 @@ bool PeepholeOptimizer::optimizeUncoalescableCopy(
if (Def.Reg.isPhysical())
return false;

// FIXME: Uncoalescable copies are treated differently by
// UncoalescableRewriter, and this probably should not share
// API. getNextRewritableSource really finds rewritable defs.
const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg);

// If we do not know how to rewrite this definition, there is no point
// in trying to kill this instruction.
if (!findNextSource(Def, RewriteMap))
if (!findNextSource(DefRC, Def.SubReg, Def, RewriteMap))
return false;

RewritePairs.push_back(Def);
Expand Down
60 changes: 30 additions & 30 deletions llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_monotonic(ptr %ptr, i8 %value) {
; -O0: subs w8, w8, w10, uxtb
;
; -O1-LABEL: atomicrmw_xchg_i8_aligned_monotonic:
; -O1: ldxrb w8, [x0]
; -O1: stxrb w9, w1, [x0]
; -O1: ldxrb w0, [x8]
; -O1: stxrb w9, w1, [x8]
%r = atomicrmw xchg ptr %ptr, i8 %value monotonic, align 1
ret i8 %r
}
Expand All @@ -27,8 +27,8 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_acquire(ptr %ptr, i8 %value) {
; -O0: subs w8, w8, w10, uxtb
;
; -O1-LABEL: atomicrmw_xchg_i8_aligned_acquire:
; -O1: ldaxrb w8, [x0]
; -O1: stxrb w9, w1, [x0]
; -O1: ldaxrb w0, [x8]
; -O1: stxrb w9, w1, [x8]
%r = atomicrmw xchg ptr %ptr, i8 %value acquire, align 1
ret i8 %r
}
Expand All @@ -42,8 +42,8 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_release(ptr %ptr, i8 %value) {
; -O0: subs w8, w8, w10, uxtb
;
; -O1-LABEL: atomicrmw_xchg_i8_aligned_release:
; -O1: ldxrb w8, [x0]
; -O1: stlxrb w9, w1, [x0]
; -O1: ldxrb w0, [x8]
; -O1: stlxrb w9, w1, [x8]
%r = atomicrmw xchg ptr %ptr, i8 %value release, align 1
ret i8 %r
}
Expand All @@ -57,8 +57,8 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
; -O0: subs w8, w8, w10, uxtb
;
; -O1-LABEL: atomicrmw_xchg_i8_aligned_acq_rel:
; -O1: ldaxrb w8, [x0]
; -O1: stlxrb w9, w1, [x0]
; -O1: ldaxrb w0, [x8]
; -O1: stlxrb w9, w1, [x8]
%r = atomicrmw xchg ptr %ptr, i8 %value acq_rel, align 1
ret i8 %r
}
Expand All @@ -72,8 +72,8 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
; -O0: subs w8, w8, w10, uxtb
;
; -O1-LABEL: atomicrmw_xchg_i8_aligned_seq_cst:
; -O1: ldaxrb w8, [x0]
; -O1: stlxrb w9, w1, [x0]
; -O1: ldaxrb w0, [x8]
; -O1: stlxrb w9, w1, [x8]
%r = atomicrmw xchg ptr %ptr, i8 %value seq_cst, align 1
ret i8 %r
}
Expand All @@ -86,8 +86,8 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_monotonic(ptr %ptr, i16 %value)
; -O0: subs w8, w8, w9, uxth
;
; -O1-LABEL: atomicrmw_xchg_i16_aligned_monotonic:
; -O1: ldxrh w8, [x0]
; -O1: stxrh w9, w1, [x0]
; -O1: ldxrh w0, [x8]
; -O1: stxrh w9, w1, [x8]
%r = atomicrmw xchg ptr %ptr, i16 %value monotonic, align 2
ret i16 %r
}
Expand All @@ -100,8 +100,8 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_acquire(ptr %ptr, i16 %value) {
; -O0: subs w8, w8, w9, uxth
;
; -O1-LABEL: atomicrmw_xchg_i16_aligned_acquire:
; -O1: ldaxrh w8, [x0]
; -O1: stxrh w9, w1, [x0]
; -O1: ldaxrh w0, [x8]
; -O1: stxrh w9, w1, [x8]
%r = atomicrmw xchg ptr %ptr, i16 %value acquire, align 2
ret i16 %r
}
Expand All @@ -114,8 +114,8 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_release(ptr %ptr, i16 %value) {
; -O0: subs w8, w8, w9, uxth
;
; -O1-LABEL: atomicrmw_xchg_i16_aligned_release:
; -O1: ldxrh w8, [x0]
; -O1: stlxrh w9, w1, [x0]
; -O1: ldxrh w0, [x8]
; -O1: stlxrh w9, w1, [x8]
%r = atomicrmw xchg ptr %ptr, i16 %value release, align 2
ret i16 %r
}
Expand All @@ -128,8 +128,8 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
; -O0: subs w8, w8, w9, uxth
;
; -O1-LABEL: atomicrmw_xchg_i16_aligned_acq_rel:
; -O1: ldaxrh w8, [x0]
; -O1: stlxrh w9, w1, [x0]
; -O1: ldaxrh w0, [x8]
; -O1: stlxrh w9, w1, [x8]
%r = atomicrmw xchg ptr %ptr, i16 %value acq_rel, align 2
ret i16 %r
}
Expand All @@ -142,8 +142,8 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
; -O0: subs w8, w8, w9, uxth
;
; -O1-LABEL: atomicrmw_xchg_i16_aligned_seq_cst:
; -O1: ldaxrh w8, [x0]
; -O1: stlxrh w9, w1, [x0]
; -O1: ldaxrh w0, [x8]
; -O1: stlxrh w9, w1, [x8]
%r = atomicrmw xchg ptr %ptr, i16 %value seq_cst, align 2
ret i16 %r
}
Expand Down Expand Up @@ -392,8 +392,8 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_monotonic(ptr %ptr, i8 %value)
; -O0: subs w8, w8, w10, uxtb
;
; -O1-LABEL: atomicrmw_xchg_i8_unaligned_monotonic:
; -O1: ldxrb w8, [x0]
; -O1: stxrb w9, w1, [x0]
; -O1: ldxrb w0, [x8]
; -O1: stxrb w9, w1, [x8]
%r = atomicrmw xchg ptr %ptr, i8 %value monotonic, align 1
ret i8 %r
}
Expand All @@ -407,8 +407,8 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_acquire(ptr %ptr, i8 %value) {
; -O0: subs w8, w8, w10, uxtb
;
; -O1-LABEL: atomicrmw_xchg_i8_unaligned_acquire:
; -O1: ldaxrb w8, [x0]
; -O1: stxrb w9, w1, [x0]
; -O1: ldaxrb w0, [x8]
; -O1: stxrb w9, w1, [x8]
%r = atomicrmw xchg ptr %ptr, i8 %value acquire, align 1
ret i8 %r
}
Expand All @@ -422,8 +422,8 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_release(ptr %ptr, i8 %value) {
; -O0: subs w8, w8, w10, uxtb
;
; -O1-LABEL: atomicrmw_xchg_i8_unaligned_release:
; -O1: ldxrb w8, [x0]
; -O1: stlxrb w9, w1, [x0]
; -O1: ldxrb w0, [x8]
; -O1: stlxrb w9, w1, [x8]
%r = atomicrmw xchg ptr %ptr, i8 %value release, align 1
ret i8 %r
}
Expand All @@ -437,8 +437,8 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
; -O0: subs w8, w8, w10, uxtb
;
; -O1-LABEL: atomicrmw_xchg_i8_unaligned_acq_rel:
; -O1: ldaxrb w8, [x0]
; -O1: stlxrb w9, w1, [x0]
; -O1: ldaxrb w0, [x8]
; -O1: stlxrb w9, w1, [x8]
%r = atomicrmw xchg ptr %ptr, i8 %value acq_rel, align 1
ret i8 %r
}
Expand All @@ -452,8 +452,8 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
; -O0: subs w8, w8, w10, uxtb
;
; -O1-LABEL: atomicrmw_xchg_i8_unaligned_seq_cst:
; -O1: ldaxrb w8, [x0]
; -O1: stlxrb w9, w1, [x0]
; -O1: ldaxrb w0, [x8]
; -O1: stlxrb w9, w1, [x8]
%r = atomicrmw xchg ptr %ptr, i8 %value seq_cst, align 1
ret i8 %r
}
Expand Down
Loading