Skip to content

Commit

Permalink
AMDGPU: Use default shouldRewriteCopySrc
Browse files Browse the repository at this point in the history
This was ultimately working around bugs in subregister handling
in peephole-opt. In the common case, it would give up on folding
anything into a subregister extract copy.
  • Loading branch information
arsenm committed Feb 4, 2025
1 parent 84db705 commit e7b88d2
Show file tree
Hide file tree
Showing 9 changed files with 418 additions and 446 deletions.
24 changes: 0 additions & 24 deletions llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3516,30 +3516,6 @@ bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const {
OpType <= AMDGPU::OPERAND_SRC_LAST;
}

bool SIRegisterInfo::shouldRewriteCopySrc(
const TargetRegisterClass *DefRC,
unsigned DefSubReg,
const TargetRegisterClass *SrcRC,
unsigned SrcSubReg) const {
// We want to prefer the smallest register class possible, so we don't want to
// stop and rewrite on anything that looks like a subregister
// extract. Operations mostly don't care about the super register class, so we
// only want to stop on the most basic of copies between the same register
// class.
//
// e.g. if we have something like
// %0 = ...
// %1 = ...
// %2 = REG_SEQUENCE %0, sub0, %1, sub1, %2, sub2
// %3 = COPY %2, sub0
//
// We want to look through the COPY to find:
// => %3 = COPY %0

// Plain copy.
return getCommonSubClass(DefRC, SrcRC) != nullptr;
}

bool SIRegisterInfo::opCanUseLiteralConstant(unsigned OpType) const {
// TODO: 64-bit operands have extending behavior from 32-bit literal.
return OpType >= AMDGPU::OPERAND_REG_IMM_FIRST &&
Expand Down
5 changes: 0 additions & 5 deletions llvm/lib/Target/AMDGPU/SIRegisterInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -275,11 +275,6 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
const TargetRegisterClass *SubRC,
unsigned SubIdx) const;

bool shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
unsigned DefSubReg,
const TargetRegisterClass *SrcRC,
unsigned SrcSubReg) const override;

/// \returns True if operands defined with this operand type can accept
/// a literal constant (i.e. any 32-bit immediate).
bool opCanUseLiteralConstant(unsigned OpType) const;
Expand Down
44 changes: 22 additions & 22 deletions llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll
Original file line number Diff line number Diff line change
Expand Up @@ -163,33 +163,33 @@ define amdgpu_kernel void @test_copy_v4i8_x3(ptr addrspace(1) %out0, ptr addrspa
define amdgpu_kernel void @test_copy_v4i8_x4(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %out2, ptr addrspace(1) %out3, ptr addrspace(1) %in) nounwind {
; SI-LABEL: test_copy_v4i8_x4:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x11
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s10, 0
; SI-NEXT: s_mov_b32 s11, s3
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x11
; SI-NEXT: s_mov_b32 s11, 0xf000
; SI-NEXT: s_mov_b32 s2, 0
; SI-NEXT: s_mov_b32 s3, s11
; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; SI-NEXT: v_mov_b32_e32 v1, 0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64
; SI-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x9
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_mov_b32 s14, s2
; SI-NEXT: s_mov_b32 s15, s3
; SI-NEXT: s_mov_b32 s18, s2
; SI-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
; SI-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
; SI-NEXT: s_mov_b32 s10, -1
; SI-NEXT: s_mov_b32 s14, s10
; SI-NEXT: s_mov_b32 s15, s11
; SI-NEXT: s_mov_b32 s18, s10
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b32 s0, s4
; SI-NEXT: s_mov_b32 s1, s5
; SI-NEXT: s_mov_b32 s19, s3
; SI-NEXT: s_mov_b32 s22, s2
; SI-NEXT: s_mov_b32 s23, s3
; SI-NEXT: s_mov_b32 s12, s6
; SI-NEXT: s_mov_b32 s13, s7
; SI-NEXT: s_mov_b32 s16, s8
; SI-NEXT: s_mov_b32 s17, s9
; SI-NEXT: s_mov_b32 s20, s10
; SI-NEXT: s_mov_b32 s21, s11
; SI-NEXT: s_mov_b32 s8, s0
; SI-NEXT: s_mov_b32 s9, s1
; SI-NEXT: s_mov_b32 s19, s11
; SI-NEXT: s_mov_b32 s22, s10
; SI-NEXT: s_mov_b32 s23, s11
; SI-NEXT: s_mov_b32 s12, s2
; SI-NEXT: s_mov_b32 s13, s3
; SI-NEXT: s_mov_b32 s16, s4
; SI-NEXT: s_mov_b32 s17, s5
; SI-NEXT: s_mov_b32 s20, s6
; SI-NEXT: s_mov_b32 s21, s7
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: buffer_store_dword v0, off, s[8:11], 0
; SI-NEXT: buffer_store_dword v0, off, s[12:15], 0
; SI-NEXT: buffer_store_dword v0, off, s[16:19], 0
; SI-NEXT: buffer_store_dword v0, off, s[20:23], 0
Expand Down
36 changes: 18 additions & 18 deletions llvm/test/CodeGen/AMDGPU/ctpop64.ll
Original file line number Diff line number Diff line change
Expand Up @@ -334,58 +334,58 @@ define amdgpu_kernel void @v_ctpop_v4i64(ptr addrspace(1) noalias %out, ptr addr
define amdgpu_kernel void @ctpop_i64_in_br(ptr addrspace(1) %out, ptr addrspace(1) %in, i64 %ctpop_arg, i32 %cond) {
; SI-LABEL: ctpop_i64_in_br:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dword s8, s[4:5], 0xf
; SI-NEXT: s_load_dword s6, s[4:5], 0xf
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0xd
; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_cmp_lg_u32 s8, 0
; SI-NEXT: s_cmp_lg_u32 s6, 0
; SI-NEXT: s_cbranch_scc0 .LBB7_4
; SI-NEXT: ; %bb.1: ; %else
; SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x2
; SI-NEXT: s_load_dwordx2 s[6:7], s[2:3], 0x2
; SI-NEXT: s_mov_b64 s[2:3], 0
; SI-NEXT: s_andn2_b64 vcc, exec, s[2:3]
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b64 vcc, vcc
; SI-NEXT: s_cbranch_vccnz .LBB7_3
; SI-NEXT: .LBB7_2: ; %if
; SI-NEXT: s_bcnt1_i32_b64 s4, s[6:7]
; SI-NEXT: s_mov_b32 s5, 0
; SI-NEXT: s_bcnt1_i32_b64 s6, s[4:5]
; SI-NEXT: s_mov_b32 s7, 0
; SI-NEXT: .LBB7_3: ; %endif
; SI-NEXT: v_mov_b32_e32 v0, s4
; SI-NEXT: v_mov_b32_e32 v0, s6
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v1, s5
; SI-NEXT: v_mov_b32_e32 v1, s7
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB7_4:
; SI-NEXT: ; implicit-def: $sgpr4_sgpr5
; SI-NEXT: ; implicit-def: $sgpr6_sgpr7
; SI-NEXT: s_branch .LBB7_2
;
; VI-LABEL: ctpop_i64_in_br:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_load_dword s8, s[4:5], 0x3c
; VI-NEXT: s_load_dword s6, s[4:5], 0x3c
; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_cmp_lg_u32 s8, 0
; VI-NEXT: s_cmp_lg_u32 s6, 0
; VI-NEXT: s_cbranch_scc0 .LBB7_4
; VI-NEXT: ; %bb.1: ; %else
; VI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x8
; VI-NEXT: s_load_dwordx2 s[6:7], s[2:3], 0x8
; VI-NEXT: s_cbranch_execnz .LBB7_3
; VI-NEXT: .LBB7_2: ; %if
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_bcnt1_i32_b64 s4, s[6:7]
; VI-NEXT: s_mov_b32 s5, 0
; VI-NEXT: s_bcnt1_i32_b64 s6, s[4:5]
; VI-NEXT: s_mov_b32 s7, 0
; VI-NEXT: .LBB7_3: ; %endif
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v0, s4
; VI-NEXT: v_mov_b32_e32 v0, s6
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: v_mov_b32_e32 v1, s5
; VI-NEXT: v_mov_b32_e32 v1, s7
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
; VI-NEXT: .LBB7_4:
; VI-NEXT: ; implicit-def: $sgpr4_sgpr5
; VI-NEXT: ; implicit-def: $sgpr6_sgpr7
; VI-NEXT: s_branch .LBB7_2
entry:
%tmp0 = icmp eq i32 %cond, 0
Expand Down
Loading

0 comments on commit e7b88d2

Please sign in to comment.