Skip to content

Commit a896000

Browse files
arsenm authored and Icohedron committed
AMDGPU: Use default shouldRewriteCopySrc (llvm#125535)
This was ultimately working around bugs in subregister handling in peephole-opt. In the common case, it would give up on folding anything into a subregister extract copy.
1 parent 83e2673 commit a896000

9 files changed

+418
-446
lines changed

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

-24
Original file line number | Diff line number | Diff line change
@@ -3516,30 +3516,6 @@ bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const {
35163516
OpType <= AMDGPU::OPERAND_SRC_LAST;
35173517
}
35183518

3519-
bool SIRegisterInfo::shouldRewriteCopySrc(
3520-
const TargetRegisterClass *DefRC,
3521-
unsigned DefSubReg,
3522-
const TargetRegisterClass *SrcRC,
3523-
unsigned SrcSubReg) const {
3524-
// We want to prefer the smallest register class possible, so we don't want to
3525-
// stop and rewrite on anything that looks like a subregister
3526-
// extract. Operations mostly don't care about the super register class, so we
3527-
// only want to stop on the most basic of copies between the same register
3528-
// class.
3529-
//
3530-
// e.g. if we have something like
3531-
// %0 = ...
3532-
// %1 = ...
3533-
// %2 = REG_SEQUENCE %0, sub0, %1, sub1, %2, sub2
3534-
// %3 = COPY %2, sub0
3535-
//
3536-
// We want to look through the COPY to find:
3537-
// => %3 = COPY %0
3538-
3539-
// Plain copy.
3540-
return getCommonSubClass(DefRC, SrcRC) != nullptr;
3541-
}
3542-
35433519
bool SIRegisterInfo::opCanUseLiteralConstant(unsigned OpType) const {
35443520
// TODO: 64-bit operands have extending behavior from 32-bit literal.
35453521
return OpType >= AMDGPU::OPERAND_REG_IMM_FIRST &&

llvm/lib/Target/AMDGPU/SIRegisterInfo.h

-5
Original file line number | Diff line number | Diff line change
@@ -275,11 +275,6 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
275275
const TargetRegisterClass *SubRC,
276276
unsigned SubIdx) const;
277277

278-
bool shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
279-
unsigned DefSubReg,
280-
const TargetRegisterClass *SrcRC,
281-
unsigned SrcSubReg) const override;
282-
283278
/// \returns True if operands defined with this operand type can accept
284279
/// a literal constant (i.e. any 32-bit immediate).
285280
bool opCanUseLiteralConstant(unsigned OpType) const;

llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll

+22-22
Original file line number | Diff line number | Diff line change
@@ -163,33 +163,33 @@ define amdgpu_kernel void @test_copy_v4i8_x3(ptr addrspace(1) %out0, ptr addrspa
163163
define amdgpu_kernel void @test_copy_v4i8_x4(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %out2, ptr addrspace(1) %out3, ptr addrspace(1) %in) nounwind {
164164
; SI-LABEL: test_copy_v4i8_x4:
165165
; SI: ; %bb.0:
166-
; SI-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x11
167-
; SI-NEXT: s_mov_b32 s3, 0xf000
168-
; SI-NEXT: s_mov_b32 s10, 0
169-
; SI-NEXT: s_mov_b32 s11, s3
166+
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x11
167+
; SI-NEXT: s_mov_b32 s11, 0xf000
168+
; SI-NEXT: s_mov_b32 s2, 0
169+
; SI-NEXT: s_mov_b32 s3, s11
170170
; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
171171
; SI-NEXT: v_mov_b32_e32 v1, 0
172172
; SI-NEXT: s_waitcnt lgkmcnt(0)
173-
; SI-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64
174-
; SI-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x9
175-
; SI-NEXT: s_mov_b32 s2, -1
176-
; SI-NEXT: s_mov_b32 s14, s2
177-
; SI-NEXT: s_mov_b32 s15, s3
178-
; SI-NEXT: s_mov_b32 s18, s2
173+
; SI-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
174+
; SI-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
175+
; SI-NEXT: s_mov_b32 s10, -1
176+
; SI-NEXT: s_mov_b32 s14, s10
177+
; SI-NEXT: s_mov_b32 s15, s11
178+
; SI-NEXT: s_mov_b32 s18, s10
179179
; SI-NEXT: s_waitcnt lgkmcnt(0)
180-
; SI-NEXT: s_mov_b32 s0, s4
181-
; SI-NEXT: s_mov_b32 s1, s5
182-
; SI-NEXT: s_mov_b32 s19, s3
183-
; SI-NEXT: s_mov_b32 s22, s2
184-
; SI-NEXT: s_mov_b32 s23, s3
185-
; SI-NEXT: s_mov_b32 s12, s6
186-
; SI-NEXT: s_mov_b32 s13, s7
187-
; SI-NEXT: s_mov_b32 s16, s8
188-
; SI-NEXT: s_mov_b32 s17, s9
189-
; SI-NEXT: s_mov_b32 s20, s10
190-
; SI-NEXT: s_mov_b32 s21, s11
180+
; SI-NEXT: s_mov_b32 s8, s0
181+
; SI-NEXT: s_mov_b32 s9, s1
182+
; SI-NEXT: s_mov_b32 s19, s11
183+
; SI-NEXT: s_mov_b32 s22, s10
184+
; SI-NEXT: s_mov_b32 s23, s11
185+
; SI-NEXT: s_mov_b32 s12, s2
186+
; SI-NEXT: s_mov_b32 s13, s3
187+
; SI-NEXT: s_mov_b32 s16, s4
188+
; SI-NEXT: s_mov_b32 s17, s5
189+
; SI-NEXT: s_mov_b32 s20, s6
190+
; SI-NEXT: s_mov_b32 s21, s7
191191
; SI-NEXT: s_waitcnt vmcnt(0)
192-
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
192+
; SI-NEXT: buffer_store_dword v0, off, s[8:11], 0
193193
; SI-NEXT: buffer_store_dword v0, off, s[12:15], 0
194194
; SI-NEXT: buffer_store_dword v0, off, s[16:19], 0
195195
; SI-NEXT: buffer_store_dword v0, off, s[20:23], 0

llvm/test/CodeGen/AMDGPU/ctpop64.ll

+18-18
Original file line number | Diff line number | Diff line change
@@ -334,58 +334,58 @@ define amdgpu_kernel void @v_ctpop_v4i64(ptr addrspace(1) noalias %out, ptr addr
334334
define amdgpu_kernel void @ctpop_i64_in_br(ptr addrspace(1) %out, ptr addrspace(1) %in, i64 %ctpop_arg, i32 %cond) {
335335
; SI-LABEL: ctpop_i64_in_br:
336336
; SI: ; %bb.0: ; %entry
337-
; SI-NEXT: s_load_dword s8, s[4:5], 0xf
337+
; SI-NEXT: s_load_dword s6, s[4:5], 0xf
338338
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
339-
; SI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0xd
339+
; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd
340340
; SI-NEXT: s_waitcnt lgkmcnt(0)
341-
; SI-NEXT: s_cmp_lg_u32 s8, 0
341+
; SI-NEXT: s_cmp_lg_u32 s6, 0
342342
; SI-NEXT: s_cbranch_scc0 .LBB7_4
343343
; SI-NEXT: ; %bb.1: ; %else
344-
; SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x2
344+
; SI-NEXT: s_load_dwordx2 s[6:7], s[2:3], 0x2
345345
; SI-NEXT: s_mov_b64 s[2:3], 0
346346
; SI-NEXT: s_andn2_b64 vcc, exec, s[2:3]
347347
; SI-NEXT: s_waitcnt lgkmcnt(0)
348348
; SI-NEXT: s_mov_b64 vcc, vcc
349349
; SI-NEXT: s_cbranch_vccnz .LBB7_3
350350
; SI-NEXT: .LBB7_2: ; %if
351-
; SI-NEXT: s_bcnt1_i32_b64 s4, s[6:7]
352-
; SI-NEXT: s_mov_b32 s5, 0
351+
; SI-NEXT: s_bcnt1_i32_b64 s6, s[4:5]
352+
; SI-NEXT: s_mov_b32 s7, 0
353353
; SI-NEXT: .LBB7_3: ; %endif
354-
; SI-NEXT: v_mov_b32_e32 v0, s4
354+
; SI-NEXT: v_mov_b32_e32 v0, s6
355355
; SI-NEXT: s_mov_b32 s3, 0xf000
356356
; SI-NEXT: s_mov_b32 s2, -1
357-
; SI-NEXT: v_mov_b32_e32 v1, s5
357+
; SI-NEXT: v_mov_b32_e32 v1, s7
358358
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
359359
; SI-NEXT: s_endpgm
360360
; SI-NEXT: .LBB7_4:
361-
; SI-NEXT: ; implicit-def: $sgpr4_sgpr5
361+
; SI-NEXT: ; implicit-def: $sgpr6_sgpr7
362362
; SI-NEXT: s_branch .LBB7_2
363363
;
364364
; VI-LABEL: ctpop_i64_in_br:
365365
; VI: ; %bb.0: ; %entry
366-
; VI-NEXT: s_load_dword s8, s[4:5], 0x3c
366+
; VI-NEXT: s_load_dword s6, s[4:5], 0x3c
367367
; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
368-
; VI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
368+
; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34
369369
; VI-NEXT: s_waitcnt lgkmcnt(0)
370-
; VI-NEXT: s_cmp_lg_u32 s8, 0
370+
; VI-NEXT: s_cmp_lg_u32 s6, 0
371371
; VI-NEXT: s_cbranch_scc0 .LBB7_4
372372
; VI-NEXT: ; %bb.1: ; %else
373-
; VI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x8
373+
; VI-NEXT: s_load_dwordx2 s[6:7], s[2:3], 0x8
374374
; VI-NEXT: s_cbranch_execnz .LBB7_3
375375
; VI-NEXT: .LBB7_2: ; %if
376376
; VI-NEXT: s_waitcnt lgkmcnt(0)
377-
; VI-NEXT: s_bcnt1_i32_b64 s4, s[6:7]
378-
; VI-NEXT: s_mov_b32 s5, 0
377+
; VI-NEXT: s_bcnt1_i32_b64 s6, s[4:5]
378+
; VI-NEXT: s_mov_b32 s7, 0
379379
; VI-NEXT: .LBB7_3: ; %endif
380380
; VI-NEXT: s_waitcnt lgkmcnt(0)
381-
; VI-NEXT: v_mov_b32_e32 v0, s4
381+
; VI-NEXT: v_mov_b32_e32 v0, s6
382382
; VI-NEXT: s_mov_b32 s3, 0xf000
383383
; VI-NEXT: s_mov_b32 s2, -1
384-
; VI-NEXT: v_mov_b32_e32 v1, s5
384+
; VI-NEXT: v_mov_b32_e32 v1, s7
385385
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
386386
; VI-NEXT: s_endpgm
387387
; VI-NEXT: .LBB7_4:
388-
; VI-NEXT: ; implicit-def: $sgpr4_sgpr5
388+
; VI-NEXT: ; implicit-def: $sgpr6_sgpr7
389389
; VI-NEXT: s_branch .LBB7_2
390390
entry:
391391
%tmp0 = icmp eq i32 %cond, 0

0 commit comments

Comments
 (0)