From 21560fe6b9c73133fd86723071877c55106df010 Mon Sep 17 00:00:00 2001 From: Robert Imschweiler <50044286+ro-i@users.noreply.github.com> Date: Tue, 4 Feb 2025 17:59:03 +0100 Subject: [PATCH] GlobalISel: Fix defined register of invariant.start (#125664) In contrast to SelectionDAG, GlobalISel emitted the implicit definition for invariant.start on a freshly created virtual register instead of on the register associated with the call's return value, leaving subsequent users of the invariant.start value referring to a virtual register that is never defined. A minimal example: ``` %tmp = alloca i32, align 4, addrspace(5) %tmpI = call ptr @llvm.invariant.start.p5(i64 4, ptr addrspace(5) %tmp) #3 call void @llvm.invariant.end.p5(ptr %tmpI, i64 4, ptr addrspace(5) %tmp) #3 store i32 %i, ptr %tmpI, align 4 ``` Although the return value of invariant.start might not be intended for any use beyond invariant.end (the fuzzer might not have created a sensible situation here), emitting the implicit definition on the virtual register that corresponds to the call's result avoids a segfault in the target instruction selector later. This LLVM defect was identified via the AMD Fuzzing project. 
--- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 4 +--- .../AArch64/GlobalISel/arm64-irtranslator.ll | 2 +- .../promote-dependency-on-invariant-result.ll | 23 +++++++++++++++++++ 3 files changed, 25 insertions(+), 4 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/promote-dependency-on-invariant-result.ll diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 3e43299bb81102e..362d856e76a8aa7 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -2441,9 +2441,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, return true; } case Intrinsic::invariant_start: { - LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL); - Register Undef = MRI->createGenericVirtualRegister(PtrTy); - MIRBuilder.buildUndef(Undef); + MIRBuilder.buildUndef(getOrCreateVReg(CI)); return true; } case Intrinsic::invariant_end: diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll index 7a67cf3fd4c942b..15ee5e48a88e6e9 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll @@ -2262,7 +2262,7 @@ declare ptr @llvm.invariant.start.p0(i64, ptr nocapture) readonly nounwind declare void @llvm.invariant.end.p0(ptr, i64, ptr nocapture) nounwind define void @test_invariant_intrin() { ; CHECK-LABEL: name: test_invariant_intrin -; CHECK: %{{[0-9]+}}:_(s64) = G_IMPLICIT_DEF +; CHECK: %{{[0-9]+}}:_(p0) = G_IMPLICIT_DEF ; CHECK-NEXT: RET_ReallyLR %x = alloca %t %inv = call ptr @llvm.invariant.start.p0(i64 8, ptr %x) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/promote-dependency-on-invariant-result.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/promote-dependency-on-invariant-result.ll new file mode 100644 index 000000000000000..090aa067a526068 --- /dev/null +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/promote-dependency-on-invariant-result.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -O0 -global-isel=true --stop-after=irtranslator -o - %s | FileCheck %s + +declare ptr @llvm.invariant.start.p5(i64 immarg, ptr addrspace(5) nocapture) +declare void @llvm.invariant.end.p5(ptr, i64 immarg, ptr addrspace(5) nocapture) + +define void @use_invariant_promotable_lds(ptr addrspace(5) %arg, i32 %i) { + ; CHECK-LABEL: name: use_invariant_promotable_lds + ; CHECK: bb.1.bb: + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[C]](s32), [[DEF]](p0) :: (store (s32) into %ir.tmp) + ; CHECK-NEXT: SI_RETURN +bb: + %tmp = call ptr @llvm.invariant.start.p5(i64 4, ptr addrspace(5) %arg) + call void @llvm.invariant.end.p5(ptr %tmp, i64 4, ptr addrspace(5) %arg) + store i32 0, ptr %tmp, align 4 + ret void +}