diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index 76221be12319..15697a9914a7 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -2353,8 +2353,9 @@ cir::FuncOp CIRGenModule::GetAddrOfFunction(clang::GlobalDecl GD, mlir::Type Ty, // As __global__ functions (kernels) always reside on device, // when we access them from host, we must refer to the kernel handle. // For CUDA, it's just the device stub. For HIP, it's something different. - if (langOpts.CUDA && !langOpts.CUDAIsDevice && langOpts.HIP && - cast<FunctionDecl>(GD.getDecl())->hasAttr<CUDAGlobalAttr>()) { + if ((langOpts.CUDA || langOpts.HIP) && !langOpts.CUDAIsDevice && + cast<FunctionDecl>(GD.getDecl())->hasAttr<CUDAGlobalAttr>() && + cast<FunctionDecl>(GD.getDecl())->isThisDeclarationADefinition()) { llvm_unreachable("NYI"); } diff --git a/clang/test/CIR/CodeGen/HIP/simple-device.cpp b/clang/test/CIR/CodeGen/HIP/simple-device.cpp deleted file mode 100644 index e627a90dc410..000000000000 --- a/clang/test/CIR/CodeGen/HIP/simple-device.cpp +++ /dev/null @@ -1,14 +0,0 @@ -#include "../Inputs/cuda.h" - -// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fcuda-is-device \ -// RUN: -fclangir -emit-cir -o - %s | FileCheck %s - -// This shouldn't emit. -__host__ void host_fn(int *a, int *b, int *c) {} - -// CHECK-NOT: cir.func @_Z7host_fnPiS_S_ - -// This should emit as a normal C++ function. 
-__device__ void device_fn(int* a, double b, float c) {} - -// CIR: cir.func @_Z9device_fnPidf diff --git a/clang/test/CIR/CodeGen/HIP/simple.cpp b/clang/test/CIR/CodeGen/HIP/simple.cpp index ec4110da10d7..4fa711373d89 100644 --- a/clang/test/CIR/CodeGen/HIP/simple.cpp +++ b/clang/test/CIR/CodeGen/HIP/simple.cpp @@ -1,16 +1,31 @@ #include "../Inputs/cuda.h" -// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \ -// RUN: -emit-cir %s -o %t.cir -// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \ +// RUN: -x hip -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR-HOST --input-file=%t.cir %s +// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip \ +// RUN: -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR-DEVICE --input-file=%t.cir %s + +// Attribute for global_fn +// CIR-HOST: [[Kernel:#[a-zA-Z_0-9]+]] = {{.*}}#cir.cuda_kernel_name<_Z9global_fnv>{{.*}} -// This should emit as a normal C++ function. __host__ void host_fn(int *a, int *b, int *c) {} +// CIR-HOST: cir.func @_Z7host_fnPiS_S_ +// CIR-DEVICE-NOT: cir.func @_Z7host_fnPiS_S_ -// CIR: cir.func @_Z7host_fnPiS_S_ +__device__ void device_fn(int *a, double b, float c) {} +// CIR-HOST-NOT: cir.func @_Z9device_fnPidf +// CIR-DEVICE: cir.func @_Z9device_fnPidf -// This shouldn't emit. -__device__ void device_fn(int* a, double b, float c) {} +#ifdef __AMDGPU__ +__global__ void global_fn() {} +#else +__global__ void global_fn(); +#endif +// CIR-HOST: @_Z24__device_stub__global_fnv(){{.*}}extra([[Kernel]]) +// CIR-DEVICE: @_Z9global_fnv -// CHECK-NOT: cir.func @_Z9device_fnPidf +// Make sure `global_fn` indeed gets emitted +__host__ void x() { auto v = global_fn; }