Skip to content

Commit

Permalink
[CIR][CUDA] Generate attribute for kernel name of device stubs
Browse files Browse the repository at this point in the history
  • Loading branch information
AdUhTkJm committed Feb 7, 2025
1 parent a07dbdf commit 1ce7b1f
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 12 deletions.
17 changes: 17 additions & 0 deletions clang/include/clang/CIR/Dialect/IR/CIRAttrs.td
Original file line number Diff line number Diff line change
Expand Up @@ -1323,6 +1323,23 @@ def GlobalAnnotationValuesAttr : CIR_Attr<"GlobalAnnotationValues",
let genVerifyDecl = 1;
}

//===----------------------------------------------------------------------===//
// CUDAKernelNameAttr
//===----------------------------------------------------------------------===//

def CUDAKernelNameAttr : CIR_Attr<"CUDAKernelName",
"cuda_kernel_name"> {
let summary = "Device-side function name for this stub.";
let description = [{
This attribute is attached to function definitions and records the mangled name of the kernel function used on the device.

In CUDA, global functions (kernels) are processed differently for host and device. On host, Clang generates device stubs; on device, they are treated as normal functions. As they probably have different mangled names, we must record the corresponding device-side name for a stub.
}];

let parameters = (ins "std::string":$kernel_name);
let assemblyFormat = "`<` $kernel_name `>`";
}

def CIR_TBAAAttr : CIR_Attr<"TBAA", "tbaa", []> {
}

Expand Down
10 changes: 10 additions & 0 deletions clang/lib/CIR/CodeGen/CIRGenCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,16 @@ void CIRGenModule::constructAttributeList(
getLangOpts().OffloadUniformBlock)
assert(!cir::MissingFeatures::CUDA());

if (langOpts.CUDA && !langOpts.CUDAIsDevice &&
TargetDecl->hasAttr<CUDAGlobalAttr>()) {
GlobalDecl kernel(CalleeInfo.getCalleeDecl());
llvm::StringRef kernelName = getMangledName(
kernel.getWithKernelReferenceKind(KernelReferenceKind::Kernel));
auto attr =
cir::CUDAKernelNameAttr::get(&getMLIRContext(), kernelName.str());
funcAttrs.set(attr.getMnemonic(), attr);
}

if (TargetDecl->hasAttr<ArmLocallyStreamingAttr>())
;
}
Expand Down
18 changes: 8 additions & 10 deletions clang/lib/CIR/CodeGen/CIRGenModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
// This is the internal per-translation-unit state used for CIR translation.
//
//===----------------------------------------------------------------------===//
#include "CIRGenModule.h"
#include "CIRGenCXXABI.h"
#include "CIRGenCstEmitter.h"
#include "CIRGenFunction.h"
Expand Down Expand Up @@ -527,10 +526,9 @@ void CIRGenModule::emitGlobal(GlobalDecl GD) {
if (langOpts.HIPStdPar)
llvm_unreachable("NYI");

if (Global->hasAttr<CUDAGlobalAttr>())
llvm_unreachable("NYI");

if (!Global->hasAttr<CUDADeviceAttr>())
// Global functions reside on device, so it shouldn't be skipped.
if (!Global->hasAttr<CUDAGlobalAttr>() &&
!Global->hasAttr<CUDADeviceAttr>())
return;
} else {
// We must skip __device__ functions when compiling for host.
Expand Down Expand Up @@ -2351,10 +2349,10 @@ cir::FuncOp CIRGenModule::GetAddrOfFunction(clang::GlobalDecl GD, mlir::Type Ty,
auto F = GetOrCreateCIRFunction(MangledName, Ty, GD, ForVTable, DontDefer,
/*IsThunk=*/false, IsForDefinition);

// As __global__ functions always reside on device,
// we need special care when accessing them from host;
// otherwise, CUDA functions behave as normal functions
if (langOpts.CUDA && !langOpts.CUDAIsDevice &&
// As __global__ functions (kernels) always reside on device,
// when we access them from host, we must refer to the kernel handle.
// For CUDA, it's just the device stub. For HIP, it's something different.
if (langOpts.CUDA && !langOpts.CUDAIsDevice && langOpts.HIP &&
cast<FunctionDecl>(GD.getDecl())->hasAttr<CUDAGlobalAttr>()) {
llvm_unreachable("NYI");
}
Expand Down Expand Up @@ -2397,7 +2395,7 @@ static std::string getMangledNameImpl(CIRGenModule &CGM, GlobalDecl GD,
assert(0 && "NYI");
} else if (FD && FD->hasAttr<CUDAGlobalAttr>() &&
GD.getKernelReferenceKind() == KernelReferenceKind::Stub) {
assert(0 && "NYI");
Out << "__device_stub__";
} else {
Out << II->getName();
}
Expand Down
4 changes: 4 additions & 0 deletions clang/test/CIR/CodeGen/CUDA/simple-device.cu
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,7 @@ __host__ void host_fn(int *a, int *b, int *c) {}
__device__ void device_fn(int* a, double b, float c) {}

// CIR: cir.func @_Z9device_fnPidf

// Global functions are also like normal functions.
__global__ void global_fn(int *a, int *b, int *c) {}
// CIR: cir.func @_Z9global_fnPiS_S_
14 changes: 12 additions & 2 deletions clang/test/CIR/CodeGen/CUDA/simple.cu
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
#include "../Inputs/cuda.h"

// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \
// RUN: -emit-cir %s -o %t.cir
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
// RUN: -x cuda -emit-cir %s -o %t.cir
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s

// Attribute for global_fn
// CIR: #cir.cuda_kernel_name<_Z9global_fnv>

// This should emit as a normal C++ function.
__host__ void host_fn(int *a, int *b, int *c) {}
Expand All @@ -14,3 +16,11 @@ __host__ void host_fn(int *a, int *b, int *c) {}
__device__ void device_fn(int* a, double b, float c) {}

// CHECK-NOT: cir.func @_Z9device_fnPidf

// This should emit a device stub,
// with an attribute showing the mangled name on device.
__global__ void global_fn();
// CIR: @_Z24__device_stub__global_fnv

// Make sure `global_fn` indeed gets emitted
void x() { auto v = global_fn; }

0 comments on commit 1ce7b1f

Please sign in to comment.