-
Notifications
You must be signed in to change notification settings - Fork 128
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[CIR][CUDA] Generate attribute for kernel name of device stubs (#1317)
A `__global__` function is now emitted on the host as a device stub, carrying an attribute that records the corresponding kernel name (the device-side mangled name of the same function). The dynamic registration phase will be implemented in LLVM lowering. For example, the CIR generated for `__global__ void global_fn();` looks like: ``` #fn_attr1 = #cir<extra({cuda_kernel_name = #cir.cuda_kernel_name<_Z9global_fnv>})> cir.func private @_Z24__device_stub__global_fnv() extra(#fn_attr1) ```
- Loading branch information
Showing
6 changed files
with
79 additions
and
31 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
//===---- CIRCUDAAttrs.td - CIR dialect attrs for CUDA -----*- tablegen -*-===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// This file declares the CIR dialect attributes for CUDA. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#ifndef MLIR_CIR_DIALECT_CIR_CUDA_ATTRS | ||
#define MLIR_CIR_DIALECT_CIR_CUDA_ATTRS | ||
|
||
//===----------------------------------------------------------------------===// | ||
// CUDAKernelNameAttr | ||
//===----------------------------------------------------------------------===// | ||
|
||
// Attribute placed on a host-side device stub to record the mangled | ||
// device-side name of the kernel the stub corresponds to. | ||
def CUDAKernelNameAttr : CIR_Attr<"CUDAKernelName", | ||
"cuda_kernel_name"> { | ||
let summary = "Device-side function name for this stub."; | ||
let description = | ||
[{ | ||
This attribute is attached to function definitions and records the | ||
mangled name of the kernel function used on the device. | ||
|
||
In CUDA, global functions (kernels) are processed differently for host | ||
and device. On host, Clang generates device stubs; on device, they are | ||
treated as normal functions. As they probably have different mangled | ||
names, we must record the corresponding device-side name for a stub. | ||
}]; | ||
|
||
// Single parameter: the kernel name, held as a std::string. | ||
let parameters = (ins "std::string":$kernel_name); | ||
// Textual form wraps the name in angle brackets, | ||
// e.g. #cir.cuda_kernel_name<_Z9global_fnv>. | ||
let assemblyFormat = "`<` $kernel_name `>`"; | ||
} | ||
|
||
#endif // MLIR_CIR_DIALECT_CIR_CUDA_ATTRS |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,16 +1,31 @@ | ||
#include "../Inputs/cuda.h" | ||
|
||
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \ | ||
// RUN: -x cuda -emit-cir %s -o %t.cir | ||
// RUN: FileCheck --check-prefix=CIR-HOST --input-file=%t.cir %s | ||
|
||
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \ | ||
// RUN: -emit-cir %s -o %t.cir | ||
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s | ||
// RUN: -fcuda-is-device -emit-cir %s -o %t.cir | ||
// RUN: FileCheck --check-prefix=CIR-DEVICE --input-file=%t.cir %s | ||
|
||
// Attribute for global_fn | ||
// CIR-HOST: [[Kernel:#[a-zA-Z_0-9]+]] = {{.*}}#cir.cuda_kernel_name<_Z9global_fnv>{{.*}} | ||
|
||
// This should emit as a normal C++ function on host, and shouldn't emit on device. | ||
__host__ void host_fn(int *a, int *b, int *c) {} | ||
// CIR-HOST: cir.func @_Z7host_fnPiS_S_ | ||
// CIR-DEVICE-NOT: cir.func @_Z7host_fnPiS_S_ | ||
|
||
// CIR: cir.func @_Z7host_fnPiS_S_ | ||
|
||
// This shouldn't emit on host, but should emit on device. | ||
__device__ void device_fn(int* a, double b, float c) {} | ||
// CIR-HOST-NOT: cir.func @_Z9device_fnPidf | ||
// CIR-DEVICE: cir.func @_Z9device_fnPidf | ||
|
||
// When `__CUDA_ARCH__` is defined (device compilation) the kernel is defined | ||
// with an empty body; otherwise (host compilation) it is only declared. | ||
#ifdef __CUDA_ARCH__ | ||
__global__ void global_fn() {} | ||
#else | ||
__global__ void global_fn(); | ||
#endif | ||
// CIR-HOST: @_Z24__device_stub__global_fnv(){{.*}}extra([[Kernel]]) | ||
// CIR-DEVICE: @_Z9global_fnv | ||
|
||
// CHECK-NOT: cir.func @_Z9device_fnPidf | ||
// Make sure `global_fn` indeed gets emitted: this host function references | ||
// it by initializing a local variable from the kernel's name. | ||
__host__ void x() { auto v = global_fn; }