diff --git a/clang/lib/CIR/CodeGen/CIRGenCall.cpp b/clang/lib/CIR/CodeGen/CIRGenCall.cpp index 851a2230bf30..9a15e3337f32 100644 --- a/clang/lib/CIR/CodeGen/CIRGenCall.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenCall.cpp @@ -1639,9 +1639,9 @@ static void getTrivialDefaultFunctionAttributes( // TODO: NoThrow attribute should be added for other GPU modes CUDA, SYCL, // HIP, OpenMP offload. // AFAIK, neither of them support exceptions in device code. - if ((langOpts.CUDA && langOpts.CUDAIsDevice) || langOpts.SYCLIsDevice) + if (langOpts.SYCLIsDevice) llvm_unreachable("NYI"); - if (langOpts.OpenCL) { + if (langOpts.OpenCL || (langOpts.CUDA && langOpts.CUDAIsDevice)) { auto noThrow = cir::NoThrowAttr::get(CGM.getBuilder().getContext()); funcAttrs.set(noThrow.getMnemonic(), noThrow); } diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index 444883f6d774..c58d260e166a 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -516,16 +516,32 @@ void CIRGenModule::emitGlobal(GlobalDecl GD) { assert(!Global->hasAttr() && "NYI"); if (langOpts.CUDA) { - if (langOpts.CUDAIsDevice) - llvm_unreachable("NYI"); + if (langOpts.CUDAIsDevice) { + // This will implicitly mark templates and their + // specializations as __host__ __device__. + if (langOpts.OffloadImplicitHostDeviceTemplates) + llvm_unreachable("NYI"); - if (dyn_cast(Global)) - llvm_unreachable("NYI"); + // This maps some parallel standard libraries implicitly + // to GPU, even when they are not marked __device__. + if (langOpts.HIPStdPar) + llvm_unreachable("NYI"); - // We must skip __device__ functions when compiling for host. - if (!Global->hasAttr() && Global->hasAttr()) { - return; + if (Global->hasAttr()) + llvm_unreachable("NYI"); + + if (!Global->hasAttr()) + return; + } else { + // We must skip __device__ functions when compiling for host. + if (!Global->hasAttr() && + Global->hasAttr()) { + return; + } } + + if (dyn_cast(Global)) + llvm_unreachable("NYI"); } if (langOpts.OpenMP) { @@ -2415,8 +2431,6 @@ StringRef CIRGenModule::getMangledName(GlobalDecl GD) { } } - assert(!langOpts.CUDAIsDevice && "NYI"); - // Keep the first result in the case of a mangling collision. const auto *ND = cast(GD.getDecl()); std::string MangledName = getMangledNameImpl(*this, GD, ND); @@ -3099,7 +3113,8 @@ void CIRGenModule::emitDeferred(unsigned recursionLimit) { // Emit CUDA/HIP static device variables referenced by host code only. Note we // should not clear CUDADeviceVarODRUsedByHost since it is still needed for // further handling. - if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) { + if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice && + !getASTContext().CUDADeviceVarODRUsedByHost.empty()) { llvm_unreachable("NYI"); } @@ -3392,10 +3407,6 @@ void CIRGenModule::Release() { llvm_unreachable("NYI"); } - if (langOpts.CUDAIsDevice && getTriple().isNVPTX()) { - llvm_unreachable("NYI"); - } - if (langOpts.EHAsynch) llvm_unreachable("NYI"); diff --git a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp index 916010a4f19c..66d6a57e242a 100644 --- a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp @@ -348,7 +348,11 @@ mlir::Type CIRGenTypes::convertType(QualType T) { // For the device-side compilation, CUDA device builtin surface/texture types // may be represented in different types. - assert(!astContext.getLangOpts().CUDAIsDevice && "not implemented"); + if (astContext.getLangOpts().CUDAIsDevice) { + if (Ty->isCUDADeviceBuiltinSurfaceType() || + Ty->isCUDADeviceBuiltinTextureType()) + llvm_unreachable("NYI"); + } if (const auto *recordType = dyn_cast(T)) return convertRecordDeclType(recordType->getDecl()); diff --git a/clang/test/CIR/CodeGen/CUDA/simple-device.cu b/clang/test/CIR/CodeGen/CUDA/simple-device.cu new file mode 100644 index 000000000000..c19a09a7e40b --- /dev/null +++ b/clang/test/CIR/CodeGen/CUDA/simple-device.cu @@ -0,0 +1,14 @@ +#include "../Inputs/cuda.h" + +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device \ +// RUN: -fclangir -emit-cir -o - %s | FileCheck %s + +// This shouldn't emit. +__host__ void host_fn(int *a, int *b, int *c) {} + +// CHECK-NOT: cir.func @_Z7host_fnPiS_S_ + +// This should emit as a normal C++ function. +__device__ void device_fn(int* a, double b, float c) {} + +// CIR: cir.func @_Z9device_fnPidf