llvm · bcardosolopes · Feb 6, 2025 · Feb 5, 2025
@@ -1639,9 +1639,9 @@ static void getTrivialDefaultFunctionAttributes(
   // TODO: NoThrow attribute should be added for other GPU modes CUDA, SYCL,
   // HIP, OpenMP offload.
   // AFAIK, neither of them support exceptions in device code.
-  if ((langOpts.CUDA && langOpts.CUDAIsDevice) || langOpts.SYCLIsDevice)
+  if (langOpts.SYCLIsDevice)
     llvm_unreachable("NYI");
-  if (langOpts.OpenCL) {
+  if (langOpts.OpenCL || (langOpts.CUDA && langOpts.CUDAIsDevice)) {
     auto noThrow = cir::NoThrowAttr::get(CGM.getBuilder().getContext());
     funcAttrs.set(noThrow.getMnemonic(), noThrow);
   }

@@ -516,16 +516,32 @@ void CIRGenModule::emitGlobal(GlobalDecl GD) {
   assert(!Global->hasAttr<CPUDispatchAttr>() && "NYI");
 
   if (langOpts.CUDA) {
-    if (langOpts.CUDAIsDevice)
-      llvm_unreachable("NYI");
+    if (langOpts.CUDAIsDevice) {
+      // This will implicitly mark templates and their
+      // specializations as __host__ __device__.
+      if (langOpts.OffloadImplicitHostDeviceTemplates)
+        llvm_unreachable("NYI");
 
-    if (dyn_cast<VarDecl>(Global))
-      llvm_unreachable("NYI");
+      // This maps some parallel standard libraries implicitly
+      // to GPU, even when they are not marked __device__.
+      if (langOpts.HIPStdPar)
+        llvm_unreachable("NYI");
 
-    // We must skip __device__ functions when compiling for host.
-    if (!Global->hasAttr<CUDAHostAttr>() && Global->hasAttr<CUDADeviceAttr>()) {
-      return;
+      if (Global->hasAttr<CUDAGlobalAttr>())
+        llvm_unreachable("NYI");
+
+      if (!Global->hasAttr<CUDADeviceAttr>())
+        return;
+    } else {
+      // We must skip __device__ functions when compiling for host.
+      if (!Global->hasAttr<CUDAHostAttr>() &&
+          Global->hasAttr<CUDADeviceAttr>()) {
+        return;
+      }
     }
+
+    if (dyn_cast<VarDecl>(Global))
+      llvm_unreachable("NYI");
   }
 
   if (langOpts.OpenMP) {
@@ -2415,8 +2431,6 @@ StringRef CIRGenModule::getMangledName(GlobalDecl GD) {
     }
   }
 
-  assert(!langOpts.CUDAIsDevice && "NYI");
-
   // Keep the first result in the case of a mangling collision.
   const auto *ND = cast<NamedDecl>(GD.getDecl());
   std::string MangledName = getMangledNameImpl(*this, GD, ND);
@@ -3099,7 +3113,8 @@ void CIRGenModule::emitDeferred(unsigned recursionLimit) {
   // Emit CUDA/HIP static device variables referenced by host code only. Note we
   // should not clear CUDADeviceVarODRUsedByHost since it is still needed for
   // further handling.
-  if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
+  if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice &&
+      !getASTContext().CUDADeviceVarODRUsedByHost.empty()) {
     llvm_unreachable("NYI");
   }
 
@@ -3392,10 +3407,6 @@ void CIRGenModule::Release() {
     llvm_unreachable("NYI");
   }
 
-  if (langOpts.CUDAIsDevice && getTriple().isNVPTX()) {
-    llvm_unreachable("NYI");
-  }
-
   if (langOpts.EHAsynch)
     llvm_unreachable("NYI");
 

@@ -348,7 +348,11 @@ mlir::Type CIRGenTypes::convertType(QualType T) {
 
   // For the device-side compilation, CUDA device builtin surface/texture types
   // may be represented in different types.
-  assert(!astContext.getLangOpts().CUDAIsDevice && "not implemented");
+  if (astContext.getLangOpts().CUDAIsDevice) {
+    if (Ty->isCUDADeviceBuiltinSurfaceType() ||
+        Ty->isCUDADeviceBuiltinTextureType())
+      llvm_unreachable("NYI");
+  }
 
   if (const auto *recordType = dyn_cast<RecordType>(T))
     return convertRecordDeclType(recordType->getDecl());

diff --git a/clang/test/CIR/CodeGen/CUDA/simple-device.cu b/clang/test/CIR/CodeGen/CUDA/simple-device.cu
@@ -0,0 +1,14 @@
+#include "../Inputs/cuda.h"
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device \
+// RUN:            -fclangir -emit-cir -o - %s | FileCheck %s
+
+// This shouldn't emit.
+__host__ void host_fn(int *a, int *b, int *c) {}
+
+// CHECK-NOT: cir.func @_Z7host_fnPiS_S_
+
+// This should emit as a normal C++ function.
+__device__ void device_fn(int* a, double b, float c) {}
+
+// CIR: cir.func @_Z9device_fnPidf