From 9bb262b988897ba847b7303b5f11f8b11e0a7a3b Mon Sep 17 00:00:00 2001
From: Justine Tunney <jtunney@mozilla.com>
Date: Thu, 14 Nov 2024 14:47:53 -0800
Subject: [PATCH] Log CUDA kernel vs. runtime versions

The goal here is to provide a troubleshooting hint that will hopefully
help users resolve CUDA version compatibility issues.
---
 llama.cpp/ggml-cuda.cu | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/llama.cpp/ggml-cuda.cu b/llama.cpp/ggml-cuda.cu
index 3ca5a41089..379664741d 100644
--- a/llama.cpp/ggml-cuda.cu
+++ b/llama.cpp/ggml-cuda.cu
@@ -356,8 +356,22 @@ void ggml_abort(const char * file, int line, const char * fmt, ...) {
 
 GGML_CALL bool ggml_cuda_link(const struct ggml_backend_api *backend_api) {
     g_backend = backend_api;
-    if (!FLAG_log_disable)
+
+    if (!FLAG_log_disable) {
+        int kernelVersion = 0;
+        cudaDriverGetVersion(&kernelVersion);
+        fprintf(stderr, "%s: CUDA kernel version %d.%d\n", __func__,
+                kernelVersion / 1000, (kernelVersion % 1000) / 10);
+
+        int runtimeVersion = 0;
+        cudaRuntimeGetVersion(&runtimeVersion);
+        fprintf(stderr, "%s: CUDA runtime version is %d.%d%s\n", __func__,
+                runtimeVersion / 1000, (runtimeVersion % 1000) / 10,
+                runtimeVersion > kernelVersion ? " (!!!)" : "");
+
         fprintf(stderr, "%s: welcome to " GGML_CUDA_NAME " SDK with " BLAS_NAME "\n", __func__);
+    }
+
 #ifdef __HIP_PLATFORM_AMD__
     // cargo culting workaround below
 #ifndef GGML_USE_TINYBLAS