From cf65429c3832d32a8c17c7ed5ab47066d7511fbe Mon Sep 17 00:00:00 2001
From: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com>
Date: Mon, 3 Jul 2023 16:56:40 -0500
Subject: [PATCH] print cuda or opencl based on what's used

---
 otherarch/gptj_v3.cpp | 10 +++++++++-
 otherarch/mpt_v3.cpp  | 10 +++++++++-
 otherarch/neox_v3.cpp | 10 +++++++++-
 3 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/otherarch/gptj_v3.cpp b/otherarch/gptj_v3.cpp
index 66cad6f5cff44..10e38aa6962c9 100644
--- a/otherarch/gptj_v3.cpp
+++ b/otherarch/gptj_v3.cpp
@@ -348,7 +348,11 @@ ModelLoadResult gptj_model_load(const std::string & fname, gptj_model & model, g
         const auto & hparams = model.hparams;
         size_t vram_total = 0;
         const int n_gpu = std::min(gpulayers, int(hparams.n_layer));
+        #if defined(GGML_USE_CLBLAST)
         fprintf(stderr, "%s: [opencl] offloading %d layers to GPU\n", __func__, n_gpu);
+        #else
+        fprintf(stderr, "%s: [CUDA] offloading %d layers to GPU\n", __func__, n_gpu);
+        #endif
         for (int i = 0; i < n_gpu; ++i) {
             const auto & layer = model.layers[i];
             layer.c_attn_q_proj_w->backend = GGML_BACKEND_GPU;
@@ -373,7 +377,11 @@ ModelLoadResult gptj_model_load(const std::string & fname, gptj_model & model, g
             ggml_cuda_transform_tensor(layer.c_mlp_proj_w->data,layer.c_mlp_proj_w); vram_total += ggml_nbytes(layer.c_mlp_proj_w);
             #endif
         }
-        fprintf(stderr, "%s: [opencl] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024);
+        #if defined(GGML_USE_CLBLAST)
+        fprintf(stderr, "%s: [opencl] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024);
+        #else
+        fprintf(stderr, "%s: [CUDA] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024);
+        #endif
     }
     #endif
 
diff --git a/otherarch/mpt_v3.cpp b/otherarch/mpt_v3.cpp
index ef362a051c3d3..5344c32180722 100644
--- a/otherarch/mpt_v3.cpp
+++ b/otherarch/mpt_v3.cpp
@@ -301,7 +301,11 @@ bool mpt_model_load(const std::string & fname, mpt_model & model, gpt_vocab & vo
         const auto & hparams = model.hparams;
         size_t vram_total = 0;
         const int n_gpu = std::min(gpulayers, int(hparams.n_layers));
+        #if defined(GGML_USE_CLBLAST)
         fprintf(stderr, "%s: [opencl] offloading %d layers to GPU\n", __func__, n_gpu);
+        #else
+        fprintf(stderr, "%s: [CUDA] offloading %d layers to GPU\n", __func__, n_gpu);
+        #endif
         for (int i = 0; i < n_gpu; ++i) {
             const auto & layer = model.layers[i];
             layer.ffn_up_proj->backend = GGML_BACKEND_GPU;
@@ -320,7 +324,11 @@ bool mpt_model_load(const std::string & fname, mpt_model & model, gpt_vocab & vo
             ggml_cuda_transform_tensor(layer.c_attn_out_proj_weight->data,layer.c_attn_out_proj_weight); vram_total += ggml_nbytes(layer.c_attn_out_proj_weight);
             #endif
         }
-        fprintf(stderr, "%s: [opencl] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024);
+        #if defined(GGML_USE_CLBLAST)
+        fprintf(stderr, "%s: [opencl] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024);
+        #else
+        fprintf(stderr, "%s: [CUDA] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024);
+        #endif
     }
     #endif
 
diff --git a/otherarch/neox_v3.cpp b/otherarch/neox_v3.cpp
index 7522b8f8da0a2..e4facf92da9e8 100644
--- a/otherarch/neox_v3.cpp
+++ b/otherarch/neox_v3.cpp
@@ -335,7 +335,11 @@ ModelLoadResult gpt_neox_model_load(const std::string & fname, gpt_neox_model &
         const auto & hparams = model.hparams;
         size_t vram_total = 0;
         const int n_gpu = std::min(gpulayers, int(hparams.n_layer));
+        #if defined(GGML_USE_CLBLAST)
         fprintf(stderr, "%s: [opencl] offloading %d layers to GPU\n", __func__, n_gpu);
+        #else
+        fprintf(stderr, "%s: [CUDA] offloading %d layers to GPU\n", __func__, n_gpu);
+        #endif
         for (int i = 0; i < n_gpu; ++i) {
             const auto & layer = model.layers[i];
             layer.c_attn_attn_w->backend = GGML_BACKEND_GPU;
@@ -354,7 +358,11 @@ ModelLoadResult gpt_neox_model_load(const std::string & fname, gpt_neox_model &
             ggml_cuda_transform_tensor(layer.c_mlp_proj_w->data,layer.c_mlp_proj_w); vram_total += ggml_nbytes(layer.c_mlp_proj_w);
             #endif
         }
-        fprintf(stderr, "%s: [opencl] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024);
+        #if defined(GGML_USE_CLBLAST)
+        fprintf(stderr, "%s: [opencl] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024);
+        #else
+        fprintf(stderr, "%s: [CUDA] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024);
+        #endif
     }
     #endif
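
Note (not part of the patch): each touched call site wraps the fprintf in the same #if defined(GGML_USE_CLBLAST) / #else pair so the log reads "[opencl]" under CLBlast builds and "[CUDA]" otherwise. Below is a minimal standalone C++ sketch of that compile-time labeling idea; GGML_USE_CLBLAST is the real guard macro from the patch, while kGpuBackendLabel, log_offload(), and the demo values in main() are illustrative names only, not code from koboldcpp.

// Standalone sketch: resolve the backend label once at compile time instead
// of repeating the #if/#else block around every fprintf call site.
#include <cstdio>
#include <cstddef>

#if defined(GGML_USE_CLBLAST)
static const char * kGpuBackendLabel = "opencl";   // CLBlast (OpenCL) build
#else
static const char * kGpuBackendLabel = "CUDA";     // otherwise assume CUDA build
#endif

// Prints the same two messages the patch emits during model loading.
static void log_offload(const char * func, int n_gpu, std::size_t vram_total)
{
    fprintf(stderr, "%s: [%s] offloading %d layers to GPU\n", func, kGpuBackendLabel, n_gpu);
    fprintf(stderr, "%s: [%s] total VRAM used: %zu MB\n", func, kGpuBackendLabel, vram_total / 1024 / 1024);
}

int main()
{
    // Example values only; in the patch these come from the model loader.
    log_offload(__func__, 28, std::size_t(4) * 1024 * 1024 * 1024);
    return 0;
}

A single label constant like this would avoid duplicating the preprocessor block at each of the six call sites; the patch instead keeps the inline #if/#else form, which stays closest to the surrounding code.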