Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Only call rocblas_initialize for versions < 4 to eliminate unncessary VRAM allocation on some AMD cards #11080

Merged
merged 9 commits into from
Jan 28, 2025
Prev Previous commit
Next Next commit
Address PR feedback
  • Loading branch information
sARY77 committed Jan 26, 2025
commit bb37819954c472297673ce834513890aad024e55
1 change: 1 addition & 0 deletions .devops/nix/package.nix
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ let
rocmBuildInputs = with rocmPackages; [
clr
hipblas
rocblas
];

vulkanBuildInputs = [
Expand Down
2 changes: 1 addition & 1 deletion .devops/rocm.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ FROM ${BASE_ROCM_DEV_CONTAINER} AS build

# Unless otherwise specified, we make a fat build.
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
# This is mostly tied to HIP supported archs.
# This is mostly tied to rocBLAS supported archs.
# gfx803, gfx900, gfx1032, gfx1101, gfx1102,not officialy supported
# gfx906 is deprecated
#check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.2.4/reference/system-requirements.html
Expand Down
5 changes: 4 additions & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,7 @@ jobs:
id: depends
run: |
sudo apt-get update
sudo apt-get install -y build-essential git cmake hipblas-dev
sudo apt-get install -y build-essential git cmake rocblas-dev hipblas-dev

- name: Build with native CMake HIP support
id: cmake_build
Expand Down Expand Up @@ -1125,7 +1125,10 @@ jobs:
-DGGML_HIP=ON `
-DGGML_RPC=ON
cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
md "build\bin\rocblas\library\"
cp "${env:HIP_PATH}\bin\hipblas.dll" "build\bin\"
cp "${env:HIP_PATH}\bin\rocblas.dll" "build\bin\"
cp "${env:HIP_PATH}\bin\rocblas\library\*" "build\bin\rocblas\library\"

- name: Determine tag name
id: tag
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -781,7 +781,7 @@ endif # GGML_HIP_UMA

MK_LDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib
MK_LDFLAGS += -L$(ROCM_PATH)/lib64 -Wl,-rpath=$(ROCM_PATH)/lib64
MK_LDFLAGS += -lhipblas -lamdhip64
MK_LDFLAGS += -lhipblas -lamdhip64 -lrocblas

HIPCC ?= $(CCACHE) $(ROCM_PATH)/bin/hipcc

Expand Down
11 changes: 0 additions & 11 deletions examples/main/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,6 @@
#include "console.h"
#include "log.h"
#include "sampling.h"
// vvv REMOVE BEFORE MERGING
#include "llama-model.h"
#include "llama-impl.h"
// ^^^ REMOVE BEFORE MERGING
#include "llama.h"
#include "chat-template.hpp"

Expand Down Expand Up @@ -916,13 +912,6 @@ int main(int argc, char ** argv) {
}

LOG("\n\n");
// vvv REMOVE BEFORE MERGING
for (auto * dev : model->devices) {
size_t free, total; // NOLINT
ggml_backend_dev_memory(dev, &free, &total);
LLAMA_LOG_INFO("%s: using device %s (%s) - %zu MiB free\n", __func__, ggml_backend_dev_name(dev), ggml_backend_dev_description(dev), free/1024/1024);
}
// ^^^ REMOVE BEFORE MERGING
common_perf_print(ctx, smpl);

common_sampler_free(smpl);
Expand Down
14 changes: 14 additions & 0 deletions ggml/src/ggml-cuda/ggml-cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,20 @@ static cudaError_t ggml_cuda_device_malloc(void ** ptr, size_t size, int device)
}

static ggml_cuda_device_info ggml_cuda_init() {
#ifdef __HIP_PLATFORM_AMD__
// Workaround for a rocBLAS bug when using multiple graphics cards:
// https://github.com/ROCmSoftwarePlatform/rocBLAS/issues/1346
{
char version_string[64];
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While this is fine it would be slightly better here to use rocblas_get_version_string_size to let rocblas tell you how big the buffer needs to be.

version_string[0] = '\0';
const rocblas_status status = rocblas_get_version_string(version_string, sizeof(version_string));
if (status != rocblas_status_success || version_string[0] < '4') {
Copy link
Collaborator

@IMbackK IMbackK Jan 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i don't like this too much as this will ofc fail if rocblas changes its version to 10.0.0 or whatever. I think we should make a bit more effort to parse this properly. Looking at rocblas (https://github.com/ROCm/rocBLAS/blob/59825a7367a24eed4e7e8a483820592089eaf17e/library/src/buildinfo.cpp#L29) it seams we would be on the safe side to use string_split<int> here https://github.com/ggerganov/llama.cpp/blob/df984e014714cba4c99ef894b20b51cbcef31b16/common/common.h#L439

however currently common.h is not used outside of the clients/examples and contains code that makes no sense in the backend.
@ggerganov maybe you can weigh in on if its ok to use this header here or if we should move function somewhere else.

rocblas_initialize();
CUDA_CHECK(cudaDeviceSynchronize());
}
}
#endif

ggml_cuda_device_info info = {};

cudaError_t err = cudaGetDeviceCount(&info.device_count);
Expand Down
4 changes: 4 additions & 0 deletions ggml/src/ggml-cuda/vendors/hip.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@
#include <hipblas/hipblas.h>
#include <hip/hip_fp16.h>
#include <hip/hip_bfloat16.h>
#ifdef __HIP_PLATFORM_AMD__
// for rocblas_initialize()
#include "rocblas/rocblas.h"
#endif // __HIP_PLATFORM_AMD__
#define CUBLAS_COMPUTE_16F HIPBLAS_R_16F
#define CUBLAS_COMPUTE_32F HIPBLAS_R_32F
#define CUBLAS_COMPUTE_32F_FAST_16F HIPBLAS_R_32F
Expand Down
3 changes: 2 additions & 1 deletion ggml/src/ggml-hip/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ endif()

find_package(hip REQUIRED)
find_package(hipblas REQUIRED)
find_package(rocblas REQUIRED)

message(STATUS "HIP and hipBLAS found")

Expand Down Expand Up @@ -110,4 +111,4 @@ if (GGML_STATIC)
message(FATAL_ERROR "Static linking not supported for HIP/ROCm")
endif()

target_link_libraries(ggml-hip PRIVATE ggml-base hip::host roc::hipblas)
target_link_libraries(ggml-hip PRIVATE ggml-base hip::host roc::rocblas roc::hipblas)
Loading