Address PR feedback

2025-01-26 14:44:04 -08:00 · 2025-01-26 14:44:04 -08:00 · bb37819954
commit bb37819954
parent 9c27481ed0
8 changed files with 27 additions and 15 deletions
--- a/.devops/nix/package.nix
+++ b/.devops/nix/package.nix
@ -93,6 +93,7 @@ let
  rocmBuildInputs = with rocmPackages; [
    clr
    hipblas
+    rocblas
  ];

  vulkanBuildInputs = [
--- a/.devops/rocm.Dockerfile
+++ b/.devops/rocm.Dockerfile
@ -12,7 +12,7 @@ FROM ${BASE_ROCM_DEV_CONTAINER} AS build

 # Unless otherwise specified, we make a fat build.
 # List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
-# This is mostly tied to HIP supported archs.
+# This is mostly tied to rocBLAS supported archs.
 # gfx803, gfx900, gfx1032, gfx1101, gfx1102,not officialy supported
 # gfx906 is deprecated
 #check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.2.4/reference/system-requirements.html
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@ -361,7 +361,7 @@ jobs:
        id: depends
        run: |
          sudo apt-get update
-          sudo apt-get install -y build-essential git cmake hipblas-dev
+          sudo apt-get install -y build-essential git cmake rocblas-dev hipblas-dev

      - name: Build with native CMake HIP support
        id: cmake_build
@ -1125,7 +1125,10 @@ jobs:
            -DGGML_HIP=ON `
            -DGGML_RPC=ON
          cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
+          md "build\bin\rocblas\library\"
          cp "${env:HIP_PATH}\bin\hipblas.dll" "build\bin\"
+          cp "${env:HIP_PATH}\bin\rocblas.dll" "build\bin\"
+          cp "${env:HIP_PATH}\bin\rocblas\library\*" "build\bin\rocblas\library\"

      - name: Determine tag name
        id: tag
--- a/2
+++ b/2
@ -781,7 +781,7 @@ endif # GGML_HIP_UMA

 	MK_LDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib
 	MK_LDFLAGS += -L$(ROCM_PATH)/lib64 -Wl,-rpath=$(ROCM_PATH)/lib64
-	MK_LDFLAGS += -lhipblas -lamdhip64
+	MK_LDFLAGS += -lhipblas -lamdhip64 -lrocblas

 	HIPCC ?= $(CCACHE) $(ROCM_PATH)/bin/hipcc

--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@ -3,10 +3,6 @@
 #include "console.h"
 #include "log.h"
 #include "sampling.h"
-// vvv REMOVE BEFORE MERGING
-#include "llama-model.h"
-#include "llama-impl.h"
-// ^^^ REMOVE BEFORE MERGING
 #include "llama.h"
 #include "chat-template.hpp"

@ -916,13 +912,6 @@ int main(int argc, char ** argv) {
    }

    LOG("\n\n");
-    // vvv REMOVE BEFORE MERGING
-    for (auto * dev : model->devices) {
-        size_t free, total; // NOLINT
-        ggml_backend_dev_memory(dev, &free, &total);
-        LLAMA_LOG_INFO("%s: using device %s (%s) - %zu MiB free\n", __func__, ggml_backend_dev_name(dev), ggml_backend_dev_description(dev), free/1024/1024);
-    }
-    // ^^^ REMOVE BEFORE MERGING
    common_perf_print(ctx, smpl);

    common_sampler_free(smpl);
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@ -120,6 +120,20 @@ static cudaError_t ggml_cuda_device_malloc(void ** ptr, size_t size, int device)
 }

 static ggml_cuda_device_info ggml_cuda_init() {
+#ifdef __HIP_PLATFORM_AMD__
+    // Workaround for a rocBLAS bug when using multiple graphics cards:
+    // https://github.com/ROCmSoftwarePlatform/rocBLAS/issues/1346
+    {
+        char version_string[64];
+        version_string[0] = '\0';
+        const rocblas_status status = rocblas_get_version_string(version_string, sizeof(version_string));
+        if (status != rocblas_status_success || version_string[0] < '4') {
+            rocblas_initialize();
+            CUDA_CHECK(cudaDeviceSynchronize());
+        }
+    }
+#endif
+
    ggml_cuda_device_info info = {};

    cudaError_t err = cudaGetDeviceCount(&info.device_count);
--- a/ggml/src/ggml-cuda/vendors/hip.h
+++ b/ggml/src/ggml-cuda/vendors/hip.h
@ -4,6 +4,10 @@
 #include <hipblas/hipblas.h>
 #include <hip/hip_fp16.h>
 #include <hip/hip_bfloat16.h>
+#ifdef __HIP_PLATFORM_AMD__
+// for rocblas_initialize() and rocblas_get_version_string()
+#include "rocblas/rocblas.h"
+#endif // __HIP_PLATFORM_AMD__
 #define CUBLAS_COMPUTE_16F HIPBLAS_R_16F
 #define CUBLAS_COMPUTE_32F HIPBLAS_R_32F
 #define CUBLAS_COMPUTE_32F_FAST_16F HIPBLAS_R_32F
--- a/ggml/src/ggml-hip/CMakeLists.txt
+++ b/ggml/src/ggml-hip/CMakeLists.txt
@ -38,6 +38,7 @@ endif()

 find_package(hip     REQUIRED)
 find_package(hipblas REQUIRED)
+find_package(rocblas REQUIRED)

 message(STATUS "HIP and hipBLAS found")

@ -110,4 +111,4 @@ if (GGML_STATIC)
    message(FATAL_ERROR "Static linking not supported for HIP/ROCm")
 endif()

-target_link_libraries(ggml-hip PRIVATE ggml-base hip::host roc::hipblas)
+target_link_libraries(ggml-hip PRIVATE ggml-base hip::host roc::rocblas roc::hipblas)