From f8e3fc6c746b37d69656fb5ae6af8e411d85dbca Mon Sep 17 00:00:00 2001
From: Henri Vasserman <henv@hot.ee>
Date: Sat, 29 Jul 2023 14:16:46 +0300
Subject: [PATCH] rocblas: guard init with __HIP_PLATFORM_AMD__ and check sync result

---
 ggml-cuda.cu | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index 91e6c078e..cd122c5be 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -10,7 +10,10 @@
 #include <hip/hip_runtime.h>
 #include <hipblas/hipblas.h>
 #include <hip/hip_fp16.h>
+#ifdef __HIP_PLATFORM_AMD__
+// for rocblas_initialize()
 #include "rocblas/rocblas.h"
+#endif
 #define CUBLAS_COMPUTE_32F HIPBLAS_R_32F
 #define CUBLAS_COMPUTE_32F_FAST_16F HIPBLAS_R_32F
 #define CUBLAS_GEMM_DEFAULT HIPBLAS_GEMM_DEFAULT
@@ -2746,10 +2749,14 @@ void ggml_init_cublas() {
     static bool initialized = false;
 
     if (!initialized) {
-#ifdef GGML_USE_HIPBLAS
-    rocblas_initialize();
-    hipDeviceSynchronize();
+
+#ifdef __HIP_PLATFORM_AMD__
+        // Workaround for a rocBLAS bug when using multiple graphics cards:
+        // https://github.com/ROCmSoftwarePlatform/rocBLAS/issues/1346
+        rocblas_initialize();
+        CUDA_CHECK(cudaDeviceSynchronize());
 #endif
+
         CUDA_CHECK(cudaGetDeviceCount(&g_device_count));
         GGML_ASSERT(g_device_count <= GGML_CUDA_MAX_DEVICES);
         int64_t total_vram = 0;