rocblas init stuff
This commit is contained in:
parent
d2ade639f4
commit
f8e3fc6c74
1 changed file with 10 additions and 3 deletions
13
ggml-cuda.cu
13
ggml-cuda.cu
|
@ -10,7 +10,10 @@
|
||||||
#include <hip/hip_runtime.h>
|
#include <hip/hip_runtime.h>
|
||||||
#include <hipblas/hipblas.h>
|
#include <hipblas/hipblas.h>
|
||||||
#include <hip/hip_fp16.h>
|
#include <hip/hip_fp16.h>
|
||||||
|
#ifdef __HIP_PLATFORM_AMD__
|
||||||
|
// for rocblas_initialize()
|
||||||
#include "rocblas/rocblas.h"
|
#include "rocblas/rocblas.h"
|
||||||
|
#endif
|
||||||
#define CUBLAS_COMPUTE_32F HIPBLAS_R_32F
|
#define CUBLAS_COMPUTE_32F HIPBLAS_R_32F
|
||||||
#define CUBLAS_COMPUTE_32F_FAST_16F HIPBLAS_R_32F
|
#define CUBLAS_COMPUTE_32F_FAST_16F HIPBLAS_R_32F
|
||||||
#define CUBLAS_GEMM_DEFAULT HIPBLAS_GEMM_DEFAULT
|
#define CUBLAS_GEMM_DEFAULT HIPBLAS_GEMM_DEFAULT
|
||||||
|
@ -2746,10 +2749,14 @@ void ggml_init_cublas() {
|
||||||
static bool initialized = false;
|
static bool initialized = false;
|
||||||
|
|
||||||
if (!initialized) {
|
if (!initialized) {
|
||||||
#ifdef GGML_USE_HIPBLAS
|
|
||||||
rocblas_initialize();
|
#ifdef __HIP_PLATFORM_AMD__
|
||||||
hipDeviceSynchronize();
|
// Workaround for a rocBLAS bug when using multiple graphics cards:
|
||||||
|
// https://github.com/ROCmSoftwarePlatform/rocBLAS/issues/1346
|
||||||
|
rocblas_initialize();
|
||||||
|
CUDA_CHECK(cudaDeviceSynchronize());
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
CUDA_CHECK(cudaGetDeviceCount(&g_device_count));
|
CUDA_CHECK(cudaGetDeviceCount(&g_device_count));
|
||||||
GGML_ASSERT(g_device_count <= GGML_CUDA_MAX_DEVICES);
|
GGML_ASSERT(g_device_count <= GGML_CUDA_MAX_DEVICES);
|
||||||
int64_t total_vram = 0;
|
int64_t total_vram = 0;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue