From 878aa4f209e453254a6640afc29d41b1d35273bf Mon Sep 17 00:00:00 2001
From: Kerfuffle <44031344+KerfuffleV2@users.noreply.github.com>
Date: Mon, 23 Oct 2023 15:09:50 -0600
Subject: [PATCH] Apply suggestions from code review

These changes plus:

```c++
#define cublasGemmBatchedEx hipblasGemmBatchedEx
```

are needed to compile with ROCm. I haven't done performance testing, but it
seems to work.

I couldn't figure out how to propose a change for lines outside what the pull
changed; also, this is the first time I've tried to create a multi-part review,
so please forgive me if I mess something up.
---
 ggml-cuda.cu | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index ebfd6c15e..c0383d19e 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -7154,9 +7154,9 @@ static void ggml_cuda_mul_mat_mat_batched_cublas(const ggml_tensor * src0, const
     }

     // allocate device memory for pointers
-    void ** src0_ptrs_as = nullptr;
-    void ** src1_ptrs_as = nullptr;
-    void ** dst_ptrs_as = nullptr;
+    const void ** src0_ptrs_as = nullptr;
+    const void ** src1_ptrs_as = nullptr;
+    void ** dst_ptrs_as = nullptr;

     CUDA_CHECK(cudaMalloc(&src0_ptrs_as, ne23*sizeof(void *)));
     CUDA_CHECK(cudaMalloc(&src1_ptrs_as, ne23*sizeof(void *)));
@@ -7170,9 +7170,9 @@ static void ggml_cuda_mul_mat_mat_batched_cublas(const ggml_tensor * src0, const
     CUBLAS_CHECK(
     cublasGemmBatchedEx(g_cublas_handles[id], CUBLAS_OP_T, CUBLAS_OP_N,
             ne01, ne11, ne10,
-            &alpha_f16, (void **) src0_ptrs_as, CUDA_R_16F, nb01/sizeof(half),
-                        (void **) src1_ptrs_as, CUDA_R_16F, nb11/sizeof(float),
-            &beta_f16,  (void **) dst_ptrs_as,  CUDA_R_16F, ne01,
+            &alpha_f16, (const void **) src0_ptrs_as, CUDA_R_16F, nb01/sizeof(half),
+                        (const void **) src1_ptrs_as, CUDA_R_16F, nb11/sizeof(float),
+            &beta_f16,  (      void **) dst_ptrs_as,  CUDA_R_16F, ne01,
             ne23,
             CUBLAS_COMPUTE_16F,
             CUBLAS_GEMM_DEFAULT_TENSOR_OP));
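
Note for anyone trying this against ROCm: the `#define` quoted in the commit message is not part of the diff itself, so it has to be added by hand. A minimal sketch of where it could go, assuming it sits next to the existing cuBLAS-to-hipBLAS compatibility macros in the `GGML_USE_HIPBLAS` section of ggml-cuda.cu (the neighbouring lines here are illustrative, not quoted from the file):

```c++
// Sketch only: surrounding macros are illustrative, not an exact copy of ggml-cuda.cu.
#if defined(GGML_USE_HIPBLAS)
#include <hipblas/hipblas.h>
#define cublasGemmEx        hipblasGemmEx
// Extra mapping mentioned in the commit message, needed so the batched
// GEMM call added by this patch also resolves to the hipBLAS API:
#define cublasGemmBatchedEx hipblasGemmBatchedEx
#endif
```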