From 44a80b41191cc5bda2824e049dd9eb32c96be501 Mon Sep 17 00:00:00 2001
From: JohannesGaessler
Date: Sat, 17 Feb 2024 20:09:13 +0100
Subject: [PATCH] CUDA: switch tile sizes based on binary version

---
 ggml-cuda.cu | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index b35fcb7fd..b04f6830c 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -6941,11 +6941,12 @@ static void ggml_mul_mat_q4_0_q8_1_cuda(
     const void * vx, const void * vy, float * dst, const int ncols_x, const int nrows_x,
     const int ncols_y, const int nrows_y, const int nrows_dst, cudaStream_t stream) {
 
+    int mmq_x, mmq_y, nwarps;
+#if defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)
     int id;
     CUDA_CHECK(cudaGetDevice(&id));
     const int compute_capability = g_device_caps[id].cc;
 
-    int mmq_x, mmq_y, nwarps;
     if (compute_capability >= CC_RDNA2) {
         mmq_x = MMQ_X_Q4_0_RDNA2;
         mmq_y = MMQ_Y_Q4_0_RDNA2;
@@ -6954,17 +6955,26 @@ static void ggml_mul_mat_q4_0_q8_1_cuda(
         mmq_x = MMQ_X_Q4_0_RDNA1;
         mmq_y = MMQ_Y_Q4_0_RDNA1;
         nwarps = NWARPS_Q4_0_RDNA1;
-    } else if (compute_capability >= CC_VOLTA) {
+    } else {
+        GGML_ASSERT(false);
+    }
+#else
+    cudaFuncAttributes attributes;
+    CUDA_CHECK(cudaFuncGetAttributes(&attributes, mul_mat_q4_0));
+    const int cc_binary = 10*attributes.binaryVersion;
+
+    if (cc_binary >= CC_VOLTA) {
         mmq_x = MMQ_X_Q4_0_AMPERE;
         mmq_y = MMQ_Y_Q4_0_AMPERE;
         nwarps = NWARPS_Q4_0_AMPERE;
-    } else if (compute_capability >= MIN_CC_DP4A) {
+    } else if (cc_binary >= MIN_CC_DP4A) {
         mmq_x = MMQ_X_Q4_0_PASCAL;
         mmq_y = MMQ_Y_Q4_0_PASCAL;
         nwarps = NWARPS_Q4_0_PASCAL;
     } else {
         GGML_ASSERT(false);
     }
+#endif // defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)
 
     const int block_num_x = (nrows_x + mmq_y - 1) / mmq_y;
     const int block_num_y = (ncols_y + mmq_x - 1) / mmq_x;