From 9f3e9e3a09ed4e2e9bdc305fc4e682abbd26ae75 Mon Sep 17 00:00:00 2001 From: Daniele <57776841+daniandtheweb@users.noreply.github.com> Date: Thu, 4 Jul 2024 19:35:10 +0000 Subject: [PATCH] CUDA: revert part of the RDNA1 optimizations The change on the launch_bounds was causing a small performance drop in perplexity of 25 t/s --- ggml/src/ggml-cuda/mmq.cuh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ggml/src/ggml-cuda/mmq.cuh b/ggml/src/ggml-cuda/mmq.cuh index deaed066f..921af2f2a 100644 --- a/ggml/src/ggml-cuda/mmq.cuh +++ b/ggml/src/ggml-cuda/mmq.cuh @@ -2263,9 +2263,9 @@ static __device__ void mul_mat_q_process_tile( template #if defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__) -#if defined(RDNA3) || defined(RDNA2) || defined(RDNA1) +#if defined(RDNA3) || defined(RDNA2) __launch_bounds__(WARP_SIZE*nwarps, 2) -#endif // defined(RDNA3) || defined(RDNA2) || defined(RDNA1) +#endif // defined(RDNA3) || defined(RDNA2) #else #if __CUDA_ARCH__ >= CC_VOLTA __launch_bounds__(WARP_SIZE*nwarps, 1)