From b1f6fab684b33b722bec5a88d56c171624d63a89 Mon Sep 17 00:00:00 2001 From: JohannesGaessler Date: Sun, 11 Feb 2024 19:01:44 +0100 Subject: [PATCH] refactor boolean logic --- ggml-cuda.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml-cuda.cu b/ggml-cuda.cu index 8aa70f143..dd2e5461a 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -9967,7 +9967,7 @@ static void ggml_cuda_mul_mat(const ggml_tensor * src0, const ggml_tensor * src1 #ifdef CUDA_USE_TENSOR_CORES // when tensor cores are available, use them for large batch size // ref: https://github.com/ggerganov/llama.cpp/pull/3776 - use_mul_mat_q = use_mul_mat_q && !(fp16_performance_good && src1->ne[1] > MMQ_MAX_BATCH_SIZE); + use_mul_mat_q = use_mul_mat_q && (!fp16_performance_good || src1->ne[1] <= MMQ_MAX_BATCH_SIZE); #endif // CUDA_USE_TENSOR_CORES #endif // defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)