From 39ddda27f43e487a4c05375003f5bc7acb38a742 Mon Sep 17 00:00:00 2001
From: slaren <slarengh@gmail.com>
Date: Sat, 30 Sep 2023 17:17:38 +0200
Subject: [PATCH] disable fp16 mat mul completely with multi GPU

---
 ggml-cuda.cu | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index 2fd15957f..989c419cd 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -6118,7 +6118,7 @@ inline void ggml_cuda_op_mul_mat_cublas(
 
     const int compute_capability = g_compute_capabilities[id];
 
-    if (compute_capability >= CC_VOLTA && (src0->type == GGML_TYPE_F16 || ggml_is_quantized(src0->type)) && ggml_is_contiguous(src0) && ldc == row_diff) {
+    if (compute_capability >= CC_VOLTA && (src0->type == GGML_TYPE_F16 || ggml_is_quantized(src0->type)) && ggml_is_contiguous(src0) && row_diff == src0->ne[1]) {
         // convert src0 and src1 to fp16, multiply as fp16, convert dst to fp32
         half * src0_as_f16 = nullptr;
         size_t src0_as = 0;