disable fp16 mat mul completely with multi GPU

2023-09-30 17:17:38 +02:00 · 2023-09-30 17:17:38 +02:00 · 39ddda27f4
commit 39ddda27f4
parent 59937e45a3
1 changed file with 1 addition and 1 deletion
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -6118,7 +6118,7 @@ inline void ggml_cuda_op_mul_mat_cublas(

    const int compute_capability = g_compute_capabilities[id];

-    if (compute_capability >= CC_VOLTA && (src0->type == GGML_TYPE_F16 || ggml_is_quantized(src0->type)) && ggml_is_contiguous(src0) && ldc == row_diff) {
+    if (compute_capability >= CC_VOLTA && (src0->type == GGML_TYPE_F16 || ggml_is_quantized(src0->type)) && ggml_is_contiguous(src0) && row_diff == src0->ne[1]) {
        // convert src0 and src1 to fp16, multiply as fp16, convert dst to fp32
        half * src0_as_f16 = nullptr;
        size_t src0_as = 0;