restrict fp16 mat mul to volta and up
This commit is contained in:
parent
32ada53c8e
commit
7d5674dd2d
1 changed file with 3 additions and 2 deletions
|
@ -6059,8 +6059,9 @@ inline void ggml_cuda_op_mul_mat_cublas(
|
||||||
// ldc == nrows of the matrix that cuBLAS writes into
|
// ldc == nrows of the matrix that cuBLAS writes into
|
||||||
int ldc = dst->backend == GGML_BACKEND_GPU && id == g_main_device ? ne0 : row_diff;
|
int ldc = dst->backend == GGML_BACKEND_GPU && id == g_main_device ? ne0 : row_diff;
|
||||||
|
|
||||||
// TODO: this may be slower in older architectures with poor fp16 support
|
const int compute_capability = g_compute_capabilities[id];
|
||||||
if (src0->type == GGML_TYPE_F16 && ggml_is_contiguous(src0) && ldc == row_diff) {
|
|
||||||
|
if (compute_capability >= CC_TURING && src0->type == GGML_TYPE_F16 && ggml_is_contiguous(src0) && ldc == row_diff) {
|
||||||
// convert src1 to fp16, multiply as fp16, convert dst to fp32
|
// convert src1 to fp16, multiply as fp16, convert dst to fp32
|
||||||
half * src1_as_f16 = nullptr;
|
half * src1_as_f16 = nullptr;
|
||||||
size_t src1_as = 0;
|
size_t src1_as = 0;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue