From bc143ecf81401295dd19a4b66b5d643ec37e4ad2 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Fri, 8 Nov 2024 10:27:43 +0200 Subject: [PATCH] cuda : disable BF16 FA ggml-ci --- ggml/src/ggml-cuda.cu | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ggml/src/ggml-cuda.cu b/ggml/src/ggml-cuda.cu index e27c8e87d..357cee660 100644 --- a/ggml/src/ggml-cuda.cu +++ b/ggml/src/ggml-cuda.cu @@ -3159,6 +3159,9 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g #ifndef FLASH_ATTN_AVAILABLE return false; #endif + if (op->src[1]->type == GGML_TYPE_BF16 || op->src[2]->type == GGML_TYPE_BF16) { + return false; + } if (op->src[0]->ne[0] == 64 && op->src[1]->type == GGML_TYPE_F16) { return true; }