llama : disable FA for AMD

2024-04-24 16:48:10 +03:00 · 2024-04-24 16:48:10 +03:00 · ce281b904c
commit ce281b904c
parent 8937ec5307
3 changed files with 12 additions and 2 deletions
--- a/ggml-cuda/fattn.cu
+++ b/ggml-cuda/fattn.cu
@@ -2,7 +2,10 @@
 #include "fattn.cuh"

 #include <cstdint>
+
+#if FP16_MMA_AVAILABLE
 #include <mma.h>
+#endif

 #define FATTN_KQ_STRIDE       256
 #define HALF_MAX_HALF         __float2half(65504.0f/2) // Use neg. of this instead of -INFINITY to initialize KQ max vals to avoid NaN upon subtraction.