Merge branch 'master' into gg/flash-attn

2024-03-27 10:24:09 +02:00 · 2024-03-27 10:24:09 +02:00 · 013721df2b
commit 013721df2b
parent e425810bb6 cbc8343619
157 changed files with 19090 additions and 15488 deletions
--- a/ggml-cuda/fattn.cuh
+++ b/ggml-cuda/fattn.cuh
@@ -0,0 +1,6 @@
+#include "common.cuh"
+
+void ggml_cuda_flash_attn_ext( // declaration only; the definition is not visible in this hunk
+        ggml_backend_cuda_context & ctx, // non-const reference to the ggml_backend_cuda_context
+        const ggml_tensor * Q, const ggml_tensor * K, const ggml_tensor * V, // Q, K, V: pointers to const tensors (read-only through these pointers)
+        const ggml_tensor * mask, ggml_tensor * KQV); // mask is const; KQV is the only non-const tensor — presumably the output, TODO confirm against the definition