parallelize fattn compilation test

2024-05-28 01:19:36 +02:00 · 2024-05-28 01:19:36 +02:00 · 1ca802a3e0
commit 1ca802a3e0
parent f4003cfba1
6 changed files with 390 additions and 288 deletions
--- a/ggml-cuda/fattn-vec-f16-f16.cu
+++ b/ggml-cuda/fattn-vec-f16-f16.cu
@@ -0,0 +1,7 @@
+#include "common.cuh"
+#include "fattn-common.cuh"
+#include "fattn-vec-f16.cuh"
+
+DECL_FATTN_VEC_F16_INST(64, 1, 4, (vec_dot_fattn_vec_KQ_f16<half, 64>), false, dequantize_1_f16<half>); // 64 variant; the parentheses keep the comma in <half, 64> from splitting this into two macro arguments
+DECL_FATTN_VEC_F16_INST(128, 1, 4, (vec_dot_fattn_vec_KQ_f16<half, 128>), false, dequantize_1_f16<half>); // 128 variant; the first macro argument matches the second template argument, as on every line here
+DECL_FATTN_VEC_F16_INST(256, 1, 4, (vec_dot_fattn_vec_KQ_f16<half, 256>), false, dequantize_1_f16<half>); // 256 variant; NOTE(review): meaning of 1, 4 and false is fixed by the macro definition (presumably in fattn-vec-f16.cuh) -- confirm there