parallelize fattn compilation test
This commit is contained in:
parent
f4003cfba1
commit
1ca802a3e0
6 changed files with 390 additions and 288 deletions
7
ggml-cuda/fattn-vec-f16-f16.cu
Normal file
7
ggml-cuda/fattn-vec-f16-f16.cu
Normal file
|
@@ -0,0 +1,7 @@
|
|||
#include "common.cuh"
|
||||
#include "fattn-common.cuh"
|
||||
#include "fattn-vec-f16.cuh"
|
||||
|
||||
// Invokes DECL_FATTN_VEC_F16_INST (defined outside this file, presumably in
// fattn-vec-f16.cuh -- confirm) with the size argument 64, using the f16
// KQ dot-product helper vec_dot_fattn_vec_KQ_f16<half, 64> and the
// dequantize_1_f16<half> helper.
// NOTE(review): judging by the macro name this emits a kernel instantiation
// for this translation unit; the meaning of the positional arguments
// (64, 1, 4, ..., false, ...) is not visible here -- verify against the macro.
// The fourth argument is parenthesized because its template argument list
// contains a comma, which would otherwise split it into two macro arguments.
DECL_FATTN_VEC_F16_INST(64, 1, 4, (vec_dot_fattn_vec_KQ_f16<half, 64>), false, dequantize_1_f16<half>);
|
||||
// Invokes DECL_FATTN_VEC_F16_INST (defined outside this file, presumably in
// fattn-vec-f16.cuh -- confirm) with the size argument 128, using the f16
// KQ dot-product helper vec_dot_fattn_vec_KQ_f16<half, 128> and the
// dequantize_1_f16<half> helper.
// NOTE(review): judging by the macro name this emits a kernel instantiation
// for this translation unit; the meaning of the positional arguments
// (128, 1, 4, ..., false, ...) is not visible here -- verify against the macro.
// The fourth argument is parenthesized because its template argument list
// contains a comma, which would otherwise split it into two macro arguments.
DECL_FATTN_VEC_F16_INST(128, 1, 4, (vec_dot_fattn_vec_KQ_f16<half, 128>), false, dequantize_1_f16<half>);
|
||||
// Invokes DECL_FATTN_VEC_F16_INST (defined outside this file, presumably in
// fattn-vec-f16.cuh -- confirm) with the size argument 256, using the f16
// KQ dot-product helper vec_dot_fattn_vec_KQ_f16<half, 256> and the
// dequantize_1_f16<half> helper.
// NOTE(review): judging by the macro name this emits a kernel instantiation
// for this translation unit; the meaning of the positional arguments
// (256, 1, 4, ..., false, ...) is not visible here -- verify against the macro.
// The fourth argument is parenthesized because its template argument list
// contains a comma, which would otherwise split it into two macro arguments.
DECL_FATTN_VEC_F16_INST(256, 1, 4, (vec_dot_fattn_vec_KQ_f16<half, 256>), false, dequantize_1_f16<half>);
|
Loading…
Add table
Add a link
Reference in a new issue