CUDA: quantized KV support for FA vec
This commit is contained in:
parent
10b1e45876
commit
672244a88b
11 changed files with 826 additions and 142 deletions
|
@ -36,6 +36,9 @@ static __global__ void flash_attn_tile_ext_f16(
|
|||
const int nb11,
|
||||
const int nb12,
|
||||
const int nb13,
|
||||
const int nb21,
|
||||
const int nb22,
|
||||
const int nb23,
|
||||
const int ne0,
|
||||
const int ne1,
|
||||
const int ne2,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue