16 cols for Phi-2

This commit is contained in:
Johannes Gäßler 2024-03-30 09:19:19 +01:00 committed by Georgi Gerganov
parent 75aa7b4b18
commit d59ac670bf

View file

@@ -579,7 +579,8 @@ void ggml_cuda_flash_attn_ext(ggml_backend_cuda_context & ctx, ggml_tensor * dst
return;
}
int cols_per_block;
int cols_per_block = 16;
if (Q->ne[0] % 32 == 0) {
if (Q->ne[1] >= 128 && Q->ne[0] <= 128) {
cols_per_block = 64;
} else if (Q->ne[1] >= 64) {
@@ -589,6 +590,7 @@ void ggml_cuda_flash_attn_ext(ggml_backend_cuda_context & ctx, ggml_tensor * dst
} else {
cols_per_block = 8;
}
}
const int frag_m = cols_per_block == 8 ? 32 : 16;
const int nwarps = (Q->ne[0] <= 128 || cols_per_block == 8 ? Q->ne[0] : Q->ne[0]/2) / frag_m;
const dim3 blocks_num((Q->ne[1] + cols_per_block - 1) / cols_per_block, Q->ne[2], Q->ne[3]);