server: support flash_attn param

This commit is contained in:
Pierrick HYMBERT 2024-04-17 14:05:02 +02:00
parent 599ce84a71
commit 405385726e

View file

@@ -2722,6 +2722,8 @@ static void server_params_parse(int argc, char ** argv, server_params & sparams,
             params.embedding = true;
         } else if (arg == "-cb" || arg == "--cont-batching") {
             params.cont_batching = true;
+        } else if (arg == "-fa" || arg == "--flash-attn") {
+            params.flash_attn = true;
         } else if (arg == "-np" || arg == "--parallel") {
             if (++i >= argc) {
                 invalid_param = true;