From 405385726ef7432b65b9e63dd7a63c18765eb376 Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Wed, 17 Apr 2024 14:05:02 +0200 Subject: [PATCH] server: support flash_attn param --- examples/server/server.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 634e653ad..f1754b60b 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -2722,6 +2722,8 @@ static void server_params_parse(int argc, char ** argv, server_params & sparams, params.embedding = true; } else if (arg == "-cb" || arg == "--cont-batching") { params.cont_batching = true; + } else if (arg == "-fa" || arg == "--flash-attn") { + params.flash_attn = true; } else if (arg == "-np" || arg == "--parallel") { if (++i >= argc) { invalid_param = true;