From 751591d52074d6be53feed6e19211932e4520159 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Tue, 23 Apr 2024 18:16:25 +0300 Subject: [PATCH] server : add help for --flash-attn arg --- examples/server/server.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index f1754b60b..2cf59fbe0 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -2357,6 +2357,7 @@ static void server_print_usage(const char * argv0, const gpt_params & params, co printf(" --embeddings enable embedding vector output (default: %s)\n", params.embedding ? "enabled" : "disabled"); printf(" -np N, --parallel N number of slots for process requests (default: %d)\n", params.n_parallel); printf(" -cb, --cont-batching enable continuous batching (a.k.a dynamic batching) (default: enabled)\n"); + printf(" -fa, --flash-attn enable Flash Attention (default: %s)\n", params.flash_attn ? "enabled" : "disabled"); printf(" -spf FNAME, --system-prompt-file FNAME\n"); printf(" set a file to load a system prompt (initial prompt of all slots), this is useful for chat applications.\n"); printf(" -ctk TYPE, --cache-type-k TYPE\n");