From 5668c79ea092b7bff95e1fce96e3de717c31349d Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Wed, 17 Apr 2024 23:26:29 +0200 Subject: [PATCH] server: bench: enable flash_attn param --- examples/server/bench/bench.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/server/bench/bench.py b/examples/server/bench/bench.py index 6ca637bdd..86c5de101 100644 --- a/examples/server/bench/bench.py +++ b/examples/server/bench/bench.py @@ -268,6 +268,7 @@ def start_server_background(args): server_args.extend(['--defrag-thold', "0.1"]) server_args.append('--cont-batching') server_args.append('--metrics') + server_args.append('--flash-attn') server_args.extend(['--log-format', "text"]) args = [str(arg) for arg in [server_path, *server_args]] print(f"bench: starting server with: {' '.join(args)}")