server: bench: enable flash_attn param

This commit is contained in:
Pierrick HYMBERT 2024-04-17 23:26:29 +02:00
parent 405385726e
commit 5668c79ea0

View file

@@ -268,6 +268,7 @@ def start_server_background(args):
server_args.extend(['--defrag-thold', "0.1"])
server_args.append('--cont-batching')
server_args.append('--metrics')
server_args.append('--flash-attn')
server_args.extend(['--log-format', "text"])
args = [str(arg) for arg in [server_path, *server_args]]
print(f"bench: starting server with: {' '.join(args)}")