fix server embedding test

This commit is contained in:
slaren 2024-03-12 17:59:59 +01:00
parent 1ac668e4ec
commit 4ddccc2852
2 changed files with 9 additions and 0 deletions

View file

@ -9,6 +9,7 @@ Feature: llama.cpp server
And 42 as server seed
And 2 slots
And 1024 as batch size
And 1024 as ubatch size
And 2048 KV cache size
And embeddings extraction
Then the server is starting

View file

@ -33,6 +33,7 @@ def step_server_config(context, server_fqdn, server_port):
context.model_alias = None
context.n_batch = None
context.n_ubatch = None
context.n_ctx = None
context.n_ga = None
context.n_ga_w = None
@ -278,6 +279,11 @@ def step_n_batch(context, n_batch):
context.n_batch = n_batch
@step('{n_ubatch:d} as ubatch size')
def step_n_ubatch(context, n_ubatch):
    """Store the ubatch size given in the scenario on the test context.

    Matches step text such as ``1024 as ubatch size``; the ``:d`` format
    in the pattern means ``n_ubatch`` arrives as an integer.  The value is
    kept on ``context.n_ubatch`` so that the server start-up code can
    forward it as ``--ubatch-size``.
    """
    context.n_ubatch = n_ubatch
@step('{seed:d} as seed')
def step_seed(context, seed):
    """Store the seed given in the scenario on the test context.

    Matches step text such as ``42 as seed``; the ``:d`` format in the
    pattern means ``seed`` arrives as an integer.  The value is kept on
    ``context.seed`` for later steps to read.
    """
    context.seed = seed
@ -1029,6 +1035,8 @@ def start_server_background(context):
]
if context.n_batch:
server_args.extend(['--batch-size', context.n_batch])
if context.n_ubatch:
server_args.extend(['--ubatch-size', context.n_ubatch])
if context.n_gpu_layer:
server_args.extend(['--n-gpu-layers', context.n_gpu_layer])
if context.server_continuous_batching: