fix server embedding test

slaren 2024-03-12 17:59:59 +01:00
parent 1ac668e4ec
commit 4ddccc2852
2 changed files with 9 additions and 0 deletions

@@ -9,6 +9,7 @@ Feature: llama.cpp server
     And 42 as server seed
     And 2 slots
     And 1024 as batch size
+    And 1024 as ubatch size
     And 2048 KV cache size
     And embeddings extraction
     Then the server is starting
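Why the new step fixes the test: for embedding (non-causal) models the llama.cpp server evaluates each whole input in a single physical batch, so a prompt longer than the ubatch size is rejected; raising the ubatch size to match the 1024 batch size lets the embedding test's larger inputs through. A minimal Python sketch of the constraint, reusing the values from the feature file above (the check itself is illustrative, not the server's actual code):

    n_batch, n_ubatch = 1024, 1024   # logical and physical batch sizes
    prompt_tokens = 900              # hypothetical embedding input length

    # embedding inputs that cannot fit into one physical batch are refused
    if prompt_tokens > n_ubatch:
        raise ValueError('input too large: increase --ubatch-size')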

@@ -33,6 +33,7 @@ def step_server_config(context, server_fqdn, server_port):
     context.model_alias = None
     context.n_batch = None
+    context.n_ubatch = None
     context.n_ctx = None
     context.n_ga = None
     context.n_ga_w = None
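The None default pairs with the truthiness check added in start_server_background (last hunk below): if a scenario never sets an ubatch size, no flag is emitted. A standalone sketch of the pattern, with types.SimpleNamespace standing in for behave's scenario context:

    from types import SimpleNamespace

    context = SimpleNamespace(n_batch=None, n_ubatch=None)
    context.n_ubatch = 1024  # what step_n_ubatch does when the step appears

    server_args = []
    if context.n_ubatch:     # None (never set) adds nothing
        server_args.extend(['--ubatch-size', context.n_ubatch])
    assert server_args == ['--ubatch-size', 1024]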
@@ -278,6 +279,11 @@ def step_n_batch(context, n_batch):
     context.n_batch = n_batch
 
 
+@step('{n_ubatch:d} as ubatch size')
+def step_n_ubatch(context, n_ubatch):
+    context.n_ubatch = n_ubatch
+
+
 @step('{seed:d} as seed')
 def step_seed(context, seed):
     context.seed = seed
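behave's default step matcher is built on the parse library, so the '{n_ubatch:d}' placeholder both matches the step text and converts the capture to an int. A runnable sketch of just that matching, using parse directly (behave wires this up internally):

    import parse  # the library behind behave's default step matcher

    pattern = parse.compile('{n_ubatch:d} as ubatch size')
    result = pattern.parse('1024 as ubatch size')  # step text minus the 'And' keyword
    assert result['n_ubatch'] == 1024              # ':d' yields an int, not a str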
@@ -1029,6 +1035,8 @@ def start_server_background(context):
     ]
     if context.n_batch:
         server_args.extend(['--batch-size', context.n_batch])
+    if context.n_ubatch:
+        server_args.extend(['--ubatch-size', context.n_ubatch])
     if context.n_gpu_layer:
         server_args.extend(['--n-gpu-layers', context.n_gpu_layer])
     if context.server_continuous_batching:
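Note that the values appended to server_args here are ints; the harness must stringify them before handing the command line to the OS. A hedged sketch of that launch step (the binary path and surrounding details are hypothetical, not the test suite's exact code):

    import subprocess

    server_path = './server'  # hypothetical path to the llama.cpp server binary
    server_args = ['--batch-size', 1024, '--ubatch-size', 1024]

    # every argv element must be a str before exec
    process = subprocess.Popen([str(arg) for arg in [server_path, *server_args]])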