fix server embedding test
This commit is contained in:
parent
1ac668e4ec
commit
4ddccc2852
2 changed files with 9 additions and 0 deletions
|
@ -9,6 +9,7 @@ Feature: llama.cpp server
|
||||||
And 42 as server seed
|
And 42 as server seed
|
||||||
And 2 slots
|
And 2 slots
|
||||||
And 1024 as batch size
|
And 1024 as batch size
|
||||||
|
And 1024 as ubatch size
|
||||||
And 2048 KV cache size
|
And 2048 KV cache size
|
||||||
And embeddings extraction
|
And embeddings extraction
|
||||||
Then the server is starting
|
Then the server is starting
|
||||||
|
|
|
@ -33,6 +33,7 @@ def step_server_config(context, server_fqdn, server_port):
|
||||||
|
|
||||||
context.model_alias = None
|
context.model_alias = None
|
||||||
context.n_batch = None
|
context.n_batch = None
|
||||||
|
context.n_ubatch = None
|
||||||
context.n_ctx = None
|
context.n_ctx = None
|
||||||
context.n_ga = None
|
context.n_ga = None
|
||||||
context.n_ga_w = None
|
context.n_ga_w = None
|
||||||
|
@ -278,6 +279,11 @@ def step_n_batch(context, n_batch):
|
||||||
context.n_batch = n_batch
|
context.n_batch = n_batch
|
||||||
|
|
||||||
|
|
||||||
|
@step('{n_ubatch:d} as ubatch size')
|
||||||
|
def step_n_ubatch(context, n_ubatch):
|
||||||
|
context.n_ubatch = n_ubatch
|
||||||
|
|
||||||
|
|
||||||
@step('{seed:d} as seed')
|
@step('{seed:d} as seed')
|
||||||
def step_seed(context, seed):
|
def step_seed(context, seed):
|
||||||
context.seed = seed
|
context.seed = seed
|
||||||
|
@ -1029,6 +1035,8 @@ def start_server_background(context):
|
||||||
]
|
]
|
||||||
if context.n_batch:
|
if context.n_batch:
|
||||||
server_args.extend(['--batch-size', context.n_batch])
|
server_args.extend(['--batch-size', context.n_batch])
|
||||||
|
if context.n_ubatch:
|
||||||
|
server_args.extend(['--ubatch-size', context.n_ubatch])
|
||||||
if context.n_gpu_layer:
|
if context.n_gpu_layer:
|
||||||
server_args.extend(['--n-gpu-layers', context.n_gpu_layer])
|
server_args.extend(['--n-gpu-layers', context.n_gpu_layer])
|
||||||
if context.server_continuous_batching:
|
if context.server_continuous_batching:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue