server: tests: add a constant seed in completion request

Pierrick HYMBERT committed on 2024-02-20 22:55:29 +01:00
commit 0772884b06
parent b9f8390d28
2 changed files with 7 additions and 4 deletions


@@ -1,7 +1,7 @@
 Feature: llama.cpp server

   Background: Server startup
-    Given a server listening on localhost:8080 with 2 slots
+    Given a server listening on localhost:8080 with 2 slots and 42 as seed
     Then the server is starting
     Then the server is healthy


@@ -7,11 +7,12 @@ import requests
 from behave import step


-@step(u"a server listening on {server_fqdn}:{server_port} with {n_slots} slots")
-def step_server_config(context, server_fqdn, server_port, n_slots):
+@step(u"a server listening on {server_fqdn}:{server_port} with {n_slots} slots and {seed} as seed")
+def step_server_config(context, server_fqdn, server_port, n_slots, seed):
     context.server_fqdn = server_fqdn
     context.server_port = int(server_port)
     context.n_slots = int(n_slots)
+    context.seed = int(seed)
     context.base_url = f'http://{context.server_fqdn}:{context.server_port}'
     context.completions = []
@@ -154,6 +155,7 @@ def request_completion(context, prompt, n_predict=None):
     response = requests.post(f'{context.base_url}/completion', json={
         "prompt": prompt,
         "n_predict": int(n_predict) if n_predict is not None else 4096,
+        "seed": context.seed
     })
     assert response.status_code == 200
     context.completions.append(response.json())
@@ -173,7 +175,8 @@ def oai_chat_completions(context, user_prompt):
         ],
         model=context.model,
         max_tokens=context.max_tokens,
-        stream=context.enable_streaming
+        stream=context.enable_streaming,
+        seed=context.seed
     )
     if context.enable_streaming:
         completion_response = {
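
For reference, a minimal sketch (not part of the commit) of the property the constant seed is meant to give the tests: two identical /completion requests carrying the same seed should return the same text, assuming the server samples deterministically for a fixed seed. The request fields mirror the ones built in request_completion above; the completion(...) helper and the "content" key used to read the reply are assumptions for illustration, not code from this change.

import requests

base_url = 'http://localhost:8080'  # same host/port as the feature file's Background

def completion(prompt, seed):
    # hypothetical helper, shaped after request_completion in the diff above
    response = requests.post(f'{base_url}/completion', json={
        "prompt": prompt,
        "n_predict": 32,
        "seed": seed,
    })
    assert response.status_code == 200
    return response.json()["content"]  # assumes the generated text is returned under "content"

# With a fixed seed the two completions should match, which is what lets the
# behave scenarios assert on predicted content without flaking between runs.
first = completion("Write a joke about AI", seed=42)
second = completion("Write a joke about AI", seed=42)
assert first == second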