add test for slots endpoint
commit 9bb1ae6bea
parent db97c8b19b

4 changed files with 17 additions and 2 deletions
@@ -2184,6 +2184,7 @@ struct server_context {
             auto res = std::make_unique<server_task_result_metrics>();
             res->id = task.id;
             res->slots_data = slots_data;
             res->n_idle_slots = n_idle_slots;
             res->n_processing_slots = n_processing_slots;
             res->n_tasks_deferred = queue_tasks.queue_tasks_deferred.size();
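For context on the server-side hunk: the metrics task result bundles the per-slot data together with a few aggregate counters, and the new `/slots` test ultimately reads the slot entries gathered here. A rough Python sketch of the same bookkeeping follows; the `Slot`/queue shapes are assumptions for illustration, only the counter names come from the hunk above.

```python
# Rough sketch of the aggregation server_context performs when it fills a
# server_task_result_metrics. Slot and queue shapes are illustrative; only
# the counter names (n_idle_slots, n_processing_slots, n_tasks_deferred)
# are taken from the hunk above.
from dataclasses import dataclass, field


@dataclass
class Slot:
    id: int
    is_processing: bool = False


@dataclass
class ServerState:
    slots: list = field(default_factory=list)
    deferred_tasks: list = field(default_factory=list)

    def metrics_snapshot(self) -> dict:
        n_processing = sum(1 for s in self.slots if s.is_processing)
        return {
            "n_idle_slots": len(self.slots) - n_processing,
            "n_processing_slots": n_processing,
            "n_tasks_deferred": len(self.deferred_tasks),  # ~ queue_tasks_deferred.size()
        }
```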
@@ -33,6 +33,17 @@ def test_server_models():
     assert len(res.body["data"]) == 1
     assert res.body["data"][0]["id"] == server.model_alias


+def test_server_slots():
+    global server
+    server.server_slots = True
+    server.start()
+    res = server.make_request("GET", "/slots")
+    assert res.status_code == 200
+    assert len(res.body) == server.n_slots
+    assert res.body[0]["n_ctx"] > 0


 def test_load_split_model():
     global server
     server.model_hf_repo = "ggml-org/models"
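The new test is deliberately loose about the payload: it only requires a 200 status, one entry per configured slot, and a positive `n_ctx` on the first entry. A minimal body that would satisfy it looks roughly like the following; only `n_ctx` is actually guaranteed by the test, the other keys are assumptions about what a slot entry may contain.

```python
# Hypothetical GET /slots payload for a server configured with two slots.
# Only "n_ctx" is asserted by test_server_slots; the other keys are assumed.
example_slots_body = [
    {"id": 0, "n_ctx": 2048, "is_processing": False},
    {"id": 1, "n_ctx": 2048, "is_processing": False},
]

n_slots = 2
assert len(example_slots_body) == n_slots    # len(res.body) == server.n_slots
assert example_slots_body[0]["n_ctx"] > 0    # res.body[0]["n_ctx"] > 0
```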
@@ -30,7 +30,7 @@ def test_chat_completion(model, system_prompt, user_prompt, max_tokens, re_conte
         ],
     })
     assert res.status_code == 200
-    assert "cmpl" in res.body["id"]
+    assert "cmpl" in res.body["id"]  # make sure the completion id has the expected format
     assert res.body["model"] == model if model is not None else server.model_alias
     assert res.body["usage"]["prompt_tokens"] == n_prompt
     assert res.body["usage"]["completion_tokens"] == n_predicted
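The comment added here documents an intentionally loose check: the test only requires that the id contains "cmpl", which matches OpenAI-style ids such as "chatcmpl-...". A stricter variant, if one were ever wanted, could pin the shape with a regex; the pattern below is an illustration, not something the test enforces.

```python
import re

completion_id = "chatcmpl-Ab3xYz9q"  # example id in the OpenAI-compatible format

# what the test checks:
assert "cmpl" in completion_id

# a stricter, hypothetical check:
assert re.fullmatch(r"(chat)?cmpl-[A-Za-z0-9]+", completion_id)
```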
@@ -66,7 +66,7 @@ def test_chat_completion_stream(system_prompt, user_prompt, max_tokens, re_conte
         assert "gpt-3.5" in data["model"]  # DEFAULT_OAICOMPAT_MODEL, maybe changed in the future
         if last_cmpl_id is None:
             last_cmpl_id = data["id"]
-        assert last_cmpl_id == data["id"]
+        assert last_cmpl_id == data["id"]  # make sure the completion id is the same for all events in the stream
         if choice["finish_reason"] in ["stop", "length"]:
             assert data["usage"]["prompt_tokens"] == n_prompt
             assert data["usage"]["completion_tokens"] == n_predicted
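The streaming counterpart keeps the id of the first chunk and asserts that every later chunk reuses it, which is what the new comment spells out. The pattern, condensed; the chunk contents below are illustrative, the loop mirrors the test above.

```python
# Illustrative SSE chunks from one streamed chat completion.
chunks = [
    {"id": "chatcmpl-abc123", "choices": [{"finish_reason": None}]},
    {"id": "chatcmpl-abc123", "choices": [{"finish_reason": "stop"}]},
]

last_cmpl_id = None
for data in chunks:
    if last_cmpl_id is None:
        last_cmpl_id = data["id"]
    # every event in the stream must carry the same completion id
    assert last_cmpl_id == data["id"]
```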
@@ -64,6 +64,7 @@ class ServerProcess:
     server_embeddings: bool | None = False
     server_reranking: bool | None = False
     server_metrics: bool | None = False
+    server_slots: bool | None = False
     draft: int | None = None
     api_key: str | None = None
     response_format: str | None = None
@@ -129,6 +130,8 @@ class ServerProcess:
             server_args.append("--reranking")
         if self.server_metrics:
             server_args.append("--metrics")
+        if self.server_slots:
+            server_args.append("--slots")
         if self.model_alias:
             server_args.extend(["--alias", self.model_alias])
         if self.n_ctx:
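Together, the two test-utils hunks let a test opt into the endpoint by setting one attribute before `start()`; the flag is then forwarded to the server binary as `--slots`. A condensed sketch of that plumbing follows; the attribute and argument names are as in the hunks, while `build_args` is a hypothetical stand-in for the argument assembly done inside `start()`.

```python
from __future__ import annotations


class ServerProcess:
    server_metrics: bool | None = False
    server_slots: bool | None = False

    def build_args(self) -> list:
        # stand-in for the real start() logic that assembles the CLI
        server_args = []
        if self.server_metrics:
            server_args.append("--metrics")
        if self.server_slots:
            server_args.append("--slots")  # enables GET /slots, as used by test_server_slots
        return server_args


# usage mirroring the new test: flip the attribute, and the flag is passed through
proc = ServerProcess()
proc.server_slots = True
assert "--slots" in proc.build_args()
```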