diff --git a/examples/server/tests/unit/test_basic.py b/examples/server/tests/unit/test_basic.py index b8d68989b..22c6fe545 100644 --- a/examples/server/tests/unit/test_basic.py +++ b/examples/server/tests/unit/test_basic.py @@ -23,6 +23,10 @@ def test_server_props(): res = server.make_request("GET", "/props") assert res.status_code == 200 assert res.body["total_slots"] == server.n_slots + default_val = res.body["default_generation_settings"] + assert server.n_ctx is not None and server.n_slots is not None + assert default_val["n_ctx"] == server.n_ctx / server.n_slots + assert default_val["params"]["seed"] == server.seed def test_server_models(): @@ -36,12 +40,26 @@ def test_server_models(): def test_server_slots(): global server + + # without slots endpoint enabled, this should return error + server.server_slots = False + server.start() + res = server.make_request("GET", "/slots") + assert res.status_code == 501 # ERROR_TYPE_NOT_SUPPORTED + assert "error" in res.body + server.stop() + + # with slots endpoint enabled, this should return slots info server.server_slots = True + server.n_slots = 2 server.start() res = server.make_request("GET", "/slots") assert res.status_code == 200 assert len(res.body) == server.n_slots - assert res.body[0]["n_ctx"] > 0 + assert server.n_ctx is not None and server.n_slots is not None + assert res.body[0]["n_ctx"] == server.n_ctx / server.n_slots + assert "params" in res.body[0] + assert res.body[0]["params"]["seed"] == server.seed def test_load_split_model(): diff --git a/examples/server/tests/utils.py b/examples/server/tests/utils.py index c352943f2..69215eaa4 100644 --- a/examples/server/tests/utils.py +++ b/examples/server/tests/utils.py @@ -92,7 +92,6 @@ class ServerProcess: else: server_path = "../../../build/bin/llama-server" server_args = [ - "--slots", # requires to get slot status via /slots endpoint "--host", self.server_host, "--port", @@ -184,7 +183,7 @@ class ServerProcess: start_time = time.time() while time.time() - start_time < timeout_seconds: try: - response = self.make_request("GET", "/slots", headers={ + response = self.make_request("GET", "/health", headers={ "Authorization": f"Bearer {self.api_key}" if self.api_key else None }) if response.status_code == 200: @@ -227,7 +226,7 @@ class ServerProcess: result.headers = dict(response.headers) result.status_code = response.status_code result.body = response.json() if parse_body else None - print("Response from server", result.body) + print("Response from server", json.dumps(result.body, indent=2)) return result def make_stream_request( @@ -248,7 +247,7 @@ class ServerProcess: break elif line.startswith('data: '): data = json.loads(line[6:]) - print("Partial response from server", data) + print("Partial response from server", json.dumps(data, indent=2)) yield data