server : (refactor) no more json in server_task input (#10691)
* server : (refactor) no more json in server_task input
* add test for slots endpoint
* add tests for /props and /slots
* remove task inf_type
* fix CI by adding safe_json_to_str
* add "model_path" to /props
* update readme
This commit is contained in:
parent
d9c3ba2b77
commit
3573fa8e7b
6 changed files with 427 additions and 384 deletions
|
@ -64,6 +64,7 @@ class ServerProcess:
|
|||
server_embeddings: bool | None = False
|
||||
server_reranking: bool | None = False
|
||||
server_metrics: bool | None = False
|
||||
server_slots: bool | None = False
|
||||
draft: int | None = None
|
||||
api_key: str | None = None
|
||||
response_format: str | None = None
|
||||
|
@ -91,7 +92,6 @@ class ServerProcess:
|
|||
else:
|
||||
server_path = "../../../build/bin/llama-server"
|
||||
server_args = [
|
||||
"--slots", # requires to get slot status via /slots endpoint
|
||||
"--host",
|
||||
self.server_host,
|
||||
"--port",
|
||||
|
@ -129,6 +129,8 @@ class ServerProcess:
|
|||
server_args.append("--reranking")
|
||||
if self.server_metrics:
|
||||
server_args.append("--metrics")
|
||||
if self.server_slots:
|
||||
server_args.append("--slots")
|
||||
if self.model_alias:
|
||||
server_args.extend(["--alias", self.model_alias])
|
||||
if self.n_ctx:
|
||||
|
@ -181,7 +183,7 @@ class ServerProcess:
|
|||
start_time = time.time()
|
||||
while time.time() - start_time < timeout_seconds:
|
||||
try:
|
||||
response = self.make_request("GET", "/slots", headers={
|
||||
response = self.make_request("GET", "/health", headers={
|
||||
"Authorization": f"Bearer {self.api_key}" if self.api_key else None
|
||||
})
|
||||
if response.status_code == 200:
|
||||
|
@ -224,7 +226,7 @@ class ServerProcess:
|
|||
result.headers = dict(response.headers)
|
||||
result.status_code = response.status_code
|
||||
result.body = response.json() if parse_body else None
|
||||
print("Response from server", result.body)
|
||||
print("Response from server", json.dumps(result.body, indent=2))
|
||||
return result
|
||||
|
||||
def make_stream_request(
|
||||
|
@ -245,7 +247,7 @@ class ServerProcess:
|
|||
break
|
||||
elif line.startswith('data: '):
|
||||
data = json.loads(line[6:])
|
||||
print("Partial response from server", data)
|
||||
print("Partial response from server", json.dumps(data, indent=2))
|
||||
yield data
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue