server/bench:
- fix when prometheus not started - wait for server to be ready before starting bench

parent 1bf38cffdf
commit fab46ca1ae

1 changed file with 18 additions and 9 deletions
@@ -189,12 +189,12 @@ xychart-beta
         "pp": {
             "p95": round(data['metrics']["llamacpp_prompt_processing_second"]["p(95)"], 2),
             "avg": round(data['metrics']["llamacpp_prompt_processing_second"]["avg"], 2),
-            "0": round(mean(prometheus_metrics['prompt_tokens_seconds']), 2),
+            "0": round(mean(prometheus_metrics['prompt_tokens_seconds']), 2) if 'prompt_tokens_seconds' in prometheus_metrics else 0,
         },
         "tg": {
             "p95": round(data['metrics']["llamacpp_tokens_second"]["p(95)"], 2),
             "avg": round(data['metrics']["llamacpp_tokens_second"]["avg"], 2),
-            "0": round(mean(prometheus_metrics['predicted_tokens_seconds']), 2),
+            "0": round(mean(prometheus_metrics['predicted_tokens_seconds']), 2) if 'predicted_tokens_seconds' in prometheus_metrics else 0,
         },
     }
     with open("results.github.env", 'a') as github_env:
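These guards matter because prometheus_metrics only holds series that were actually scraped from the server's /metrics endpoint; when Prometheus metrics are not started, the dict stays empty and the old unconditional lookups would raise a KeyError. A minimal sketch of the same fallback pattern, with an empty dict standing in for a failed scrape:

    from statistics import mean

    # Stand-in for the case where the /metrics scrape never succeeded.
    prometheus_metrics = {}

    # Fall back to 0 instead of raising a KeyError on the missing series.
    pp = round(mean(prometheus_metrics['prompt_tokens_seconds']), 2) if 'prompt_tokens_seconds' in prometheus_metrics else 0
    print(pp)  # -> 0 when the series is absent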
@@ -234,7 +234,7 @@ def start_server(args):
     server_process = start_server_background(args)

     attempts = 0
-    max_attempts = 20
+    max_attempts = 600
     if 'GITHUB_ACTIONS' in os.environ:
         max_attempts *= 2

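With the 0.5 s sleep used in the wait loops, this raises the startup budget from 20 x 0.5 s = 10 s to 600 x 0.5 s = 300 s (about 5 minutes), and to roughly 10 minutes when GITHUB_ACTIONS doubles it, which leaves headroom for the server to download and load the model on CI runners.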
@@ -245,7 +245,15 @@ def start_server(args):
         print(f"bench: waiting for server to start ...")
         time.sleep(0.5)

-    print("bench: server started.")
+    attempts = 0
+    while not is_server_ready(args.host, args.port):
+        attempts += 1
+        if attempts > max_attempts:
+            assert False, "server not ready"
+        print(f"bench: waiting for server to be ready ...")
+        time.sleep(0.5)
+
+    print("bench: server started and ready.")
     return server_process


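Startup now waits in two phases: the existing loop only confirms that the TCP port accepts connections (is_server_listening), while the new loop polls the /health endpoint until it returns 200, which the server does once it can actually serve requests. A sketch of the same polling pattern factored into one helper (wait_until is hypothetical and not part of the script):

    import time

    def wait_until(predicate, max_attempts, what, interval=0.5):
        # Poll `predicate` every `interval` seconds until it returns True,
        # failing loudly after `max_attempts` tries (mirrors the two loops above).
        attempts = 0
        while not predicate():
            attempts += 1
            if attempts > max_attempts:
                assert False, f"{what}: timed out"
            print(f"bench: waiting for {what} ...")
            time.sleep(interval)

    # Usage, assuming the helpers defined in this script:
    #   wait_until(lambda: is_server_listening(args.host, args.port), max_attempts, "server to start")
    #   wait_until(lambda: is_server_ready(args.host, args.port), max_attempts, "server to be ready")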
@@ -258,11 +266,6 @@ def start_server_background(args):
         '--host', args.host,
         '--port', args.port,
     ]
-    model_file = args.model_path_prefix + os.path.sep + args.hf_file
-    model_dir = os.path.dirname(model_file)
-    if not os.path.exists(model_dir):
-        os.makedirs(model_dir)
-    server_args.extend(['--model', model_file])
     server_args.extend(['--hf-repo', args.hf_repo])
     server_args.extend(['--hf-file', args.hf_file])
     server_args.extend(['--n-gpu-layers', args.n_gpu_layers])
@@ -306,6 +309,12 @@ def is_server_listening(server_fqdn, server_port):
         return _is_server_listening


+def is_server_ready(server_fqdn, server_port):
+    url = f"http://{server_fqdn}:{server_port}/health"
+    response = requests.get(url)
+    return response.status_code == 200
+
+
 def escape_metric_name(metric_name):
     return re.sub('[^A-Z0-9]', '_', metric_name.upper())

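The readiness probe is just an HTTP GET against the server's /health endpoint, returning True once it answers with 200. A quick way to exercise the same check by hand (the host and port values here are only illustrative):

    import requests

    try:
        r = requests.get("http://localhost:8080/health", timeout=2)
        print(r.status_code, r.text)  # expect 200 once the server is ready
    except requests.exceptions.ConnectionError:
        print("server not listening yet")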