server/bench:

- fix when prometheus not started
- wait for server to be ready before starting bench
This commit is contained in:
Pierrick HYMBERT 2024-12-27 11:11:14 +01:00
parent 1bf38cffdf
commit fab46ca1ae

View file

@ -189,12 +189,12 @@ xychart-beta
"pp": { "pp": {
"p95": round(data['metrics']["llamacpp_prompt_processing_second"]["p(95)"], 2), "p95": round(data['metrics']["llamacpp_prompt_processing_second"]["p(95)"], 2),
"avg": round(data['metrics']["llamacpp_prompt_processing_second"]["avg"], 2), "avg": round(data['metrics']["llamacpp_prompt_processing_second"]["avg"], 2),
"0": round(mean(prometheus_metrics['prompt_tokens_seconds']), 2), "0": round(mean(prometheus_metrics['prompt_tokens_seconds']), 2) if 'prompt_tokens_seconds' in prometheus_metrics else 0,
}, },
"tg": { "tg": {
"p95": round(data['metrics']["llamacpp_tokens_second"]["p(95)"], 2), "p95": round(data['metrics']["llamacpp_tokens_second"]["p(95)"], 2),
"avg": round(data['metrics']["llamacpp_tokens_second"]["avg"], 2), "avg": round(data['metrics']["llamacpp_tokens_second"]["avg"], 2),
"0": round(mean(prometheus_metrics['predicted_tokens_seconds']), 2), "0": round(mean(prometheus_metrics['predicted_tokens_seconds']), 2) if 'predicted_tokens_seconds' in prometheus_metrics else 0,
}, },
} }
with open("results.github.env", 'a') as github_env: with open("results.github.env", 'a') as github_env:
@ -234,7 +234,7 @@ def start_server(args):
server_process = start_server_background(args) server_process = start_server_background(args)
attempts = 0 attempts = 0
max_attempts = 20 max_attempts = 600
if 'GITHUB_ACTIONS' in os.environ: if 'GITHUB_ACTIONS' in os.environ:
max_attempts *= 2 max_attempts *= 2
@ -245,7 +245,15 @@ def start_server(args):
print(f"bench: waiting for server to start ...") print(f"bench: waiting for server to start ...")
time.sleep(0.5) time.sleep(0.5)
print("bench: server started.") attempts = 0
while not is_server_ready(args.host, args.port):
attempts += 1
if attempts > max_attempts:
assert False, "server not ready"
print(f"bench: waiting for server to be ready ...")
time.sleep(0.5)
print("bench: server started and ready.")
return server_process return server_process
@ -258,11 +266,6 @@ def start_server_background(args):
'--host', args.host, '--host', args.host,
'--port', args.port, '--port', args.port,
] ]
model_file = args.model_path_prefix + os.path.sep + args.hf_file
model_dir = os.path.dirname(model_file)
if not os.path.exists(model_dir):
os.makedirs(model_dir)
server_args.extend(['--model', model_file])
server_args.extend(['--hf-repo', args.hf_repo]) server_args.extend(['--hf-repo', args.hf_repo])
server_args.extend(['--hf-file', args.hf_file]) server_args.extend(['--hf-file', args.hf_file])
server_args.extend(['--n-gpu-layers', args.n_gpu_layers]) server_args.extend(['--n-gpu-layers', args.n_gpu_layers])
@ -306,6 +309,12 @@ def is_server_listening(server_fqdn, server_port):
return _is_server_listening return _is_server_listening
def is_server_ready(server_fqdn, server_port, timeout=5):
    """Report whether the server's ``/health`` endpoint answers with HTTP 200.

    Args:
        server_fqdn: Host name or address used to build the health URL.
        server_port: Port used to build the health URL.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        True if ``GET http://<server_fqdn>:<server_port>/health`` returns
        status code 200; False for any other status code, or when the
        request fails or times out.
    """
    url = f"http://{server_fqdn}:{server_port}/health"
    try:
        # Without a timeout, requests waits forever on a server that accepts
        # the connection but never responds.
        response = requests.get(url, timeout=timeout)
    except requests.exceptions.RequestException:
        # Connection refused/reset or timeout: report "not ready" so a
        # polling caller can simply retry instead of crashing.
        return False
    return response.status_code == 200
def escape_metric_name(metric_name):
    """Return ``metric_name`` upper-cased with unsafe characters replaced.

    The name is converted to upper case, then every character that is not
    an ASCII letter ``A-Z`` or digit ``0-9`` is replaced by an underscore.
    """
    return re.sub('[^A-Z0-9]', '_', metric_name.upper())