server: concurrency fix + monitoring - add /metrics Prometheus-compatible endpoint (#5708)

* server: monitoring - add /metrics Prometheus-compatible endpoint

* server: concurrency issue, when 2 tasks are waiting for results, only one calling thread is notified

* server: metrics - move to a dedicated struct
This commit is contained in:
Pierrick Hymbert 2024-02-25 13:49:43 +01:00 committed by GitHub
parent 1289408817
commit d52d7819b8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 191 additions and 8 deletions

View file

@ -50,7 +50,7 @@ enum task_type {
TASK_TYPE_COMPLETION,
TASK_TYPE_CANCEL,
TASK_TYPE_NEXT_RESPONSE,
TASK_TYPE_SLOTS_DATA
TASK_TYPE_METRICS
};
struct task_server {
@ -441,7 +441,7 @@ struct llama_server_response {
{
LOG_VERBOSE("queue_results.push_back", {});
queue_results.push_back(result);
condition_results.notify_one();
condition_results.notify_all();
return;
}
}