export llama_timings as struct and expose them in server

This commit is contained in:
Tobias Lütke 2023-07-04 21:52:04 -04:00
parent a76ce02a6c
commit 30d973dc42
No known key found for this signature in database
GPG key ID: 1FC0DBB14164709A
7 changed files with 1631 additions and 1309 deletions

15
llama.h
View file

@@ -134,6 +134,20 @@ extern "C" {
bool quantize_output_tensor; // quantize output.weight
} llama_model_quantize_params;
// performance timing information
//
// Plain-data record of timing values and counters; it is returned by value
// from llama_get_timings().
// NOTE(review): the per-field notes below are inferred from the field names only
// (the code that fills this struct is not part of this header) - confirm against
// the implementation before relying on them.
struct llama_timings {
double t_start_ms;          // presumably the start timestamp, in milliseconds - TODO confirm
double t_end_ms;            // presumably the end timestamp, in milliseconds - TODO confirm
double load_time_ms;        // presumably time spent loading the model, in milliseconds
double sample_time_ms;      // presumably total time spent sampling, in milliseconds
double prompt_eval_time_ms; // presumably total time spent evaluating the prompt, in milliseconds
double eval_time_ms;        // presumably total time spent in non-prompt evaluation, in milliseconds
int32_t n_sample;           // presumably the number of sampling calls counted in sample_time_ms
int32_t n_p_eval;           // presumably the number of prompt tokens counted in prompt_eval_time_ms
int32_t n_eval;             // presumably the number of evaluations counted in eval_time_ms
};
// Parameter-struct getters: each takes no arguments and returns its params struct by value.
// NOTE(review): presumably the returned struct is populated with the library defaults - confirm
// against the implementation.
//
// The parameter lists are spelled (void) on purpose: this header is consumed from C, where an
// empty () declares a function with unspecified parameters (no prototype) before C23.
// In C++ the two spellings are equivalent, so existing callers are unaffected.
LLAMA_API struct llama_context_params llama_context_default_params(void);
LLAMA_API struct llama_model_quantize_params llama_model_quantize_default_params(void);
@@ -331,6 +345,7 @@ extern "C" {
// Takes a context and a pointer to a candidates array and returns a llama_token.
// NOTE(review): presumably selects one token out of `candidates`; the selection strategy and
// whether `candidates` is modified are not visible from this declaration - confirm in the implementation.
LLAMA_API llama_token llama_sample_token(struct llama_context * ctx, llama_token_data_array * candidates);
// Performance information
LLAMA_API llama_timings llama_get_timings(struct llama_context * ctx);
LLAMA_API void llama_print_timings(struct llama_context * ctx);
LLAMA_API void llama_reset_timings(struct llama_context * ctx);