From a9f900b64504652b04bf4b278cef142c94db1cb5 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Wed, 22 Mar 2023 07:22:51 +0200
Subject: [PATCH] Measure eval time only for single-token calls

---
 llama.cpp | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index c9edb84f5..8b7006a1d 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -836,8 +836,11 @@ static bool llama_eval_internal(
 
     ggml_free(ctx0);
 
-    lctx.t_eval_us += ggml_time_us() - t_start_us;
-    lctx.n_eval++;
+    // measure the performance only for the single-token evals
+    if (N == 1) {
+        lctx.t_eval_us += ggml_time_us() - t_start_us;
+        lctx.n_eval++;
+    }
 
     return true;
 }