From a9f900b64504652b04bf4b278cef142c94db1cb5 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Wed, 22 Mar 2023 07:22:51 +0200
Subject: [PATCH] Measure eval time only for single-token calls

---
 llama.cpp | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index c9edb84f5..8b7006a1d 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -836,8 +836,11 @@ static bool llama_eval_internal(
 
     ggml_free(ctx0);
 
-    lctx.t_eval_us += ggml_time_us() - t_start_us;
-    lctx.n_eval++;
+    // measure the performance only for the single-token evals
+    if (N == 1) {
+        lctx.t_eval_us += ggml_time_us() - t_start_us;
+        lctx.n_eval++;
+    }
 
     return true;
 }