From 971fe9f007aab94ac385373d011ef21f114243c2 Mon Sep 17 00:00:00 2001 From: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Sat, 17 Jun 2023 06:54:29 -0500 Subject: [PATCH] add tokens per second output (#246) * add tokens per second output * Update gpttype_adapter.cpp simplify --------- Co-authored-by: LostRuins <39025047+LostRuins@users.noreply.github.com> --- gpttype_adapter.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp index 775f29bb6..98cf3a2b9 100644 --- a/gpttype_adapter.cpp +++ b/gpttype_adapter.cpp @@ -1280,7 +1280,8 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o float pt1 = (time1*1000.0/(embd_inp.size()==0?1:embd_inp.size())); int realnpredict = params.n_predict-stopper_unused_tokens; float pt2 = (time2*1000.0/(realnpredict==0?1:realnpredict)); - printf("\nTime Taken - Processing:%.1fs (%.0fms/T), Generation:%.1fs (%.0fms/T), Total:%.1fs", time1, pt1, time2, pt2, (time1 + time2)); + float tokens_per_second = (realnpredict == 0 ? 0 : realnpredict / (time1 + time2)); + printf("\nTime Taken - Processing:%.1fs (%.0fms/T), Generation:%.1fs (%.0fms/T), Total:%.1fs (%.1fT/s)", time1, pt1, time2, pt2, (time1 + time2), tokens_per_second); fflush(stdout); output.status = 1; generation_finished = true;