Measure eval time only for single-token calls
This commit is contained in:
parent
71ed3d224d
commit
a9f900b645
1 changed file with 5 additions and 2 deletions
|
@@ -836,8 +836,11 @@ static bool llama_eval_internal(
|
|||
|
||||
ggml_free(ctx0);
|
||||
|
||||
// measure the performance only for the single-token evals
|
||||
if (N == 1) {
|
||||
lctx.t_eval_us += ggml_time_us() - t_start_us;
|
||||
lctx.n_eval++;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue