Measure eval time only for single-token calls
This commit is contained in:
parent
71ed3d224d
commit
a9f900b645
1 changed file with 5 additions and 2 deletions
|
@@ -836,8 +836,11 @@ static bool llama_eval_internal(
|
||||||
|
|
||||||
ggml_free(ctx0);
|
ggml_free(ctx0);
|
||||||
|
|
||||||
lctx.t_eval_us += ggml_time_us() - t_start_us;
|
// measure the performance only for the single-token evals
|
||||||
lctx.n_eval++;
|
if (N == 1) {
|
||||||
|
lctx.t_eval_us += ggml_time_us() - t_start_us;
|
||||||
|
lctx.n_eval++;
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue