mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-03-16 05:46:25 +00:00
Fix load time measurement
This commit is contained in:
parent
290a49952e
commit
bb3ebedfce
1 changed file with 7 additions and 2 deletions
9
third_party/ggml/llama.cc
vendored
9
third_party/ggml/llama.cc
vendored
|
@ -2690,13 +2690,18 @@ int llama_eval(
|
||||||
int n_tokens,
|
int n_tokens,
|
||||||
int n_past,
|
int n_past,
|
||||||
int n_threads) {
|
int n_threads) {
|
||||||
|
int64_t start_eval = 0;
|
||||||
|
if (!ctx->has_evaluated_once) {
|
||||||
|
start_eval = ggml_time_us();
|
||||||
|
}
|
||||||
if (!llama_eval_internal(*ctx, tokens, n_tokens, n_past, n_threads)) {
|
if (!llama_eval_internal(*ctx, tokens, n_tokens, n_past, n_threads)) {
|
||||||
fprintf(stderr, "%s: failed to eval\n", __func__);
|
fprintf(stderr, "%s: failed to eval\n", __func__);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
// get a more accurate load time, upon first eval
|
// get a more accurate load time by measuring the first eval
|
||||||
|
// this will take into consideration any page fault slowdown
|
||||||
if (!ctx->has_evaluated_once) {
|
if (!ctx->has_evaluated_once) {
|
||||||
ctx->t_load_us = ggml_time_us() - ctx->t_start_us;
|
ctx->t_load_us += ggml_time_us() - start_eval;
|
||||||
ctx->has_evaluated_once = true;
|
ctx->has_evaluated_once = true;
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
|
|
Loading…
Add table
Reference in a new issue