llama : llama_perf + option to disable timings during decode (#9355)
* llama : llama_perf + option to disable timings during decode ggml-ci * common : add llama_arg * Update src/llama.cpp Co-authored-by: Xuan Son Nguyen <thichthat@gmail.com> * perf : separate functions in the API ggml-ci * perf : safer pointer handling + naming update ggml-ci * minor : better local var name * perf : abort on invalid sampler pointer ggml-ci --------- Co-authored-by: Xuan Son Nguyen <thichthat@gmail.com>
This commit is contained in:
parent
bd35cb0ae3
commit
0abc6a2c25
23 changed files with 135 additions and 91 deletions
|
@ -1669,3 +1669,37 @@ uint32_t llama_sampler_get_seed(const struct llama_sampler * smpl) {
|
|||
|
||||
return LLAMA_DEFAULT_SEED;
|
||||
}
|
||||
|
||||
// perf
|
||||
|
||||
struct llama_perf_sampler_data llama_perf_sampler(const struct llama_sampler * chain) {
|
||||
struct llama_perf_sampler_data data = {};
|
||||
|
||||
if (chain == nullptr || chain->iface != &llama_sampler_chain_i) {
|
||||
GGML_ABORT("%s: invalid sampler passed - requires a sampler created with llama_sampler_chain_init()\n", __func__);
|
||||
}
|
||||
|
||||
const auto * ctx = (const struct llama_sampler_chain *) chain->ctx;
|
||||
|
||||
data.t_sample_ms = 1e-3 * ctx->t_sample_us;
|
||||
data.n_sample = std::max(0, ctx->n_sample);
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
void llama_perf_sampler_print(const struct llama_sampler * chain) {
|
||||
const auto data = llama_perf_sampler(chain);
|
||||
|
||||
LLAMA_LOG_INFO("%s: sampling time = %10.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)\n",
|
||||
__func__, data.t_sample_ms, data.n_sample, data.t_sample_ms / data.n_sample, 1e3 / data.t_sample_ms * data.n_sample);
|
||||
}
|
||||
|
||||
void llama_perf_sampler_reset(struct llama_sampler * chain) {
|
||||
if (chain == nullptr || chain->iface != &llama_sampler_chain_i) {
|
||||
GGML_ABORT("%s: invalid sampler passed - requires a sampler created with llama_sampler_chain_init()\n", __func__);
|
||||
}
|
||||
|
||||
auto * ctx = (struct llama_sampler_chain *) chain->ctx;
|
||||
|
||||
ctx->t_sample_us = ctx->n_sample = 0;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue