From 8e66e59cdd994931dff59b5a14e5fe4edb6c5612 Mon Sep 17 00:00:00 2001 From: Bach Le Date: Sat, 8 Jul 2023 00:07:49 +0800 Subject: [PATCH] Record sampling time in llama_sample_classifier_free_guidance --- llama.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/llama.cpp b/llama.cpp index f96c9d143..cdfb1bbb6 100644 --- a/llama.cpp +++ b/llama.cpp @@ -2163,6 +2163,8 @@ void llama_sample_classifier_free_guidance( struct llama_context * guidance_ctx, float scale, float smooth_factor) { + int64_t t_start_sample_us = ggml_time_us(); + assert(ctx); auto n_vocab = llama_n_vocab(ctx); assert(n_vocab == (int)candidates->size); @@ -2195,6 +2197,10 @@ void llama_sample_classifier_free_guidance( candidates->data[i].logit = smooth_factor * guidance_logit + (1.f - smooth_factor) * base_logit; } + + if (ctx) { + ctx->t_sample_us += ggml_time_us() - t_start_sample_us; + } } llama_token llama_sample_token_mirostat(struct llama_context * ctx, llama_token_data_array * candidates, float tau, float eta, int m, float * mu) {