From a98a49836ce45e3ad9d32fde68a6008b8052cd07 Mon Sep 17 00:00:00 2001
From: l3utterfly
Date: Mon, 22 Jan 2024 22:12:33 +0900
Subject: [PATCH] use nullptr in llama_sample_softmax call during llama_sample_entropy

this avoids counting the time taken stats twice

Co-authored-by: Georgi Gerganov
---
 llama.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama.cpp b/llama.cpp
index d1519214b..be3dd74c7 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -7786,7 +7786,7 @@ void llama_sample_typical(struct llama_context * ctx, llama_token_data_array * c
 void llama_sample_entropy(struct llama_context * ctx, llama_token_data_array * candidates_p, float min_temp, float max_temp, float exponent_val) {
     const int64_t t_start_sample_us = ggml_time_us();
 
-    llama_sample_softmax(ctx, candidates_p);
+    llama_sample_softmax(nullptr, candidates_p);
 
     // Calculate entropy of the softmax probabilities
     float entropy = 0.0f;