use nullptr in llama_sample_softmax call during llama_sample_entropy

This avoids counting the time taken twice in the timing stats.

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
2024-01-22 22:12:33 +09:00 · 2024-01-22 22:12:33 +09:00 · a98a49836c
commit a98a49836c
parent babb76a33a
1 changed files with 1 additions and 1 deletions
--- a/llama.cpp
+++ b/llama.cpp
@@ -7786,7 +7786,7 @@ void llama_sample_typical(struct llama_context * ctx, llama_token_data_array * c
 void llama_sample_entropy(struct llama_context * ctx, llama_token_data_array * candidates_p, float min_temp, float max_temp, float exponent_val) {
    const int64_t t_start_sample_us = ggml_time_us();
-    llama_sample_softmax(ctx, candidates_p);
+    llama_sample_softmax(nullptr, candidates_p);
    // Calculate entropy of the softmax probabilities
    float entropy = 0.0f;