use nullptr in llama_sample_softmax call during llama_sample_entropy

This avoids counting the elapsed sampling time twice in the timing stats.

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
l3utterfly 2024-01-22 22:12:33 +09:00 committed by GitHub
parent babb76a33a
commit a98a49836c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -7786,7 +7786,7 @@ void llama_sample_typical(struct llama_context * ctx, llama_token_data_array * c
void llama_sample_entropy(struct llama_context * ctx, llama_token_data_array * candidates_p, float min_temp, float max_temp, float exponent_val) {
const int64_t t_start_sample_us = ggml_time_us();
-llama_sample_softmax(ctx, candidates_p);
+llama_sample_softmax(nullptr, candidates_p);
// Calculate entropy of the softmax probabilities
float entropy = 0.0f;