diff --git a/llama.cpp b/llama.cpp index 72457ec0c..27a6277eb 100644 --- a/llama.cpp +++ b/llama.cpp @@ -8371,12 +8371,12 @@ void llama_sample_top_k(struct llama_context * ctx, llama_token_data_array * can // return; // } + const int64_t t_start_sample_us = ggml_time_us(); + if (k <= 0) { k = candidates->size; } - const int64_t t_start_sample_us = ggml_time_us(); - k = std::max(k, (int) min_keep); k = std::min(k, (int) candidates->size);