diff --git a/llama.cpp b/llama.cpp index 8c906a22f..02028b900 100644 --- a/llama.cpp +++ b/llama.cpp @@ -8004,7 +8004,8 @@ void llama_sample_top_k(struct llama_context * ctx, llama_token_data_array * can if (k == (int) candidates->size) { std::sort(candidates->data, candidates->data + candidates->size, comp); } else { - std::partial_sort(candidates->data, candidates->data + k, candidates->data + candidates->size, comp); + std::nth_element(candidates->data, candidates->data + k, candidates->data + candidates->size, comp); // partition: the k candidates that order first under comp end up in front, in unspecified order + std::sort(candidates->data, candidates->data + k, comp); // then sort only those first k candidates by comp } candidates->sorted = true; }