diff --git a/llama.cpp b/llama.cpp
index e742a1406..d311f5461 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -7372,17 +7372,30 @@ void llama_sample_min_p(struct llama_context * ctx, llama_token_data_array * can
     // Calculate the minimum percentage requirement.
     multiplied_min_p = base_min_p * multiplication_factor;
 
-    printf("Base min_p value: %f\n", base_min_p);
-    printf("Calculated multiplied_min_p (threshold) value: %f\n", multiplied_min_p);
 
     // Store the tokens that meet the threshold in a new list.
     std::vector<llama_token_data> filtered_candidates;
     filtered_candidates.reserve(candidates->size); // Reserve to avoid multiple reallocations
 
+    size_t kept_count = 0; // Counter for how many tokens are kept
+
     for (size_t i = 0; i < candidates->size; ++i) {
         // If a token's probability is above the threshold, we keep it.
         if (candidates->data[i].p >= multiplied_min_p) {
             filtered_candidates.push_back(candidates->data[i]);
+            kept_count++; // Increment the counter
+        }
+    }
+
+    // If not enough candidates meet the threshold, take the top 'min_keep' ones
+    if (kept_count < min_keep) {
+        std::sort(candidates->data, candidates->data + candidates->size,
+            [](const llama_token_data & a, const llama_token_data & b) {
+                return a.p > b.p; // Sort by probability in descending order
+            });
+        filtered_candidates.clear(); // Clear the previously filtered candidates
+        for (size_t i = 0; i < min_keep; ++i) {
+            filtered_candidates.push_back(candidates->data[i]);
         }
     }
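For readers who want to see the selection logic in isolation, here is a minimal, self-contained sketch of what the hunk above does: keep every token whose probability clears the `base_min_p * multiplication_factor` threshold, and fall back to the top `min_keep` tokens by probability when too few survive. The `token_prob` struct, the `min_p_filter` helper, and the hard-coded probabilities are illustrative assumptions for this sketch only; the actual patch works in place on a `llama_token_data_array`, and how `multiplication_factor` is computed lies outside this hunk.

```cpp
#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <vector>

struct token_prob {   // simplified stand-in for llama_token_data
    int   id;
    float p;          // probability after softmax
};

// Keep tokens with p >= base_min_p * multiplication_factor; if fewer than
// min_keep remain, keep the top min_keep tokens by probability instead.
static std::vector<token_prob> min_p_filter(std::vector<token_prob> candidates,
                                            float base_min_p,
                                            float multiplication_factor,
                                            size_t min_keep) {
    const float threshold = base_min_p * multiplication_factor;

    std::vector<token_prob> filtered;
    filtered.reserve(candidates.size());
    for (const auto & c : candidates) {
        if (c.p >= threshold) {
            filtered.push_back(c); // token clears the scaled min_p threshold
        }
    }

    // Fallback: not enough tokens met the threshold.
    if (filtered.size() < min_keep) {
        std::sort(candidates.begin(), candidates.end(),
                  [](const token_prob & a, const token_prob & b) { return a.p > b.p; });
        filtered.assign(candidates.begin(),
                        candidates.begin() + std::min(min_keep, candidates.size()));
    }
    return filtered;
}

int main() {
    std::vector<token_prob> candidates = {
        {0, 0.50f}, {1, 0.30f}, {2, 0.15f}, {3, 0.04f}, {4, 0.01f},
    };
    // Example values only: threshold = 0.2 * 1.0 = 0.2, so just two tokens pass,
    // and the min_keep = 3 fallback kicks in and keeps the top three instead.
    auto kept = min_p_filter(candidates, 0.2f, 1.0f, 3);
    for (const auto & t : kept) {
        printf("kept token %d (p = %.2f)\n", t.id, t.p);
    }
    return 0;
}
```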