Always sort logits before nucleus sampling

This commit is contained in:
Ivan Stepanov 2023-04-06 17:05:38 +03:00
parent eeaa7b0492
commit 9f0c7cdb8f

View file

@@ -1236,19 +1236,13 @@ static llama_vocab::id llama_sample_top_p_top_k(
} }
} }
if (top_k > 0 && top_k < n_logits) { sample_top_k(logits_id, top_k > 0 ? std::min(top_k, n_logits) : n_logits);
sample_top_k(logits_id, top_k);
}
float maxl = -std::numeric_limits<float>::infinity();
for (const auto & kv : logits_id) {
maxl = Max(maxl, kv.first);
}
// compute probs for the top k tokens // compute probs for the top k tokens
std::vector<float> probs; std::vector<float> probs;
probs.reserve(logits_id.size()); probs.reserve(logits_id.size());
float maxl = logits_id[0].first;
double sum = 0.0; double sum = 0.0;
for (const auto & kv : logits_id) { for (const auto & kv : logits_id) {
const float p = expf(kv.first - maxl); const float p = expf(kv.first - maxl);
@@ -1272,15 +1266,14 @@ static llama_vocab::id llama_sample_top_p_top_k(
} }
} }
cumsum = 1.0/cumsum;
for (int i = 0; i < (int) probs.size(); i++) { for (int i = 0; i < (int) probs.size(); i++) {
probs[i] *= cumsum; probs[i] /= cumsum;
} }
} }
//printf("\n"); //printf("\n");
//for (int i = 0; i < (int) 10; i++) { //for (int i = 0; i < (int) 10; i++) {
// printf("%d: '%s' %f\n", i, vocab.id_to_token.at(logits_id[i].second).c_str(), probs[i]); // printf("%d: '%s' %f\n", i, lctx.vocab.id_to_token.at(logits_id[i].second).tok.c_str(), probs[i]);
//} //}
//printf("\n\n"); //printf("\n\n");
//exit(0); //exit(0);