sampling : fix repeat penalty out-of-bounds access

ggml-ci
This commit is contained in:
Georgi Gerganov 2024-09-07 14:50:43 +03:00
parent 8a82f388cd
commit 2387dbea7d
No known key found for this signature in database
GPG key ID: 449E073F9DC10735
2 changed files with 4 additions and 6 deletions

View file

@@ -2323,10 +2323,10 @@ struct server_context {
                     slot.release();
                     slot.i_batch = -1;
                     continue; // continue loop of slots
-                } else {
-                    // prompt evaluated for next-token prediction
-                    slot.state = SLOT_STATE_GENERATING;
                 }
+
+                // prompt evaluated for next-token prediction
+                slot.state = SLOT_STATE_GENERATING;
             } else if (slot.state != SLOT_STATE_GENERATING) {
                 continue; // continue loop of slots
             }
@@ -2347,8 +2347,6 @@ struct server_context {
                     const auto * cur_p = gpt_sampler_get_candidates(slot.smpl);

-                    // TODO: this logic might have been broken during https://github.com/ggerganov/llama.cpp/pull/8643
-                    // fix if necessary
                     for (size_t i = 0; i < (size_t) slot.sparams.n_probs; ++i) {
                         result.probs.push_back({
                             cur_p->data[i].id,

View file

@@ -1280,7 +1280,7 @@ static struct llama_sampler_i llama_sampler_penalties_i = {
         // Create a frequency map to count occurrences of each token in last_tokens
         // TODO: optimize this by maintaining the token count in the sampler context
         llama_token_cnt token_count;
-        for (int i = 0; i < ctx->penalty_last_n; ++i) {
+        for (int i = 0; i < std::min<int>(ctx->penalty_last_n, ctx->prev.size()); ++i) {
             token_count[ctx->prev.rat(i)]++;
         }