sampling : fix repeat penalty out-of-bounds access
ggml-ci
parent 8a82f388cd
commit 2387dbea7d
2 changed files with 4 additions and 6 deletions
@@ -2323,10 +2323,10 @@ struct server_context {
                     slot.release();
                     slot.i_batch = -1;
                     continue; // continue loop of slots
-                } else {
-                    // prompt evaluated for next-token prediction
-                    slot.state = SLOT_STATE_GENERATING;
                 }
+
+                // prompt evaluated for next-token prediction
+                slot.state = SLOT_STATE_GENERATING;
             } else if (slot.state != SLOT_STATE_GENERATING) {
                 continue; // continue loop of slots
             }
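The first hunk appears to be a control-flow simplification rather than the bug fix itself: because the release branch ends with `continue`, the surrounding `else` is unnecessary and the state update can follow the branch at the same nesting level. A minimal sketch of that pattern, using simplified stand-in types rather than the server's real slot structures:

#include <cstdio>
#include <vector>

// Simplified stand-ins for the server's slot states and slot struct (illustrative only).
enum slot_state { SLOT_STATE_PROCESSING_PROMPT, SLOT_STATE_GENERATING, SLOT_STATE_IDLE };

struct slot_t {
    slot_state state   = SLOT_STATE_PROCESSING_PROMPT;
    bool       release = false; // pretend this slot failed and must be released
    int        i_batch = 0;
};

int main() {
    std::vector<slot_t> slots(3);
    slots[1].release = true;

    for (auto & slot : slots) {
        if (slot.release) {
            slot.state   = SLOT_STATE_IDLE;
            slot.i_batch = -1;
            continue; // continue loop of slots
        }

        // prompt evaluated for next-token prediction: no `else` is needed,
        // because the branch above always continues
        slot.state = SLOT_STATE_GENERATING;
    }

    for (size_t i = 0; i < slots.size(); ++i) {
        printf("slot %zu state %d\n", i, (int) slots[i].state);
    }

    return 0;
}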
@@ -2347,8 +2347,6 @@ struct server_context {
 
                 const auto * cur_p = gpt_sampler_get_candidates(slot.smpl);
 
-                // TODO: this logic might have been broken during https://github.com/ggerganov/llama.cpp/pull/8643
-                // fix if necessary
                 for (size_t i = 0; i < (size_t) slot.sparams.n_probs; ++i) {
                     result.probs.push_back({
                         cur_p->data[i].id,
@@ -1280,7 +1280,7 @@ static struct llama_sampler_i llama_sampler_penalties_i = {
     // Create a frequency map to count occurrences of each token in last_tokens
     // TODO: optimize this by maintaining the token count in the sampler context
     llama_token_cnt token_count;
-    for (int i = 0; i < ctx->penalty_last_n; ++i) {
+    for (int i = 0; i < std::min<int>(ctx->penalty_last_n, ctx->prev.size()); ++i) {
         token_count[ctx->prev.rat(i)]++;
     }
 
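The last hunk carries the fix named in the commit message: the frequency-count loop previously ran `penalty_last_n` iterations even when `ctx->prev` held fewer tokens, so `ctx->prev.rat(i)` could read past what had been recorded; clamping the bound with `std::min` removes the out-of-bounds access. A minimal sketch of the same clamp, assuming a simplified stand-in ring buffer (the sampler's actual ring-buffer and `llama_token_cnt` types are not reproduced here):

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <unordered_map>
#include <vector>

using llama_token = int32_t;

// Simplified stand-in for the sampler's ring buffer of previous tokens:
// rat(i) returns the i-th most recent token.
struct prev_tokens {
    std::vector<llama_token> data;

    void        push_back(llama_token t) { data.push_back(t); }
    size_t      size() const             { return data.size(); }
    llama_token rat(size_t i) const      { return data[data.size() - 1 - i]; }
};

int main() {
    prev_tokens prev;
    prev.push_back(3);
    prev.push_back(7);
    prev.push_back(3); // only 3 tokens recorded so far

    const int penalty_last_n = 64; // penalty window larger than what was recorded

    std::unordered_map<llama_token, int> token_count;

    // Before the fix the loop ran penalty_last_n times regardless of how many
    // tokens were actually stored, so rat(i) was called with i >= size().
    // Clamping the bound to the buffer size avoids the out-of-bounds read.
    for (int i = 0; i < std::min<int>(penalty_last_n, prev.size()); ++i) {
        token_count[prev.rat(i)]++;
    }

    for (const auto & [id, cnt] : token_count) {
        printf("token %d seen %d time(s)\n", (int) id, cnt);
    }

    return 0;
}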