From 2387dbea7d3417218faf7507eb9ff4eece396717 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Sat, 7 Sep 2024 14:50:43 +0300
Subject: [PATCH] sampling : fix repeat penalty out-of-bounds access

The penalties sampler iterated over the last penalty_last_n tokens even when
fewer tokens had been pushed into the prev ring buffer, reading past the end
of the buffer. Clamp the loop to the number of tokens actually stored.

Also simplify the slot state transition in the server example and remove the
TODO note above the n_probs loop.

ggml-ci
---
 examples/server/server.cpp | 8 +++-----
 src/llama-sampling.cpp     | 2 +-
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 1095f43b2..f45b59983 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -2323,10 +2323,10 @@ struct server_context {
                     slot.release();
                     slot.i_batch = -1;
                     continue; // continue loop of slots
-                } else {
-                    // prompt evaluated for next-token prediction
-                    slot.state = SLOT_STATE_GENERATING;
                 }
+
+                // prompt evaluated for next-token prediction
+                slot.state = SLOT_STATE_GENERATING;
             } else if (slot.state != SLOT_STATE_GENERATING) {
                 continue; // continue loop of slots
             }
@@ -2347,8 +2347,6 @@ struct server_context {
 
             const auto * cur_p = gpt_sampler_get_candidates(slot.smpl);
 
-            // TODO: this logic might have been broken during https://github.com/ggerganov/llama.cpp/pull/8643
-            // fix if necessary
             for (size_t i = 0; i < (size_t) slot.sparams.n_probs; ++i) {
                 result.probs.push_back({
                     cur_p->data[i].id,
diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp
index 02b93b64c..61f4cbb92 100644
--- a/src/llama-sampling.cpp
+++ b/src/llama-sampling.cpp
@@ -1280,7 +1280,7 @@ static struct llama_sampler_i llama_sampler_penalties_i = {
     // Create a frequency map to count occurrences of each token in last_tokens
     // TODO: optimize this by maintaining the token count in the sampler context
     llama_token_cnt token_count;
-    for (int i = 0; i < ctx->penalty_last_n; ++i) {
+    for (int i = 0; i < std::min<int>(ctx->penalty_last_n, (int) ctx->prev.size()); ++i) {
         token_count[ctx->prev.rat(i)]++;
     }
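
For context, a minimal standalone sketch of the out-of-bounds scenario that the
llama-sampling.cpp hunk guards against. It is illustrative only: a plain
std::vector and a local rat() lambda stand in for llama.cpp's ring_buffer and
its reverse-index accessor, and the token values are made up; only the std::min
clamp mirrors the actual change.

    // sketch: repeat-penalty counting with fewer stored tokens than the window
    #include <algorithm>
    #include <cstdio>
    #include <unordered_map>
    #include <vector>

    using llama_token = int;

    int main() {
        // Only three tokens have been sampled so far...
        std::vector<llama_token> prev = { 10, 20, 20 };
        // ...but the configured penalty window is much larger.
        int penalty_last_n = 64;

        // "i-th most recent token" -- stands in for ring_buffer::rat(i).
        auto rat = [&](int i) { return prev[prev.size() - 1 - i]; };

        std::unordered_map<llama_token, int> token_count;

        // Without the clamp, i would run up to 63 and index far past the three
        // stored tokens -- the out-of-bounds read the patch fixes.
        for (int i = 0; i < std::min<int>(penalty_last_n, (int) prev.size()); ++i) {
            token_count[rat(i)]++;
        }

        for (const auto & kv : token_count) {
            std::printf("token %d repeated %d time(s)\n", kv.first, kv.second);
        }

        return 0;
    }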