don't shift the KV cache if there's no truncation

This commit is contained in:
Paulo 2024-05-02 21:28:47 -03:00
parent 0c115da251
commit 4a471b12d6

View file

@@ -103,7 +103,7 @@ struct slot_params {
uint32_t seed = -1; // RNG seed
int32_t n_keep = 0; // number of tokens to keep from initial prompt
int32_t n_discard = 0; // number of tokens after n_keep that may be discarded when shifting context, 0 defaults to half
int32_t n_truncate = 0;
int32_t n_truncate = 0; // number of tokens after n_keep that will be discarded when the prompt is bigger than the context
int32_t n_predict = -1; // new tokens to predict
std::vector<std::string> antiprompt;
@@ -2057,7 +2057,12 @@ struct server_context {
{"new_cache_size", new_cache_size},
{"cache_tokens", tokens_to_str(ctx, slot.cache_tokens.cbegin(), slot.cache_tokens.cend())},
});
} // else somebody trying to use n_truncate w/o previous cache
} else {
LOG_ERROR("n_truncate needs to be used with cache_prompt", {
{"id_slot", slot.id},
{"id_task", slot.id_task},
});
}
}
GGML_ASSERT(slot.n_prompt_tokens < slot.n_ctx);
@@ -2074,7 +2079,7 @@ struct server_context {
// reuse any previously computed tokens that are common with the new prompt
slot.n_past = common_part(slot.cache_tokens, prompt_tokens);
LOG_INFO("[cached_tokens, prompt_tokens]", {
LOG_INFO("[cache_tokens, prompt_tokens]", {
{ "id_slot", slot.id },
{ "id_task", slot.id_task },
{ "common_part", slot.n_past}
@@ -2113,7 +2118,7 @@ struct server_context {
// shift KV cache if needed
const int n_keep = slot.params.n_keep + add_bos_token;
const int n_truncate = slot.params.n_truncate;
if (n_truncate && slot.params.cache_prompt) {
if (n_truncate && slot.params.cache_prompt && slot.truncated) {
llama_kv_cache_seq_rm(ctx, slot.id + 1, n_keep, n_keep + n_truncate);
LOG_INFO("kv cache rm", {