server : enable cache_prompt by default (#10501)

ggml-ci
Georgi Gerganov 2024-11-25 21:50:07 +02:00 committed by GitHub
parent 106964e3d2
commit 47f931c8f9
2 changed files with 3 additions and 3 deletions

@@ -111,7 +111,7 @@ struct server_static_file {
 struct slot_params {
     bool stream       = true;
-    bool cache_prompt = false; // remember the prompt to avoid reprocessing all prompt
+    bool cache_prompt = true;  // remember the prompt to avoid reprocessing all prompt
 
     int32_t n_keep    = 0; // number of tokens to keep from initial prompt
     int32_t n_discard = 0; // number of tokens after n_keep that may be discarded when shifting context, 0 defaults to half
@@ -883,7 +883,7 @@ struct server_context {
         }
 
         slot.params.stream       = json_value(data, "stream",       false);
-        slot.params.cache_prompt = json_value(data, "cache_prompt", false);
+        slot.params.cache_prompt = json_value(data, "cache_prompt", true);
         slot.params.n_predict    = json_value(data, "n_predict",    json_value(data, "max_tokens", defaults.n_predict));
         slot.params.n_indent     = json_value(data, "n_indent",     defaults.n_indent);
         slot.params.n_keep       = json_value(data, "n_keep",       defaults.n_keep);
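
With this change, prompt caching is on unless a client explicitly sends "cache_prompt": false in the request body: json_value() reads a key from the request JSON and falls back to the given default when the key is absent, so the new default only applies to requests that omit the field. The helper below is a minimal sketch of that fallback behavior, a simplified stand-in for llama.cpp's own json_value utility rather than the actual implementation; it assumes nlohmann::json is available.

// Sketch of json_value() fallback semantics (simplified stand-in, not
// llama.cpp's actual helper). Assumes nlohmann::json.
#include <nlohmann/json.hpp>
#include <iostream>
#include <string>

using json = nlohmann::json;

template <typename T>
static T json_value(const json & body, const std::string & key, const T & default_value) {
    // Use the value from the request body when present and non-null,
    // otherwise fall back to the server-side default.
    if (body.contains(key) && !body.at(key).is_null()) {
        try {
            return body.at(key).get<T>();
        } catch (const json::exception &) {
            return default_value;
        }
    }
    return default_value;
}

int main() {
    // A request that omits cache_prompt now gets the new default: true.
    json req_default = { {"prompt", "Hello"} };
    std::cout << json_value(req_default, "cache_prompt", true) << "\n"; // prints 1

    // A client can still disable prompt caching per request.
    json req_opt_out = { {"prompt", "Hello"}, {"cache_prompt", false} };
    std::cout << json_value(req_opt_out, "cache_prompt", true) << "\n"; // prints 0
}

In short, only clients that explicitly opt out bypass the cache; all other requests reuse the cached prompt by default.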