server : don't overfill the batch during infill (#10018)

ggml-ci
This commit is contained in:
Georgi Gerganov 2024-10-28 08:49:32 +02:00 committed by GitHub
parent 8841ce3f43
commit 8125e6cbfc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 5 additions and 2 deletions

View file

@@ -1880,6 +1880,7 @@ struct server_context {
if (slot.state == SLOT_STATE_STARTED) {
slot.t_start_process_prompt = ggml_time_us();
slot.t_start_generation = 0;
slot.n_past = 0;
slot.n_prompt_tokens = prompt_tokens.size();
slot.state = SLOT_STATE_PROCESSING_PROMPT;