server : batch has to be allocated for n_parallel sequences
This commit is contained in:
parent
6b2437e32d
commit
113dd60005
1 changed file with 3 additions and 1 deletion
|
@@ -631,7 +631,9 @@ struct llama_server_context
|
||||||
LOG_TEE(" -> Slot %i - max context: %i\n", slot.id, max_ctx_per_slot);
|
LOG_TEE(" -> Slot %i - max context: %i\n", slot.id, max_ctx_per_slot);
|
||||||
slots.push_back(slot);
|
slots.push_back(slot);
|
||||||
}
|
}
|
||||||
batch = llama_batch_init(n_ctx, 0, 1);
|
|
||||||
|
batch = llama_batch_init(n_ctx, 0, params.n_parallel);
|
||||||
|
|
||||||
// empty system prompt
|
// empty system prompt
|
||||||
system_prompt = "";
|
system_prompt = "";
|
||||||
num_tokens_system = 0;
|
num_tokens_system = 0;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue