server : check that the prompt fits in the slot's context (#10030)
ggml-ci
This commit is contained in:
parent
958367bf53
commit
bc5ba007b2
3 changed files with 10 additions and 1 deletion
|
@@ -1882,12 +1882,17 @@ struct server_context {
|
|||
}
|
||||
|
||||
if (slot.inf_type == SERVER_TASK_INF_TYPE_EMBEDDING || slot.inf_type == SERVER_TASK_INF_TYPE_RERANK) {
|
||||
// this prompt is too large to process - discard it
|
||||
if (slot.n_prompt_tokens > n_ubatch) {
|
||||
slot.release();
|
||||
send_error(slot, "input is too large to process. increase the physical batch size", ERROR_TYPE_SERVER);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (slot.n_prompt_tokens > slot.n_ctx) {
|
||||
slot.release();
|
||||
send_error(slot, "input is larger than the max context size. skipping", ERROR_TYPE_SERVER);
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
if (!params.ctx_shift) {
|
||||
// if context shift is disabled, we make sure prompt size is smaller than KV size
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue