server : check that the prompt fits in the slot's context (#10030)

ggml-ci
This commit is contained in:
Georgi Gerganov 2024-10-25 10:13:46 +03:00 committed by GitHub
parent 958367bf53
commit bc5ba007b2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 10 additions and 1 deletions

View file

@ -1882,12 +1882,17 @@ struct server_context {
}
if (slot.inf_type == SERVER_TASK_INF_TYPE_EMBEDDING || slot.inf_type == SERVER_TASK_INF_TYPE_RERANK) {
// this prompt is too large to process - discard it
if (slot.n_prompt_tokens > n_ubatch) {
slot.release();
send_error(slot, "input is too large to process. increase the physical batch size", ERROR_TYPE_SERVER);
continue;
}
if (slot.n_prompt_tokens > slot.n_ctx) {
slot.release();
send_error(slot, "input is larger than the max context size. skipping", ERROR_TYPE_SERVER);
continue;
}
} else {
if (!params.ctx_shift) {
// if context shift is disabled, we make sure prompt size is smaller than KV size