move prompt_tokens.empty() check

Xuan Son Nguyen 2024-10-23 23:39:48 +02:00
parent 125835b253
commit 5c749bea00


@@ -1866,12 +1866,8 @@ struct server_context {
         // next, batch any pending prompts without exceeding n_batch
         if (params.cont_batching || batch.n_tokens == 0) {
             for (auto & slot : slots) {
-                if (!slot.is_processing()) {
-                    continue;
-                }
-
                 // this slot still has a prompt to be processed
-                if (!slot.prompt_tokens.empty() && slot.state == SLOT_STATE_PROCESSING_PROMPT) {
+                if (slot.state == SLOT_STATE_PROCESSING_PROMPT) {
                     auto & prompt_tokens = slot.prompt_tokens;
 
                     slot.t_start_process_prompt = ggml_time_us();
@@ -1879,6 +1875,16 @@ struct server_context {
                     slot.n_past = 0;
                     slot.n_prompt_tokens = prompt_tokens.size();
 
+                    // empty prompt passed -> release the slot and send empty response
+                    if (prompt_tokens.empty()) {
+                        SLT_WRN(slot, "%s", "empty prompt - releasing slot\n");
+                        slot.release();
+                        slot.print_timings();
+                        send_final_response(slot);
+                        continue;
+                    }
+
                     SLT_INF(slot, "new prompt, n_ctx_slot = %d, n_keep = %d, n_prompt_tokens = %d\n", slot.n_ctx, slot.params.n_keep, slot.n_prompt_tokens);
 
                     // print prompt tokens (for debugging)
@@ -1894,16 +1900,6 @@ struct server_context {
                         }
                     }
 
-                    // empty prompt passed -> release the slot and send empty response
-                    if (prompt_tokens.empty()) {
-                        SLT_WRN(slot, "%s", "empty prompt - releasing slot\n");
-                        slot.release();
-                        slot.print_timings();
-                        send_final_response(slot);
-                        continue;
-                    }
-
                     if (slot.cmpl_type == SERVER_TASK_CMPL_TYPE_EMBEDDING || slot.cmpl_type == SERVER_TASK_CMPL_TYPE_RERANK) {
                         // this prompt is too large to process - discard it
                         if (slot.n_prompt_tokens > n_ubatch) {
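
For reference, below is a minimal, self-contained sketch of the control flow this change produces in the prompt-batching loop. It is an illustration only: server_slot, slot_state, and send_final_response here are simplified stand-ins rather than the real server types, and the SLT_* logging and timing calls are replaced with plain printf.

// Sketch of the post-commit flow: the empty-prompt check now runs right after
// the slot's counters are initialized, before any further prompt handling.
#include <cstdio>
#include <vector>

enum slot_state { SLOT_STATE_IDLE, SLOT_STATE_PROCESSING_PROMPT };

struct server_slot {
    slot_state       state = SLOT_STATE_IDLE;
    std::vector<int> prompt_tokens;
    int              n_past          = 0;
    int              n_prompt_tokens = 0;

    void release() { state = SLOT_STATE_IDLE; }
};

// stand-in for the server's send_final_response()
static void send_final_response(const server_slot &) {
    std::printf("sent empty final response\n");
}

int main() {
    std::vector<server_slot> slots(2);
    slots[0].state = SLOT_STATE_PROCESSING_PROMPT;       // empty prompt
    slots[1].state = SLOT_STATE_PROCESSING_PROMPT;
    slots[1].prompt_tokens = {1, 2, 3};                   // non-empty prompt

    for (auto & slot : slots) {
        // the old standalone is_processing() pre-check is gone; the state
        // check below already skips idle slots
        if (slot.state == SLOT_STATE_PROCESSING_PROMPT) {
            auto & prompt_tokens = slot.prompt_tokens;

            slot.n_past          = 0;
            slot.n_prompt_tokens = (int) prompt_tokens.size();

            // empty prompt passed -> release the slot and send empty response,
            // before logging, truncation, or batching work
            if (prompt_tokens.empty()) {
                std::printf("empty prompt - releasing slot\n");
                slot.release();
                send_final_response(slot);
                continue;
            }

            std::printf("processing prompt with %d tokens\n", slot.n_prompt_tokens);
        }
    }
    return 0;
}

The net effect of the move is that an empty prompt releases the slot and answers immediately after n_past and n_prompt_tokens are set, instead of only after the "new prompt" logging and debug printing further down in the function.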