From 5c749bea009183582b1f2583fa3d198375844fb4 Mon Sep 17 00:00:00 2001
From: Xuan Son Nguyen
Date: Wed, 23 Oct 2024 23:39:48 +0200
Subject: [PATCH] move prompt_tokens.empty() check

---
 examples/server/server.cpp | 26 +++++++++++---------------
 1 file changed, 11 insertions(+), 15 deletions(-)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 13bea289b..e049927d0 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1866,12 +1866,8 @@ struct server_context {
         // next, batch any pending prompts without exceeding n_batch
         if (params.cont_batching || batch.n_tokens == 0) {
             for (auto & slot : slots) {
-                if (!slot.is_processing()) {
-                    continue;
-                }
-
                 // this slot still has a prompt to be processed
-                if (!slot.prompt_tokens.empty() && slot.state == SLOT_STATE_PROCESSING_PROMPT) {
+                if (slot.state == SLOT_STATE_PROCESSING_PROMPT) {
                     auto & prompt_tokens = slot.prompt_tokens;
 
                     slot.t_start_process_prompt = ggml_time_us();
@@ -1879,6 +1875,16 @@ struct server_context {
                     slot.n_past = 0;
                     slot.n_prompt_tokens = prompt_tokens.size();
 
+                    // empty prompt passed -> release the slot and send empty response
+                    if (prompt_tokens.empty()) {
+                        SLT_WRN(slot, "%s", "empty prompt - releasing slot\n");
+
+                        slot.release();
+                        slot.print_timings();
+                        send_final_response(slot);
+                        continue;
+                    }
+
                     SLT_INF(slot, "new prompt, n_ctx_slot = %d, n_keep = %d, n_prompt_tokens = %d\n", slot.n_ctx, slot.params.n_keep, slot.n_prompt_tokens);
 
                     // print prompt tokens (for debugging)
@@ -1894,16 +1900,6 @@ struct server_context {
                         }
                     }
 
-                    // empty prompt passed -> release the slot and send empty response
-                    if (prompt_tokens.empty()) {
-                        SLT_WRN(slot, "%s", "empty prompt - releasing slot\n");
-
-                        slot.release();
-                        slot.print_timings();
-                        send_final_response(slot);
-                        continue;
-                    }
-
                     if (slot.cmpl_type == SERVER_TASK_CMPL_TYPE_EMBEDDING || slot.cmpl_type == SERVER_TASK_CMPL_TYPE_RERANK) {
                         // this prompt is too large to process - discard it
                         if (slot.n_prompt_tokens > n_ubatch) {
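
For reference, a minimal self-contained C++ sketch of the control flow after this change. The types and helpers here (slot_t, send_final_response, plain printf logging) are simplified stand-ins, not the server's real implementation; the sketch only illustrates that the empty-prompt guard now sits inside the SLOT_STATE_PROCESSING_PROMPT branch, before the per-prompt logging runs.

// sketch.cpp - simplified stand-ins for the real server_context types;
// slot_t, send_final_response and the printf logging are hypothetical.
#include <cstdio>
#include <vector>

enum slot_state { SLOT_STATE_IDLE, SLOT_STATE_PROCESSING_PROMPT };

struct slot_t {
    int              id    = 0;
    slot_state       state = SLOT_STATE_IDLE;
    std::vector<int> prompt_tokens;

    void release() { state = SLOT_STATE_IDLE; } // stand-in for the real release()
};

// stand-in for server_context::send_final_response()
static void send_final_response(const slot_t & slot) {
    std::printf("slot %d: empty final response sent\n", slot.id);
}

int main() {
    std::vector<slot_t> slots = {
        {0, SLOT_STATE_PROCESSING_PROMPT, {}},        // empty prompt
        {1, SLOT_STATE_PROCESSING_PROMPT, {1, 2, 3}}, // normal prompt
    };

    for (auto & slot : slots) {
        // after the patch, the state check alone gates the branch; the old
        // is_processing() pre-check and the prompt_tokens.empty() condition
        // are no longer part of the branch condition
        if (slot.state == SLOT_STATE_PROCESSING_PROMPT) {
            auto & prompt_tokens = slot.prompt_tokens;

            // empty prompt passed -> release the slot and send empty response,
            // before any "new prompt" logging happens
            if (prompt_tokens.empty()) {
                std::printf("slot %d: empty prompt - releasing slot\n", slot.id);
                slot.release();
                send_final_response(slot);
                continue;
            }

            std::printf("slot %d: new prompt, n_prompt_tokens = %zu\n",
                        slot.id, prompt_tokens.size());
        }
    }
    return 0;
}

Folding the check into the state branch also makes the separate is_processing() guard unnecessary, since a slot in SLOT_STATE_PROCESSING_PROMPT is by definition being processed.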