From 5c749bea009183582b1f2583fa3d198375844fb4 Mon Sep 17 00:00:00 2001
From: Xuan Son Nguyen
Date: Wed, 23 Oct 2024 23:39:48 +0200
Subject: [PATCH] move prompt_tokens.empty() check

---
 examples/server/server.cpp | 26 +++++++++++---------------
 1 file changed, 11 insertions(+), 15 deletions(-)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 13bea289b..e049927d0 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1866,12 +1866,8 @@ struct server_context {
         // next, batch any pending prompts without exceeding n_batch
         if (params.cont_batching || batch.n_tokens == 0) {
             for (auto & slot : slots) {
-                if (!slot.is_processing()) {
-                    continue;
-                }
-
                 // this slot still has a prompt to be processed
-                if (!slot.prompt_tokens.empty() && slot.state == SLOT_STATE_PROCESSING_PROMPT) {
+                if (slot.state == SLOT_STATE_PROCESSING_PROMPT) {
                     auto & prompt_tokens = slot.prompt_tokens;
 
                     slot.t_start_process_prompt = ggml_time_us();
@@ -1879,6 +1875,16 @@ struct server_context {
                     slot.n_past = 0;
                     slot.n_prompt_tokens = prompt_tokens.size();
 
+                    // empty prompt passed -> release the slot and send empty response
+                    if (prompt_tokens.empty()) {
+                        SLT_WRN(slot, "%s", "empty prompt - releasing slot\n");
+
+                        slot.release();
+                        slot.print_timings();
+                        send_final_response(slot);
+                        continue;
+                    }
+
                     SLT_INF(slot, "new prompt, n_ctx_slot = %d, n_keep = %d, n_prompt_tokens = %d\n", slot.n_ctx, slot.params.n_keep, slot.n_prompt_tokens);
 
                     // print prompt tokens (for debugging)
@@ -1894,16 +1900,6 @@ struct server_context {
                         }
                     }
 
-                    // empty prompt passed -> release the slot and send empty response
-                    if (prompt_tokens.empty()) {
-                        SLT_WRN(slot, "%s", "empty prompt - releasing slot\n");
-
-                        slot.release();
-                        slot.print_timings();
-                        send_final_response(slot);
-                        continue;
-                    }
-
                     if (slot.cmpl_type == SERVER_TASK_CMPL_TYPE_EMBEDDING || slot.cmpl_type == SERVER_TASK_CMPL_TYPE_RERANK) {
                         // this prompt is too large to process - discard it
                         if (slot.n_prompt_tokens > n_ubatch) {
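
For reference, a minimal self-contained C++ sketch of the control flow after this change. The types and helpers here (slot_t, send_final_response, plain printf logging) are simplified stand-ins, not the server's real implementation; the sketch only illustrates that the empty-prompt guard now sits inside the SLOT_STATE_PROCESSING_PROMPT branch, before the per-prompt logging runs.

// sketch.cpp - simplified stand-ins for the real server_context types;
// slot_t, send_final_response and the printf logging are hypothetical.
#include <cstdio>
#include <vector>

enum slot_state { SLOT_STATE_IDLE, SLOT_STATE_PROCESSING_PROMPT };

struct slot_t {
    int              id    = 0;
    slot_state       state = SLOT_STATE_IDLE;
    std::vector<int> prompt_tokens;

    void release() { state = SLOT_STATE_IDLE; } // stand-in for the real release()
};

// stand-in for server_context::send_final_response()
static void send_final_response(const slot_t & slot) {
    std::printf("slot %d: empty final response sent\n", slot.id);
}

int main() {
    std::vector<slot_t> slots = {
        {0, SLOT_STATE_PROCESSING_PROMPT, {}},        // empty prompt
        {1, SLOT_STATE_PROCESSING_PROMPT, {1, 2, 3}}, // normal prompt
    };

    for (auto & slot : slots) {
        // after the patch, the state check alone gates the branch; the old
        // is_processing() pre-check and the prompt_tokens.empty() condition
        // are no longer part of the branch condition
        if (slot.state == SLOT_STATE_PROCESSING_PROMPT) {
            auto & prompt_tokens = slot.prompt_tokens;

            // empty prompt passed -> release the slot and send empty response,
            // before any "new prompt" logging happens
            if (prompt_tokens.empty()) {
                std::printf("slot %d: empty prompt - releasing slot\n", slot.id);
                slot.release();
                send_final_response(slot);
                continue;
            }

            std::printf("slot %d: new prompt, n_prompt_tokens = %zu\n",
                        slot.id, prompt_tokens.size());
        }
    }
    return 0;
}

Folding the check into the state branch also makes the separate is_processing() guard unnecessary, since a slot in SLOT_STATE_PROCESSING_PROMPT is by definition being processed.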