move prompt_tokens.empty() check

Xuan Son Nguyen 2024-10-23 23:39:48 +02:00
parent 125835b253
commit 5c749bea00


@@ -1866,12 +1866,8 @@ struct server_context {
         // next, batch any pending prompts without exceeding n_batch
         if (params.cont_batching || batch.n_tokens == 0) {
             for (auto & slot : slots) {
-                if (!slot.is_processing()) {
-                    continue;
-                }
-
                 // this slot still has a prompt to be processed
-                if (!slot.prompt_tokens.empty() && slot.state == SLOT_STATE_PROCESSING_PROMPT) {
+                if (slot.state == SLOT_STATE_PROCESSING_PROMPT) {
                     auto & prompt_tokens = slot.prompt_tokens;
 
                     slot.t_start_process_prompt = ggml_time_us();
@@ -1879,6 +1875,16 @@ struct server_context {
                     slot.n_past = 0;
                     slot.n_prompt_tokens = prompt_tokens.size();
 
+                    // empty prompt passed -> release the slot and send empty response
+                    if (prompt_tokens.empty()) {
+                        SLT_WRN(slot, "%s", "empty prompt - releasing slot\n");
+                        slot.release();
+                        slot.print_timings();
+                        send_final_response(slot);
+                        continue;
+                    }
+
                     SLT_INF(slot, "new prompt, n_ctx_slot = %d, n_keep = %d, n_prompt_tokens = %d\n", slot.n_ctx, slot.params.n_keep, slot.n_prompt_tokens);
 
                     // print prompt tokens (for debugging)
@@ -1894,16 +1900,6 @@ struct server_context {
                         }
                     }
 
-                    // empty prompt passed -> release the slot and send empty response
-                    if (prompt_tokens.empty()) {
-                        SLT_WRN(slot, "%s", "empty prompt - releasing slot\n");
-                        slot.release();
-                        slot.print_timings();
-                        send_final_response(slot);
-                        continue;
-                    }
-
                     if (slot.cmpl_type == SERVER_TASK_CMPL_TYPE_EMBEDDING || slot.cmpl_type == SERVER_TASK_CMPL_TYPE_RERANK) {
                         // this prompt is too large to process - discard it
                         if (slot.n_prompt_tokens > n_ubatch) {
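
For reference, below is a minimal, self-contained sketch of the control flow this change produces in the prompt-batching loop. It is an illustration only: server_slot, slot_state, and send_final_response here are simplified stand-ins rather than the real server types, and the SLT_* logging and timing calls are replaced with plain printf.

// Sketch of the post-commit flow: the empty-prompt check now runs right after
// the slot's counters are initialized, before any further prompt handling.
#include <cstdio>
#include <vector>

enum slot_state { SLOT_STATE_IDLE, SLOT_STATE_PROCESSING_PROMPT };

struct server_slot {
    slot_state       state = SLOT_STATE_IDLE;
    std::vector<int> prompt_tokens;
    int              n_past          = 0;
    int              n_prompt_tokens = 0;

    void release() { state = SLOT_STATE_IDLE; }
};

// stand-in for the server's send_final_response()
static void send_final_response(const server_slot &) {
    std::printf("sent empty final response\n");
}

int main() {
    std::vector<server_slot> slots(2);
    slots[0].state = SLOT_STATE_PROCESSING_PROMPT;       // empty prompt
    slots[1].state = SLOT_STATE_PROCESSING_PROMPT;
    slots[1].prompt_tokens = {1, 2, 3};                   // non-empty prompt

    for (auto & slot : slots) {
        // the old standalone is_processing() pre-check is gone; the state
        // check below already skips idle slots
        if (slot.state == SLOT_STATE_PROCESSING_PROMPT) {
            auto & prompt_tokens = slot.prompt_tokens;

            slot.n_past          = 0;
            slot.n_prompt_tokens = (int) prompt_tokens.size();

            // empty prompt passed -> release the slot and send empty response,
            // before logging, truncation, or batching work
            if (prompt_tokens.empty()) {
                std::printf("empty prompt - releasing slot\n");
                slot.release();
                send_final_response(slot);
                continue;
            }

            std::printf("processing prompt with %d tokens\n", slot.n_prompt_tokens);
        }
    }
    return 0;
}

The net effect of the move is that an empty prompt releases the slot and answers immediately after n_past and n_prompt_tokens are set, instead of only after the "new prompt" logging and debug printing further down in the function.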