move prompt_tokens.empty() check

This commit is contained in:
Xuan Son Nguyen 2024-10-23 23:39:48 +02:00
parent 125835b253
commit 5c749bea00

View file

@@ -1866,12 +1866,8 @@ struct server_context {
// next, batch any pending prompts without exceeding n_batch
if (params.cont_batching || batch.n_tokens == 0) {
for (auto & slot : slots) {
if (!slot.is_processing()) {
continue;
}
// this slot still has a prompt to be processed
if (!slot.prompt_tokens.empty() && slot.state == SLOT_STATE_PROCESSING_PROMPT) {
if (slot.state == SLOT_STATE_PROCESSING_PROMPT) {
auto & prompt_tokens = slot.prompt_tokens;
slot.t_start_process_prompt = ggml_time_us();
@@ -1879,6 +1875,16 @@ struct server_context {
slot.n_past = 0;
slot.n_prompt_tokens = prompt_tokens.size();
// empty prompt passed -> release the slot and send empty response
if (prompt_tokens.empty()) {
SLT_WRN(slot, "%s", "empty prompt - releasing slot\n");
slot.release();
slot.print_timings();
send_final_response(slot);
continue;
}
SLT_INF(slot, "new prompt, n_ctx_slot = %d, n_keep = %d, n_prompt_tokens = %d\n", slot.n_ctx, slot.params.n_keep, slot.n_prompt_tokens);
// print prompt tokens (for debugging)
@@ -1894,16 +1900,6 @@ struct server_context {
}
}
// empty prompt passed -> release the slot and send empty response
if (prompt_tokens.empty()) {
SLT_WRN(slot, "%s", "empty prompt - releasing slot\n");
slot.release();
slot.print_timings();
send_final_response(slot);
continue;
}
if (slot.cmpl_type == SERVER_TASK_CMPL_TYPE_EMBEDDING || slot.cmpl_type == SERVER_TASK_CMPL_TYPE_RERANK) {
// this prompt is too large to process - discard it
if (slot.n_prompt_tokens > n_ubatch) {