move prompt_tokens.empty() check
This commit is contained in:
parent
125835b253
commit
5c749bea00
1 changed file with 11 additions and 15 deletions
|
@@ -1866,12 +1866,8 @@ struct server_context {
|
||||||
// next, batch any pending prompts without exceeding n_batch
|
// next, batch any pending prompts without exceeding n_batch
|
||||||
if (params.cont_batching || batch.n_tokens == 0) {
|
if (params.cont_batching || batch.n_tokens == 0) {
|
||||||
for (auto & slot : slots) {
|
for (auto & slot : slots) {
|
||||||
if (!slot.is_processing()) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// this slot still has a prompt to be processed
|
// this slot still has a prompt to be processed
|
||||||
if (!slot.prompt_tokens.empty() && slot.state == SLOT_STATE_PROCESSING_PROMPT) {
|
if (slot.state == SLOT_STATE_PROCESSING_PROMPT) {
|
||||||
auto & prompt_tokens = slot.prompt_tokens;
|
auto & prompt_tokens = slot.prompt_tokens;
|
||||||
|
|
||||||
slot.t_start_process_prompt = ggml_time_us();
|
slot.t_start_process_prompt = ggml_time_us();
|
||||||
|
@@ -1879,6 +1875,16 @@ struct server_context {
|
||||||
slot.n_past = 0;
|
slot.n_past = 0;
|
||||||
slot.n_prompt_tokens = prompt_tokens.size();
|
slot.n_prompt_tokens = prompt_tokens.size();
|
||||||
|
|
||||||
|
// empty prompt passed -> release the slot and send empty response
|
||||||
|
if (prompt_tokens.empty()) {
|
||||||
|
SLT_WRN(slot, "%s", "empty prompt - releasing slot\n");
|
||||||
|
|
||||||
|
slot.release();
|
||||||
|
slot.print_timings();
|
||||||
|
send_final_response(slot);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
SLT_INF(slot, "new prompt, n_ctx_slot = %d, n_keep = %d, n_prompt_tokens = %d\n", slot.n_ctx, slot.params.n_keep, slot.n_prompt_tokens);
|
SLT_INF(slot, "new prompt, n_ctx_slot = %d, n_keep = %d, n_prompt_tokens = %d\n", slot.n_ctx, slot.params.n_keep, slot.n_prompt_tokens);
|
||||||
|
|
||||||
// print prompt tokens (for debugging)
|
// print prompt tokens (for debugging)
|
||||||
|
@@ -1894,16 +1900,6 @@ struct server_context {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// empty prompt passed -> release the slot and send empty response
|
|
||||||
if (prompt_tokens.empty()) {
|
|
||||||
SLT_WRN(slot, "%s", "empty prompt - releasing slot\n");
|
|
||||||
|
|
||||||
slot.release();
|
|
||||||
slot.print_timings();
|
|
||||||
send_final_response(slot);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (slot.cmpl_type == SERVER_TASK_CMPL_TYPE_EMBEDDING || slot.cmpl_type == SERVER_TASK_CMPL_TYPE_RERANK) {
|
if (slot.cmpl_type == SERVER_TASK_CMPL_TYPE_EMBEDDING || slot.cmpl_type == SERVER_TASK_CMPL_TYPE_RERANK) {
|
||||||
// this prompt is too large to process - discard it
|
// this prompt is too large to process - discard it
|
||||||
if (slot.n_prompt_tokens > n_ubatch) {
|
if (slot.n_prompt_tokens > n_ubatch) {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue