add SLOT_STATE_DONE_PROMPT
This commit is contained in:
parent
2c81cde493
commit
446d57d7cd
1 changed file with 16 additions and 8 deletions
|
@ -53,6 +53,7 @@ enum stop_type {
|
||||||
enum slot_state {
|
enum slot_state {
|
||||||
SLOT_STATE_IDLE,
|
SLOT_STATE_IDLE,
|
||||||
SLOT_STATE_PROCESSING_PROMPT,
|
SLOT_STATE_PROCESSING_PROMPT,
|
||||||
|
SLOT_STATE_DONE_PROMPT,
|
||||||
SLOT_STATE_GENERATING,
|
SLOT_STATE_GENERATING,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -2235,9 +2236,9 @@ struct server_context {
|
||||||
{"progress", (float) slot.n_prompt_tokens_processed / slot.n_prompt_tokens},
|
{"progress", (float) slot.n_prompt_tokens_processed / slot.n_prompt_tokens},
|
||||||
});
|
});
|
||||||
|
|
||||||
// entire prompt has been processed - start decoding new tokens
|
// entire prompt has been processed
|
||||||
if (slot.n_past == slot.n_prompt_tokens) {
|
if (slot.n_past == slot.n_prompt_tokens) {
|
||||||
slot.state = SLOT_STATE_GENERATING;
|
slot.state = SLOT_STATE_DONE_PROMPT;
|
||||||
|
|
||||||
GGML_ASSERT(batch.n_tokens > 0);
|
GGML_ASSERT(batch.n_tokens > 0);
|
||||||
|
|
||||||
|
@ -2349,16 +2350,23 @@ struct server_context {
|
||||||
}
|
}
|
||||||
|
|
||||||
for (auto & slot : slots) {
|
for (auto & slot : slots) {
|
||||||
if (slot.state != SLOT_STATE_GENERATING || slot.i_batch < (int) i || slot.i_batch >= (int) (i + n_tokens)) {
|
if (slot.i_batch < (int) i || slot.i_batch >= (int) (i + n_tokens)) {
|
||||||
continue; // continue loop of slots
|
continue; // continue loop of slots
|
||||||
}
|
}
|
||||||
|
|
||||||
// prompt evaluated for embedding
|
if (slot.state == SLOT_STATE_DONE_PROMPT) {
|
||||||
if (slot.cmpl_type == SERVER_TASK_CMPL_TYPE_EMBEDDING) {
|
if (slot.cmpl_type == SERVER_TASK_CMPL_TYPE_EMBEDDING) {
|
||||||
|
// prompt evaluated for embedding
|
||||||
send_embedding(slot, batch_view);
|
send_embedding(slot, batch_view);
|
||||||
slot.release();
|
slot.release();
|
||||||
slot.i_batch = -1;
|
slot.i_batch = -1;
|
||||||
continue; // continue loop of slots
|
continue; // continue loop of slots
|
||||||
|
} else {
|
||||||
|
// prompt evaluated for next-token prediction
|
||||||
|
slot.state = SLOT_STATE_GENERATING;
|
||||||
|
}
|
||||||
|
} else if (slot.state != SLOT_STATE_GENERATING) {
|
||||||
|
continue; // continue loop of slots
|
||||||
}
|
}
|
||||||
|
|
||||||
completion_token_output result;
|
completion_token_output result;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue