minor style fix

Xuan Son Nguyen 2024-12-04 19:36:37 +01:00
parent 3b41ad53a3
commit 1261086163
2 changed files with 21 additions and 13 deletions


@@ -1170,12 +1170,15 @@ struct server_context {
         server_task_result_cmpl_partial res;
         res.id = slot.id_task;
         res.index = slot.index;
-        res.content = tkn.text_to_send;
-        res.truncated = slot.truncated;
+
         res.n_decoded = slot.n_decoded;
         res.n_prompt_tokens = slot.n_prompt_tokens;
-        res.stop = slot.stop;
+
+        res.content = tkn.text_to_send;
+        res.truncated = slot.truncated;
+        res.stop = slot.stop;
+
         // populate res.probs_output
         if (slot.params.sampling.n_probs > 0) {
             const llama_tokens to_send_toks = common_tokenize(ctx, tkn.text_to_send, false);
             const size_t probs_pos = std::min(slot.n_sent_token_probs, slot.generated_token_probs.size());
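
Editorial aside: the probs_pos clamp above is what keeps streamed token probabilities from being sent twice. A minimal self-contained sketch of that windowing, under assumed types (token_prob and take_unsent are illustrative names, not llama.cpp API):

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    struct token_prob { int tok; float prob; };

    // Return the slice of `generated` that has not been sent yet, capped by how
    // many probability entries actually exist; advance the n_sent cursor past it.
    static std::vector<token_prob> take_unsent(const std::vector<token_prob> & generated,
                                               size_t & n_sent, size_t n_new) {
        const size_t pos      = std::min(n_sent, generated.size());
        const size_t stop_pos = std::min(n_sent + n_new, generated.size());
        std::vector<token_prob> out(generated.begin() + pos, generated.begin() + stop_pos);
        n_sent = stop_pos; // next partial response starts where this one stopped
        return out;
    }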
@@ -1206,20 +1209,22 @@ struct server_context {
         server_task_result_cmpl_final res;
         res.id = slot.id_task;
         res.id_slot = slot.id;
         res.index = slot.index;
-        res.content = slot.generated_text;
-        res.timings = slot.get_timings();
         res.model_alias = slot.oaicompat_model;
         res.prompt = common_detokenize(ctx, slot.prompt_tokens, true);
-        res.truncated = slot.truncated;
+
         res.n_decoded = slot.n_decoded;
         res.n_prompt_tokens = slot.n_prompt_tokens;
-        res.has_new_line = slot.has_new_line;
         res.n_tokens_cached = slot.n_past;
+        res.content = slot.generated_text;
+        res.has_new_line = slot.has_new_line;
         res.stopping_word = slot.stopping_word;
         res.stop = slot.stop;
+        res.truncated = slot.truncated;
+        res.timings = slot.get_timings();
+        res.generation_params = slot.params; // copy the parameters

         // populate res.probs_output
         if (slot.params.sampling.n_probs > 0) {
             if (!slot.params.stream && slot.stop == STOP_TYPE_WORD) {
                 const llama_tokens stop_word_toks = common_tokenize(ctx, slot.stopping_word, false);
@@ -1235,6 +1240,8 @@ struct server_context {
             }
         }

-        res.generation_params = slot.params; // copy the parameters
         queue_results.send(res);
     }
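
The pattern in both response paths is the same: fill a plain result struct, then hand it to queue_results so the thread handling the HTTP request can pick it up and serialize it. A minimal sketch of such a send/recv queue (result_queue is a hypothetical stand-in, not the actual server_response class in llama.cpp):

    #include <condition_variable>
    #include <deque>
    #include <mutex>

    template <typename T>
    struct result_queue {
        std::mutex              mtx;
        std::condition_variable cv;
        std::deque<T>           results;

        // called by the worker after populating the result struct
        void send(T res) {
            std::lock_guard<std::mutex> lk(mtx);
            results.push_back(std::move(res));
            cv.notify_all();
        }

        // called by the request handler; blocks until a result arrives
        T recv() {
            std::unique_lock<std::mutex> lk(mtx);
            cv.wait(lk, [&] { return !results.empty(); });
            T res = std::move(results.front());
            results.pop_front();
            return res;
        }
    };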


@@ -237,7 +237,6 @@ struct server_task_result_cmpl_final : server_task_result {
     int index = 0;
     std::string content;
     bool stream;
-    bool timings_per_token;
     result_timings timings;
     std::string model_alias;
     std::string prompt;
@@ -245,10 +244,11 @@ struct server_task_result_cmpl_final : server_task_result {
     bool truncated;
     int32_t n_decoded;
     int32_t n_prompt_tokens;
-    int32_t has_new_line;
-    int32_t stopping_word;
     int32_t n_tokens_cached;
+    int32_t has_new_line;
+    std::string stopping_word;
     stop_type stop = STOP_TYPE_NONE;

     std::vector<completion_token_output> probs_output;
+
     slot_params generation_params;
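
The substantive fix in this hunk is the type of stopping_word: send_final_response assigns slot.stopping_word into it and re-tokenizes it via common_tokenize (see the server_context hunk above), so it must be a string; the old int32_t declaration could not have held that assignment. Illustrated with stand-in types (slot_like and result_like are not real llama.cpp names):

    #include <string>

    struct slot_like   { std::string stopping_word; }; // what the slot stores
    struct result_like { std::string stopping_word; }; // after this fix

    void copy_stop(result_like & res, const slot_like & slot) {
        res.stopping_word = slot.stopping_word; // would not compile against int32_t
    }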
@@ -291,6 +291,7 @@ struct server_task_result_cmpl_partial : server_task_result {
     int32_t n_prompt_tokens;
     stop_type stop = STOP_TYPE_NONE;
+
     std::vector<completion_token_output> probs_output;
     result_timings timings;
@@ -346,7 +347,7 @@ struct server_task_result_embd : server_task_result {
 struct server_task_result_rerank : server_task_result {
     server_task_result_rerank() : server_task_result(RESULT_TYPE_RERANK) {}
     int index = 0;
-    float score;
+    float score = -1e6;

     json to_json() {
         return json {
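
The -1e6 default closes an uninitialized-read hole: to_json() serializes score unconditionally, so a result whose score was never computed would otherwise emit an indeterminate value. A sketch with a stand-in type (rerank_result_like is illustrative, not the real struct):

    // With no in-class initializer, reading `score` before reranking ran would be
    // undefined behavior; -1e6 acts as an effectively minus-infinite sentinel so
    // unscored results rank last instead of randomly.
    struct rerank_result_like {
        int   index = 0;
        float score = -1e6;
    };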