server : simplify json parsing + add comment about t_last

This commit is contained in:
Georgi Gerganov 2024-03-07 10:17:10 +02:00
parent 9c8d3c8a25
commit fd74b5ea34
No known key found for this signature in database
GPG key ID: BF970631944C16B7
2 changed files with 9 additions and 19 deletions

View file

@@ -764,6 +764,7 @@ struct llama_server_context {
     server_slot * get_slot(int id) {
         int64_t t_last = ggml_time_us();
         server_slot * last_used = nullptr;
         for (server_slot & slot : slots) {
@@ -771,6 +772,7 @@ struct llama_server_context {
                 return &slot;
             }
+            // among all available slots, find the one that has been least recently used
             if (slot.available() && slot.t_last_used < t_last) {
                 last_used = &slot;
                 t_last = slot.t_last_used;
@@ -832,23 +834,9 @@ struct llama_server_context {
        }

        // infill
-       if (data.count("input_prefix") != 0) {
-           slot.params.input_prefix = data["input_prefix"];
-       } else {
-           slot.params.input_prefix = "";
-       }
-       if (data.count("input_suffix") != 0) {
-           slot.params.input_suffix = data["input_suffix"];
-       } else {
-           slot.params.input_suffix = "";
-       }
-       if (data.count("prompt") != 0) {
-           slot.prompt = data["prompt"];
-       } else {
-           slot.prompt = "";
-       }
+       slot.params.input_prefix = json_value(data, "input_prefix", default_params.input_prefix);
+       slot.params.input_suffix = json_value(data, "input_suffix", default_params.input_suffix);
+       slot.prompt              = json_value(data, "prompt", std::string(""));

        // penalize user-provided tokens
        {

View file

@@ -13533,12 +13533,14 @@ LLAMA_API int32_t llama_chat_apply_template(
            curr_tmpl = std::string(model_template.data(), model_template.size());
        }
    }

    // format the chat to string
    std::vector<const llama_chat_message *> chat_vec;
    chat_vec.resize(n_msg);
    for (size_t i = 0; i < n_msg; i++) {
        chat_vec[i] = &chat[i];
    }

    std::string formatted_chat;
    int32_t res = llama_chat_apply_template_internal(curr_tmpl, chat_vec, formatted_chat, add_ass);
    if (res < 0) {