server : simplify json parsing + add comment about t_last

commit fd74b5ea34 (parent 9c8d3c8a25)
2 changed files with 9 additions and 19 deletions
examples/server/server.cpp

@@ -764,6 +764,7 @@ struct llama_server_context {
     server_slot * get_slot(int id) {
         int64_t t_last = ggml_time_us();

         server_slot * last_used = nullptr;

         for (server_slot & slot : slots) {
@@ -771,6 +772,7 @@ struct llama_server_context {
                 return &slot;
             }

+            // among all available slots, find the one that has been least recently used
             if (slot.available() && slot.t_last_used < t_last) {
                 last_used = &slot;
                 t_last = slot.t_last_used;
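
The selection policy above prefers the requested slot id and otherwise falls back to the available slot that was used longest ago; because t_last starts at the current time, any idle slot's timestamp beats it. A minimal, self-contained sketch of that policy (server_slot here is a stand-in with only the fields the loop touches, and t_now replaces ggml_time_us()):

// Standalone sketch of the slot-selection policy (assumed stand-in types).
#include <cstdint>
#include <vector>

struct server_slot {
    int     id          = 0;
    bool    in_use      = false;
    int64_t t_last_used = 0;   // microsecond timestamp of last use

    bool available() const { return !in_use; }
};

server_slot * get_slot(std::vector<server_slot> & slots, int id, int64_t t_now) {
    int64_t t_last = t_now;              // any idle slot is older than "now"
    server_slot * last_used = nullptr;

    for (server_slot & slot : slots) {
        if (slot.id == id && slot.available()) {
            return &slot;                // exact id match wins immediately
        }
        // among all available slots, find the one least recently used
        if (slot.available() && slot.t_last_used < t_last) {
            last_used = &slot;
            t_last    = slot.t_last_used;
        }
    }
    return last_used;                    // may be nullptr if no slot is free
}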
@@ -832,23 +834,9 @@ struct llama_server_context {
         }

         // infill
-        if (data.count("input_prefix") != 0) {
-            slot.params.input_prefix = data["input_prefix"];
-        } else {
-            slot.params.input_prefix = "";
-        }
-
-        if (data.count("input_suffix") != 0) {
-            slot.params.input_suffix = data["input_suffix"];
-        } else {
-            slot.params.input_suffix = "";
-        }
-
-        if (data.count("prompt") != 0) {
-            slot.prompt = data["prompt"];
-        } else {
-            slot.prompt = "";
-        }
+        slot.params.input_prefix = json_value(data, "input_prefix", default_params.input_prefix);
+        slot.params.input_suffix = json_value(data, "input_suffix", default_params.input_suffix);
+        slot.prompt              = json_value(data, "prompt",       std::string(""));

         // penalize user-provided tokens
         {
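
The simplification leans on a json_value helper that returns the value at a key when present and a caller-supplied default otherwise. The helper itself is not part of this diff; a sketch of the shape such a helper can take, assuming nlohmann::json (the JSON library the server example uses):

#include <string>
#include <nlohmann/json.hpp>

using json = nlohmann::json;

// json_value-style helper (sketch): return data[key] if the key exists and
// is non-null, otherwise the default. This is what lets the three if/else
// blocks above collapse into single assignments.
template <typename T>
static T json_value(const json & data, const std::string & key, const T & default_value) {
    return (data.contains(key) && !data.at(key).is_null())
        ? data.at(key).get<T>()
        : default_value;
}

Behavior for an absent key is unchanged: input_prefix and input_suffix fall back to the defaults in default_params, and prompt falls back to an empty string.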
llama.cpp

@@ -13533,12 +13533,14 @@ LLAMA_API int32_t llama_chat_apply_template(
             curr_tmpl = std::string(model_template.data(), model_template.size());
         }
     }

     // format the chat to string
     std::vector<const llama_chat_message *> chat_vec;
     chat_vec.resize(n_msg);
     for (size_t i = 0; i < n_msg; i++) {
         chat_vec[i] = &chat[i];
     }

     std::string formatted_chat;
     int32_t res = llama_chat_apply_template_internal(curr_tmpl, chat_vec, formatted_chat, add_ass);
     if (res < 0) {
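
For reference, a hedged usage sketch of the public entry point whose internals are shown above. The signature is assumed from llama.h of this period (model, optional template override, message array, add_ass flag, output buffer); the function reports the formatted length, so a caller can retry with a larger buffer if the first one was too small:

#include <string>
#include <vector>
#include "llama.h"

// Sketch: format a two-message chat with the model's built-in template
// (tmpl == nullptr). Assumes the llama.h signature of this era.
std::string format_chat(const llama_model * model, bool add_ass) {
    std::vector<llama_chat_message> chat = {
        { "system", "You are a helpful assistant." },
        { "user",   "Hello!"                       },
    };

    std::vector<char> buf(1024);
    int32_t res = llama_chat_apply_template(model, nullptr, chat.data(), chat.size(),
                                            add_ass, buf.data(), (int32_t) buf.size());
    if (res > (int32_t) buf.size()) {
        buf.resize(res);   // first buffer was too small; retry at the reported size
        res = llama_chat_apply_template(model, nullptr, chat.data(), chat.size(),
                                        add_ass, buf.data(), (int32_t) buf.size());
    }
    return res < 0 ? std::string() : std::string(buf.data(), res);
}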