Merge branch 'master' into compilade/refactor-kv-cache

This commit is contained in:
Francis Couture-Harpin 2024-06-12 12:10:29 -04:00
commit ff794f5535
50 changed files with 3235 additions and 2041 deletions

View file

@ -147,7 +147,7 @@ struct server_slot {
int32_t n_prompt_tokens = 0;
int32_t n_prompt_tokens_processed = 0;
json prompt;
json prompt; // can be either a string, array of strings or array of token ids
// when a task is submitted, we first tokenize the prompt and store it here
std::vector<llama_token> prompt_tokens;
@ -958,13 +958,16 @@ struct server_context {
if (!task.infill) {
const auto & prompt = data.find("prompt");
if (prompt == data.end()) {
send_error(task, "Either \"prompt\" or \"messages\" must be provided", ERROR_TYPE_INVALID_REQUEST);
send_error(task, "\"prompt\" must be provided", ERROR_TYPE_INVALID_REQUEST);
return false;
} else {
slot.prompt = *prompt;
}
if (slot.prompt.is_array() && slot.prompt.size() == 0) {
send_error(task, "\"prompt\" cannot be an empty array", ERROR_TYPE_INVALID_REQUEST);
if ((prompt->is_string()) ||
(prompt->is_array() && prompt->size() == 1 && prompt->at(0).is_string()) ||
(prompt->is_array() && !prompt->empty() && prompt->at(0).is_number_integer())) {
slot.prompt = *prompt;
} else {
send_error(task, "\"prompt\" must be a string or an array of integers", ERROR_TYPE_INVALID_REQUEST);
return false;
}
}
@ -1582,14 +1585,18 @@ struct server_context {
switch (task.type) {
case SERVER_TASK_TYPE_COMPLETION:
{
int id_slot = json_value(task.data, "id_slot", -1);
std::string prompt = json_value(task.data, "prompt", std::string());
const int id_slot = json_value(task.data, "id_slot", -1);
server_slot * slot;
if (id_slot != -1) {
slot = get_slot_by_id(id_slot);
} else {
std::string prompt;
if (task.data.contains("prompt") && task.data.at("prompt").is_string()) {
json_value(task.data, "prompt", std::string());
}
slot = get_available_slot(prompt);
}