Merge branch 'master' into compilade/refactor-kv-cache
This commit is contained in:
commit
ff794f5535
50 changed files with 3235 additions and 2041 deletions
|
@ -416,7 +416,7 @@
|
|||
message = html`<${Probabilities} data=${data} />`
|
||||
} else {
|
||||
const text = isArrayMessage ?
|
||||
data.map(msg => msg.content).join('').replace(/^\s+/, '') :
|
||||
data.map(msg => msg.content).join('') :
|
||||
data;
|
||||
message = isCompletionMode ?
|
||||
text :
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
// WARNING: This file was ported from json_schema_to_grammar.py, please fix bugs / add features there first.
|
||||
const SPACE_RULE = '" "?';
|
||||
const SPACE_RULE = '| " " | "\\n" [ \\t]{0,20}';
|
||||
|
||||
function _buildRepetition(itemRule, minItems, maxItems, opts={}) {
|
||||
if (minItems === 0 && maxItems === 1) {
|
||||
|
@ -41,7 +41,7 @@ const PRIMITIVE_RULES = {
|
|||
object : new BuiltinRule('"{" space ( string ":" space value ("," space string ":" space value)* )? "}" space', ['string', 'value']),
|
||||
array : new BuiltinRule('"[" space ( value ("," space value)* )? "]" space', ['value']),
|
||||
uuid : new BuiltinRule('"\\"" [0-9a-fA-F]{8} "-" [0-9a-fA-F]{4} "-" [0-9a-fA-F]{4} "-" [0-9a-fA-F]{4} "-" [0-9a-fA-F]{12} "\\"" space', []),
|
||||
char : new BuiltinRule(`[^"\\\\] | "\\\\" (["\\\\/bfnrt] | "u" [0-9a-fA-F]{4})`, []),
|
||||
char : new BuiltinRule(`[^"\\\\\\x7F\\x00-\\x1F] | [\\\\] (["\\\\bfnrt] | "u" [0-9a-fA-F]{4})`, []),
|
||||
string : new BuiltinRule(`"\\"" char* "\\"" space`, ['char']),
|
||||
null : new BuiltinRule('"null" space', []),
|
||||
};
|
||||
|
|
|
@ -147,7 +147,7 @@ struct server_slot {
|
|||
int32_t n_prompt_tokens = 0;
|
||||
int32_t n_prompt_tokens_processed = 0;
|
||||
|
||||
json prompt;
|
||||
json prompt; // can be either a string, array of strings or array of token ids
|
||||
|
||||
// when a task is submitted, we first tokenize the prompt and store it here
|
||||
std::vector<llama_token> prompt_tokens;
|
||||
|
@ -958,13 +958,16 @@ struct server_context {
|
|||
if (!task.infill) {
|
||||
const auto & prompt = data.find("prompt");
|
||||
if (prompt == data.end()) {
|
||||
send_error(task, "Either \"prompt\" or \"messages\" must be provided", ERROR_TYPE_INVALID_REQUEST);
|
||||
send_error(task, "\"prompt\" must be provided", ERROR_TYPE_INVALID_REQUEST);
|
||||
return false;
|
||||
} else {
|
||||
slot.prompt = *prompt;
|
||||
}
|
||||
if (slot.prompt.is_array() && slot.prompt.size() == 0) {
|
||||
send_error(task, "\"prompt\" cannot be an empty array", ERROR_TYPE_INVALID_REQUEST);
|
||||
|
||||
if ((prompt->is_string()) ||
|
||||
(prompt->is_array() && prompt->size() == 1 && prompt->at(0).is_string()) ||
|
||||
(prompt->is_array() && !prompt->empty() && prompt->at(0).is_number_integer())) {
|
||||
slot.prompt = *prompt;
|
||||
} else {
|
||||
send_error(task, "\"prompt\" must be a string or an array of integers", ERROR_TYPE_INVALID_REQUEST);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -1582,14 +1585,18 @@ struct server_context {
|
|||
switch (task.type) {
|
||||
case SERVER_TASK_TYPE_COMPLETION:
|
||||
{
|
||||
int id_slot = json_value(task.data, "id_slot", -1);
|
||||
std::string prompt = json_value(task.data, "prompt", std::string());
|
||||
const int id_slot = json_value(task.data, "id_slot", -1);
|
||||
|
||||
server_slot * slot;
|
||||
|
||||
if (id_slot != -1) {
|
||||
slot = get_slot_by_id(id_slot);
|
||||
} else {
|
||||
std::string prompt;
|
||||
if (task.data.contains("prompt") && task.data.at("prompt").is_string()) {
|
||||
json_value(task.data, "prompt", std::string());
|
||||
}
|
||||
|
||||
slot = get_available_slot(prompt);
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue