server : accept extra_context for the infill endpoint (#9874)
* server : accept extra_context for the infill endpoint
ggml-ci
* server : update readme [no ci]
* server : use repo-level FIM pattern if possible
ggml-ci
This commit is contained in:
parent
c7181bd294
commit
d4c19c0f5c
3 changed files with 153 additions and 26 deletions
|
@ -6596,8 +6596,8 @@ static void llm_load_vocab(
|
|||
) {
|
||||
vocab.special_eot_id = t.second;
|
||||
if ((vocab.id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
|
||||
LLAMA_LOG_WARN("%s: control-looking token: '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
|
||||
__func__, t.first.c_str());
|
||||
LLAMA_LOG_WARN("%s: control-looking token: %6d '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
|
||||
__func__, t.second, t.first.c_str());
|
||||
vocab.id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_CONTROL;
|
||||
}
|
||||
}
|
||||
|
@ -6610,8 +6610,8 @@ static void llm_load_vocab(
|
|||
) {
|
||||
vocab.special_eom_id = t.second;
|
||||
if ((vocab.id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
|
||||
LLAMA_LOG_WARN("%s: control-looking token: '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
|
||||
__func__, t.first.c_str());
|
||||
LLAMA_LOG_WARN("%s: control-looking token: %6d '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
|
||||
__func__, t.second, t.first.c_str());
|
||||
vocab.id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_CONTROL;
|
||||
}
|
||||
}
|
||||
|
@ -6627,8 +6627,8 @@ static void llm_load_vocab(
|
|||
) {
|
||||
vocab.special_fim_pre_id = t.second;
|
||||
if ((vocab.id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
|
||||
LLAMA_LOG_WARN("%s: control-looking token: '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
|
||||
__func__, t.first.c_str());
|
||||
LLAMA_LOG_WARN("%s: control-looking token: %6d '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
|
||||
__func__, t.second, t.first.c_str());
|
||||
vocab.id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_CONTROL;
|
||||
}
|
||||
}
|
||||
|
@ -6644,8 +6644,8 @@ static void llm_load_vocab(
|
|||
) {
|
||||
vocab.special_fim_suf_id = t.second;
|
||||
if ((vocab.id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
|
||||
LLAMA_LOG_WARN("%s: control-looking token: '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
|
||||
__func__, t.first.c_str());
|
||||
LLAMA_LOG_WARN("%s: control-looking token: %6d '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
|
||||
__func__, t.second, t.first.c_str());
|
||||
vocab.id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_CONTROL;
|
||||
}
|
||||
}
|
||||
|
@ -6661,8 +6661,8 @@ static void llm_load_vocab(
|
|||
) {
|
||||
vocab.special_fim_mid_id = t.second;
|
||||
if ((vocab.id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
|
||||
LLAMA_LOG_WARN("%s: control-looking token: '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
|
||||
__func__, t.first.c_str());
|
||||
LLAMA_LOG_WARN("%s: control-looking token: %6d '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
|
||||
__func__, t.second, t.first.c_str());
|
||||
vocab.id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_CONTROL;
|
||||
}
|
||||
}
|
||||
|
@ -6677,8 +6677,8 @@ static void llm_load_vocab(
|
|||
) {
|
||||
vocab.special_fim_pad_id = t.second;
|
||||
if ((vocab.id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
|
||||
LLAMA_LOG_WARN("%s: control-looking token: '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
|
||||
__func__, t.first.c_str());
|
||||
LLAMA_LOG_WARN("%s: control-looking token: %6d '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
|
||||
__func__, t.second, t.first.c_str());
|
||||
vocab.id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_CONTROL;
|
||||
}
|
||||
}
|
||||
|
@ -6694,8 +6694,8 @@ static void llm_load_vocab(
|
|||
) {
|
||||
vocab.special_fim_rep_id = t.second;
|
||||
if ((vocab.id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
|
||||
LLAMA_LOG_WARN("%s: control-looking token: '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
|
||||
__func__, t.first.c_str());
|
||||
LLAMA_LOG_WARN("%s: control-looking token: %6d '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
|
||||
__func__, t.second, t.first.c_str());
|
||||
vocab.id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_CONTROL;
|
||||
}
|
||||
}
|
||||
|
@ -6708,8 +6708,8 @@ static void llm_load_vocab(
|
|||
) {
|
||||
vocab.special_fim_sep_id = t.second;
|
||||
if ((vocab.id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
|
||||
LLAMA_LOG_WARN("%s: control-looking token: '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
|
||||
__func__, t.first.c_str());
|
||||
LLAMA_LOG_WARN("%s: control-looking token: %6d '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
|
||||
__func__, t.second, t.first.c_str());
|
||||
vocab.id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_CONTROL;
|
||||
}
|
||||
}
|
||||
|
@ -6720,6 +6720,19 @@ static void llm_load_vocab(
|
|||
// this is currently determined based on the token text, which is obviously not ideal
|
||||
// ref: https://github.com/ggerganov/llama.cpp/issues/9606
|
||||
vocab.special_eog_ids.clear();
|
||||
|
||||
if (vocab.special_fim_pad_id != LLAMA_TOKEN_NULL && vocab.special_eog_ids.count(vocab.special_fim_pad_id) == 0) {
|
||||
vocab.special_eog_ids.insert(vocab.special_fim_pad_id);
|
||||
}
|
||||
|
||||
if (vocab.special_fim_rep_id != LLAMA_TOKEN_NULL && vocab.special_eog_ids.count(vocab.special_fim_rep_id) == 0) {
|
||||
vocab.special_eog_ids.insert(vocab.special_fim_rep_id);
|
||||
}
|
||||
|
||||
if (vocab.special_fim_sep_id != LLAMA_TOKEN_NULL && vocab.special_eog_ids.count(vocab.special_fim_sep_id) == 0) {
|
||||
vocab.special_eog_ids.insert(vocab.special_fim_sep_id);
|
||||
}
|
||||
|
||||
for (const auto & t : vocab.token_to_id) {
|
||||
if (false
|
||||
|| t.first == "<|eot_id|>"
|
||||
|
@ -6732,13 +6745,20 @@ static void llm_load_vocab(
|
|||
) {
|
||||
vocab.special_eog_ids.insert(t.second);
|
||||
if ((vocab.id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
|
||||
LLAMA_LOG_WARN("%s: control-looking token: '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
|
||||
__func__, t.first.c_str());
|
||||
LLAMA_LOG_WARN("%s: control-looking token: %6d '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
|
||||
__func__, t.second, t.first.c_str());
|
||||
vocab.id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_CONTROL;
|
||||
}
|
||||
} else {
|
||||
// token is control, but not marked as EOG -> print a warning
|
||||
if (vocab.id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL && vocab.special_eog_ids.count(t.second) == 0) {
|
||||
LLAMA_LOG_WARN("%s: control token: %6d '%s' is not marked as EOG\n",
|
||||
__func__, t.second, t.first.c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// sanity checks
|
||||
if (vocab.special_eos_id != LLAMA_TOKEN_NULL && vocab.special_eog_ids.count(vocab.special_eos_id) == 0) {
|
||||
vocab.special_eog_ids.insert(vocab.special_eos_id);
|
||||
LLAMA_LOG_WARN("%s: special_eos_id is not in special_eog_ids - the tokenizer config may be incorrect\n", __func__);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue