llama : auto-detect more EOT tokens when missing in KV data

commit d0a4cc8ec8 (parent 4b47c24bf2)
Georgi Gerganov, 2024-04-21 11:14:19 +03:00
2 changed files with 12 additions and 6 deletions

examples/server/utils.hpp

@@ -381,10 +381,6 @@ static json oaicompat_completion_params_parse(
     } else {
         llama_params["stop"] = json_value(body, "stop", json::array());
     }
-    // Some chat templates don't use EOS token to stop generation
-    // We must add their end sequences to list of stop words
-    llama_params["stop"].push_back("<|im_end|>"); // chatml
-    llama_params["stop"].push_back("<end_of_turn>"); // gemma
 
     // Handle "response_format" field
     if (body.contains("response_format")) {

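With this change the server no longer injects template-specific stop words; generation is expected to end on the model's own EOT token, which llm_load_vocab now detects (second hunk below). Clients that still want an explicit stop sequence can send one per request. A minimal sketch, assuming the nlohmann::json library the server code above already uses; the body/llama_params names mirror the snippet, everything else is illustrative:

    #include <nlohmann/json.hpp>

    using json = nlohmann::json;

    int main() {
        // hypothetical request body carrying an explicit, per-request stop word
        json body = {
            {"prompt", "Hello"},
            {"stop",   json::array({"<|im_end|>"})},
        };

        json llama_params;
        // same shape as the server logic above: honor the client's "stop" list,
        // defaulting to an empty array instead of hard-coded template markers
        llama_params["stop"] = body.contains("stop") ? body["stop"] : json::array();
        return 0;
    }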
llama.cpp

@@ -4309,11 +4309,21 @@ static void llm_load_vocab(
             }
         }
 
-        // find EOT token "<|eot_id|>"
+        // find EOT token: "<|eot_id|>", "<|im_emd|>", "<end_of_turn>", etc.
         //
         // TODO: convert scripts should provide this token through the KV metadata LLAMA_KV_TOKENIZER_EOT_ID
         //       for now, we apply this workaround to find the EOT token based on its text
         if (vocab.special_eot_id == -1) {
             for (const auto & t : vocab.token_to_id) {
-                if (t.first == "<|eot_id|>" && vocab.id_to_token[t.second].type == LLAMA_TOKEN_TYPE_CONTROL) {
+                if (
+                        // TODO: gemma "<end_of_turn>" is exported as a normal token, so the following check does not work
+                        //       need to fix convert script
+                        //vocab.id_to_token[t.second].type == LLAMA_TOKEN_TYPE_CONTROL &&
+                        (t.first == "<|eot_id|>" ||
+                         t.first == "<|im_emd|>" ||
+                         t.first == "<end_of_turn>"
+                        )
+                   ) {
                     vocab.special_eot_id = t.second;
                     break;
                 }
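The fallback above hard-codes three spellings inside the condition. For reference, the same text-based lookup can be phrased as a loop over a candidate list, so a new EOT spelling costs one entry. A minimal sketch under that assumption (find_eot_fallback is a hypothetical helper, not llama.cpp API; the candidate strings, including the "<|im_emd|>" spelling as committed, and the -1 "unset" sentinel come from the diff above):

    #include <cstdint>
    #include <string>
    #include <unordered_map>
    #include <vector>

    // hypothetical helper, not part of llama.cpp
    static int32_t find_eot_fallback(const std::unordered_map<std::string, int32_t> & token_to_id) {
        // candidate EOT texts, copied from the condition in the diff above
        static const std::vector<std::string> eot_texts = {
            "<|eot_id|>",
            "<|im_emd|>",
            "<end_of_turn>",
        };
        for (const auto & text : eot_texts) {
            const auto it = token_to_id.find(text);
            if (it != token_to_id.end()) {
                return it->second; // first candidate present in the vocab wins
            }
        }
        return -1; // same "unset" sentinel the loader checks for
    }

Unlike the committed loop, which scans the whole vocabulary and accepts whichever spelling map iteration reaches first, this variant prefers candidates in list order, which makes the choice deterministic.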