examples : rely on new behavior of add_special

Jared Van Bortel 2024-04-04 18:12:33 -04:00
parent d1a1b614cd
commit 92591c125f
14 changed files with 53 additions and 69 deletions
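Background for the diff below: with the tokenizer change this commit relies on, the add_special flag asks the model's vocabulary to insert whatever special tokens it declares (BOS and/or EOS) instead of the caller forcing a lone BOS, which is why the server now asserts that the model does not want an EOS appended to every prompt. The snippet that follows is a minimal sketch of that call pattern, not code from this commit; it assumes the common-library wrapper ::llama_tokenize(ctx, text, add_special, parse_special) as it existed around this change, and the tokenize_prompt helper and is_first_chunk parameter are hypothetical.

// Hypothetical sketch of the new add_special usage (assumed API of this era;
// not part of the diff).
#include "common.h"
#include "llama.h"

static std::vector<llama_token> tokenize_prompt(llama_context * ctx,
                                                const llama_model * model,
                                                const std::string & prompt,
                                                bool is_first_chunk) {
    // Refuse models whose vocab wants an EOS appended to every tokenization,
    // mirroring the GGML_ASSERT added in this commit.
    GGML_ASSERT(llama_add_eos_token(model) != 1);

    // add_special: only the first chunk of a prompt gets the vocab's leading
    // special tokens (typically BOS); parse_special: special-token text in the
    // prompt (e.g. ChatML tags) is mapped to token ids rather than plain text.
    const bool add_special   = is_first_chunk;
    const bool parse_special = true;

    return ::llama_tokenize(ctx, prompt, add_special, parse_special);
}

The net effect, visible throughout the hunks below, is that the server stops threading add_bos_token through its tokenize() helper and simply passes add_special, letting the vocab metadata decide which special tokens are inserted.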

examples/server/server.cpp

@@ -685,6 +685,7 @@ struct server_context {
n_ctx = llama_n_ctx(ctx);
add_bos_token = llama_should_add_bos_token(model);
+ GGML_ASSERT(llama_add_eos_token(model) != 1);
return true;
}
@@ -754,7 +755,7 @@ struct server_context {
metrics.init();
}
- std::vector<llama_token> tokenize(const json & json_prompt, bool add_bos) const {
+ std::vector<llama_token> tokenize(const json & json_prompt, bool add_special) const {
// TODO: currently, we tokenize using special tokens by default
// this is not always correct (see https://github.com/ggerganov/llama.cpp/pull/4160#issuecomment-1824826216)
// but it's better compared to completely ignoring ChatML and other chat templates
@@ -772,7 +773,7 @@ struct server_context {
std::vector<llama_token> p;
if (first) {
- p = ::llama_tokenize(ctx, s, add_bos, TMP_FORCE_SPECIAL);
+ p = ::llama_tokenize(ctx, s, add_special, TMP_FORCE_SPECIAL);
first = false;
} else {
p = ::llama_tokenize(ctx, s, false, TMP_FORCE_SPECIAL);
@@ -789,7 +790,7 @@ struct server_context {
}
} else {
auto s = json_prompt.template get<std::string>();
- prompt_tokens = ::llama_tokenize(ctx, s, add_bos, TMP_FORCE_SPECIAL);
+ prompt_tokens = ::llama_tokenize(ctx, s, add_special, TMP_FORCE_SPECIAL);
}
return prompt_tokens;
@@ -1054,7 +1055,7 @@ struct server_context {
system_tokens.clear();
if (!system_prompt.empty()) {
- system_tokens = ::llama_tokenize(ctx, system_prompt, add_bos_token);
+ system_tokens = ::llama_tokenize(ctx, system_prompt, true);
llama_batch_clear(batch);
@@ -1809,7 +1810,7 @@ struct server_context {
prefix_tokens.push_back(llama_token_middle(model));
prompt_tokens = prefix_tokens;
} else {
- prompt_tokens = tokenize(slot.prompt, system_prompt.empty() && add_bos_token); // add BOS if there isn't system prompt
+ prompt_tokens = tokenize(slot.prompt, system_prompt.empty()); // add BOS if there isn't system prompt
}
slot.n_past = 0;