Add Jinja template support (#11016)

* Copy minja from 58f0ca6dd7

* Add --jinja and --chat-template-file flags

* Add missing <optional> include

* Avoid print in get_hf_chat_template.py

* No designated initializers yet

* Try and work around msvc++ non-macro max resolution quirk

* Update test_chat_completion.py

* Wire LLM_KV_TOKENIZER_CHAT_TEMPLATE_N in llama_model_chat_template

* Refactor test-chat-template

* Test templates w/ minja

* Fix deprecation

* Add --jinja to llama-run

* Update common_chat_format_example to use minja template wrapper

* Test chat_template in e2e test

* Update utils.py

* Update test_chat_completion.py

* Update run.cpp

* Update arg.cpp

* Refactor common_chat_* functions to accept minja template + use_jinja option

* Attempt to fix linkage of LLAMA_CHATML_TEMPLATE

* Revert LLAMA_CHATML_TEMPLATE refactor

* Normalize newlines in test-chat-templates for windows tests

* Forward decl minja::chat_template to avoid eager json dep

* Flush stdout in chat template before potential crash

* Fix copy elision warning

* Rm unused optional include

* Add missing optional include to server.cpp

* Disable jinja test that has a cryptic windows failure

* minja: fix vigogne (https://github.com/google/minja/pull/22)

* Apply suggestions from code review

Co-authored-by: Xuan Son Nguyen <thichthat@gmail.com>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

* Finish suggested renamings

* Move chat_templates inside server_context + remove mutex

* Update --chat-template-file w/ recent change to --chat-template

* Refactor chat template validation

* Guard against missing eos/bos tokens (null token otherwise throws in llama_vocab::impl::token_get_attr)

* Warn against missing eos / bos tokens when jinja template references them

* rename: common_chat_template[s]

* reinstate assert on chat_templates.template_default

* Update minja to b8437df626

* Update minja to https://github.com/google/minja/pull/25

* Update minja from https://github.com/google/minja/pull/27

* rm unused optional header

---------

Co-authored-by: Xuan Son Nguyen <thichthat@gmail.com>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
Olivier Chafik 2025-01-21 13:18:51 +00:00 committed by GitHub
parent e28245f35f
commit 6171c9d258
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
22 changed files with 3563 additions and 133 deletions

View file

@ -29,7 +29,7 @@ add_library(llama
unicode-data.cpp
)
target_include_directories(llama PUBLIC . ../include)
target_include_directories(llama PUBLIC . ../include ../common)
target_compile_features (llama PUBLIC cxx_std_17) # don't bump
target_link_libraries(llama PUBLIC ggml)

View file

@ -179,6 +179,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
{ LLM_KV_TOKENIZER_HF_JSON, "tokenizer.huggingface.json" },
{ LLM_KV_TOKENIZER_RWKV, "tokenizer.rwkv.world" },
{ LLM_KV_TOKENIZER_CHAT_TEMPLATE, "tokenizer.chat_template" },
{ LLM_KV_TOKENIZER_CHAT_TEMPLATE_N, "tokenizer.chat_template.%s" },
{ LLM_KV_TOKENIZER_FIM_PRE_ID, "tokenizer.ggml.fim_pre_token_id" },
{ LLM_KV_TOKENIZER_FIM_SUF_ID, "tokenizer.ggml.fim_suf_token_id" },
{ LLM_KV_TOKENIZER_FIM_MID_ID, "tokenizer.ggml.fim_mid_token_id" },
@ -1443,10 +1444,11 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
{LLM_TENSOR_CONVNEXT_GAMMA, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
};
LLM_KV::LLM_KV(llm_arch arch) : arch(arch) {}
LLM_KV::LLM_KV(llm_arch arch, const char * suffix) : arch(arch), suffix(suffix) {}
std::string LLM_KV::operator()(llm_kv kv) const {
return ::format(LLM_KV_NAMES.at(kv), LLM_ARCH_NAMES.at(arch));
return suffix ? ::format(LLM_KV_NAMES.at(kv), LLM_ARCH_NAMES.at(arch), suffix)
: ::format(LLM_KV_NAMES.at(kv), LLM_ARCH_NAMES.at(arch));
}
std::string LLM_TN_IMPL::str() const {

View file

@ -177,6 +177,7 @@ enum llm_kv {
LLM_KV_TOKENIZER_HF_JSON,
LLM_KV_TOKENIZER_RWKV,
LLM_KV_TOKENIZER_CHAT_TEMPLATE,
LLM_KV_TOKENIZER_CHAT_TEMPLATE_N,
LLM_KV_TOKENIZER_FIM_PRE_ID,
LLM_KV_TOKENIZER_FIM_SUF_ID,
LLM_KV_TOKENIZER_FIM_MID_ID,
@ -335,9 +336,10 @@ enum llm_tensor_layer {
};
struct LLM_KV {
LLM_KV(llm_arch arch);
LLM_KV(llm_arch arch, const char * suffix = nullptr);
llm_arch arch;
const char * suffix;
std::string operator()(llm_kv kv) const;
};

View file

@ -3955,8 +3955,10 @@ uint64_t llama_model_size(const struct llama_model * model) {
return model->size();
}
const char * llama_model_chat_template(const struct llama_model * model) {
const auto & it = model->gguf_kv.find(LLM_KV(model->arch)(LLM_KV_TOKENIZER_CHAT_TEMPLATE));
const char * llama_model_chat_template(const struct llama_model * model, const char * name) {
const auto key = name ? LLM_KV(model->arch, name)(LLM_KV_TOKENIZER_CHAT_TEMPLATE_N)
: LLM_KV(model->arch)(LLM_KV_TOKENIZER_CHAT_TEMPLATE);
const auto & it = model->gguf_kv.find(key);
if (it == model->gguf_kv.end()) {
return nullptr;
}