Add Jinja template support (#11016)

* Copy minja from 58f0ca6dd7

* Add --jinja and --chat-template-file flags

* Add missing <optional> include

* Avoid print in get_hf_chat_template.py

* No designated initializers yet

* Try and work around msvc++ non-macro max resolution quirk

* Update test_chat_completion.py

* Wire LLM_KV_TOKENIZER_CHAT_TEMPLATE_N in llama_model_chat_template

* Refactor test-chat-template

* Test templates w/ minja

* Fix deprecation

* Add --jinja to llama-run

* Update common_chat_format_example to use minja template wrapper

* Test chat_template in e2e test

* Update utils.py

* Update test_chat_completion.py

* Update run.cpp

* Update arg.cpp

* Refactor common_chat_* functions to accept minja template + use_jinja option

* Attempt to fix linkage of LLAMA_CHATML_TEMPLATE

* Revert LLAMA_CHATML_TEMPLATE refactor

* Normalize newlines in test-chat-templates for windows tests

* Forward decl minja::chat_template to avoid eager json dep

* Flush stdout in chat template before potential crash

* Fix copy elision warning

* Rm unused optional include

* Add missing optional include to server.cpp

* Disable jinja test that has a cryptic windows failure

* minja: fix vigogne (https://github.com/google/minja/pull/22)

* Apply suggestions from code review

Co-authored-by: Xuan Son Nguyen <thichthat@gmail.com>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

* Finish suggested renamings

* Move chat_templates inside server_context + remove mutex

* Update --chat-template-file w/ recent change to --chat-template

* Refactor chat template validation

* Guard against missing eos/bos tokens (null token otherwise throws in llama_vocab::impl::token_get_attr)

* Warn against missing eos / bos tokens when jinja template references them

* rename: common_chat_template[s]

* reinstate assert on chat_templates.template_default

* Update minja to b8437df626

* Update minja to https://github.com/google/minja/pull/25

* Update minja from https://github.com/google/minja/pull/27

* rm unused optional header

---------

Co-authored-by: Xuan Son Nguyen <thichthat@gmail.com>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
Olivier Chafik 2025-01-21 13:18:51 +00:00 committed by GitHub
parent e28245f35f
commit 6171c9d258
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
22 changed files with 3563 additions and 133 deletions

View file

@ -334,6 +334,7 @@ struct common_params {
std::string hostname = "127.0.0.1";
std::string public_path = ""; // NOLINT
std::string chat_template = ""; // NOLINT
bool use_jinja = false; // NOLINT
bool enable_chat_template = true;
std::vector<std::string> api_keys;
@ -603,30 +604,43 @@ struct common_chat_msg {
std::string content;
};
// Get the built-in chat template for the model. Return empty string if not present.
std::string common_get_builtin_chat_template(const struct llama_model * model);
// Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
bool common_chat_verify_template(const std::string & tmpl);
bool common_chat_verify_template(const std::string & tmpl, bool use_jinja);
namespace minja {
class chat_template;
}
typedef minja::chat_template common_chat_template;
struct common_chat_templates {
bool has_explicit_template; // Model had builtin template or template overridde was specified.
std::unique_ptr<common_chat_template> template_default; // always set (defaults to chatml)
std::unique_ptr<common_chat_template> template_tool_use;
};
// CPP wrapper for llama_chat_apply_template
// If the built-in template is not supported, we default to chatml
// If the custom "tmpl" is not supported, we throw an error
std::string common_chat_apply_template(const struct llama_model * model,
const std::string & tmpl,
std::string common_chat_apply_template(
const common_chat_template & tmpl,
const std::vector<common_chat_msg> & chat,
bool add_ass);
bool add_ass,
bool use_jinja);
// Format single message, while taking into account the position of that message in chat history
std::string common_chat_format_single(const struct llama_model * model,
const std::string & tmpl,
std::string common_chat_format_single(
const common_chat_template & tmpl,
const std::vector<common_chat_msg> & past_msg,
const common_chat_msg & new_msg,
bool add_ass);
bool add_ass,
bool use_jinja);
// Returns an example of formatted chat
std::string common_chat_format_example(const struct llama_model * model,
const std::string & tmpl);
std::string common_chat_format_example(
const common_chat_template & tmpl, bool use_jinja);
common_chat_templates common_chat_templates_from_model(const struct llama_model * model, const std::string & chat_template_override);
//
// KV cache utils