Forward-declare minja::chat_template to avoid an eager JSON dependency
This commit is contained in:
parent
ee1e10e21e
commit
e63520f37a
5 changed files with 40 additions and 21 deletions
|
@ -4,6 +4,7 @@
|
|||
#include "log.h"
|
||||
#include "sampling.h"
|
||||
#include "llama.h"
|
||||
#include "chat-template.hpp"
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
|
@ -200,7 +201,7 @@ int main(int argc, char ** argv) {
|
|||
}
|
||||
|
||||
// auto enable conversation mode if chat template is available
|
||||
const bool has_chat_template = !chat_templates.default_template.source().empty();
|
||||
const bool has_chat_template = chat_templates.has_explicit_template && chat_templates.default_template;
|
||||
if (params.conversation_mode == COMMON_CONVERSATION_MODE_AUTO) {
|
||||
if (has_chat_template) {
|
||||
LOG_INF("%s: chat template is available, enabling conversation mode (disable it with -no-cnv)\n", __func__);
|
||||
|
@ -218,7 +219,7 @@ int main(int argc, char ** argv) {
|
|||
// print chat template example in conversation mode
|
||||
if (params.conversation_mode) {
|
||||
if (params.enable_chat_template) {
|
||||
LOG_INF("%s: chat template example:\n%s\n", __func__, common_chat_format_example(chat_templates.default_template, params.use_jinja).c_str());
|
||||
LOG_INF("%s: chat template example:\n%s\n", __func__, common_chat_format_example(*chat_templates.default_template, params.use_jinja).c_str());
|
||||
} else {
|
||||
LOG_INF("%s: in-suffix/prefix is specified, chat template will be disabled\n", __func__);
|
||||
}
|
||||
|
@ -264,7 +265,7 @@ int main(int argc, char ** argv) {
|
|||
|
||||
auto chat_add_and_format = [&chat_msgs, &chat_templates](const std::string & role, const std::string & content) {
|
||||
common_chat_msg new_msg{role, content};
|
||||
auto formatted = common_chat_format_single(chat_templates.default_template, chat_msgs, new_msg, role == "user", g_params->use_jinja);
|
||||
auto formatted = common_chat_format_single(*chat_templates.default_template, chat_msgs, new_msg, role == "user", g_params->use_jinja);
|
||||
chat_msgs.push_back({role, content});
|
||||
LOG_DBG("formatted: '%s'\n", formatted.c_str());
|
||||
return formatted;
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
#include "common.h"
|
||||
#include "json.hpp"
|
||||
#include "llama-cpp.h"
|
||||
#include "chat-template.hpp"
|
||||
|
||||
#if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__)) || defined(_WIN32)
|
||||
[[noreturn]] static void sigint_handler(int) {
|
||||
|
@ -936,6 +937,7 @@ static int chat_loop(LlamaData & llama_data, const std::string & user, bool use_
|
|||
int prev_len = 0;
|
||||
llama_data.fmtted.resize(llama_n_ctx(llama_data.context.get()));
|
||||
auto chat_templates = llama_chat_templates_from_model(llama_data.model.get(), "");
|
||||
GGML_ASSERT(chat_templates.default_template);
|
||||
static const bool stdout_a_terminal = is_stdout_a_terminal();
|
||||
while (true) {
|
||||
// Get user input
|
||||
|
@ -946,7 +948,7 @@ static int chat_loop(LlamaData & llama_data, const std::string & user, bool use_
|
|||
|
||||
add_message("user", user.empty() ? user_input : user, llama_data);
|
||||
int new_len;
|
||||
if (apply_chat_template_with_error_handling(chat_templates.default_template, llama_data, true, new_len, use_jinja) < 0) {
|
||||
if (apply_chat_template_with_error_handling(*chat_templates.default_template, llama_data, true, new_len, use_jinja) < 0) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -961,7 +963,7 @@ static int chat_loop(LlamaData & llama_data, const std::string & user, bool use_
|
|||
}
|
||||
|
||||
add_message("assistant", response, llama_data);
|
||||
if (apply_chat_template_with_error_handling(chat_templates.default_template, llama_data, false, prev_len, use_jinja) < 0) {
|
||||
if (apply_chat_template_with_error_handling(*chat_templates.default_template, llama_data, false, prev_len, use_jinja) < 0) {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1745,8 +1745,9 @@ struct server_context {
|
|||
|
||||
if (use_jinja) {
|
||||
auto templates = llama_chat_templates_from_model(model, "");
|
||||
GGML_ASSERT(templates.default_template);
|
||||
try {
|
||||
templates.default_template.apply({{
|
||||
templates.default_template->apply({{
|
||||
{"role", "user"},
|
||||
{"content", "test"},
|
||||
}}, json(), true);
|
||||
|
@ -3630,6 +3631,7 @@ int main(int argc, char ** argv) {
|
|||
std::lock_guard<std::mutex> lock(chat_templates_mutex);
|
||||
if (!chat_templates) {
|
||||
chat_templates = llama_chat_templates_from_model(ctx_server.model, ctx_server.params_base.chat_template);
|
||||
GGML_ASSERT(chat_templates->default_template);
|
||||
}
|
||||
return *chat_templates;
|
||||
};
|
||||
|
@ -3641,7 +3643,7 @@ int main(int argc, char ** argv) {
|
|||
{ "default_generation_settings", ctx_server.default_generation_settings_for_props },
|
||||
{ "total_slots", ctx_server.params_base.n_parallel },
|
||||
{ "model_path", ctx_server.params_base.model },
|
||||
{ "chat_template", templates.default_template.source() },
|
||||
{ "chat_template", templates.default_template->source() },
|
||||
{ "build_info", build_info },
|
||||
};
|
||||
if (ctx_server.params_base.use_jinja && templates.tool_use_template) {
|
||||
|
@ -3868,7 +3870,7 @@ int main(int argc, char ** argv) {
|
|||
|
||||
auto body = json::parse(req.body);
|
||||
const auto & templates = get_chat_templates();
|
||||
const auto & chat_template = body.contains("tools") && templates.tool_use_template ? *templates.tool_use_template : templates.default_template;
|
||||
const auto & chat_template = body.contains("tools") && templates.tool_use_template ? *templates.tool_use_template : *templates.default_template;
|
||||
json data = oaicompat_completion_params_parse(body, chat_template, params.use_jinja);
|
||||
|
||||
return handle_completions_impl(
|
||||
|
@ -4287,8 +4289,8 @@ int main(int argc, char ** argv) {
|
|||
|
||||
// print sample chat example to make it clear which template is used
|
||||
LOG_INF("%s: chat template, chat_template: %s, example_format: '%s'\n", __func__,
|
||||
get_chat_templates().default_template.source().c_str(),
|
||||
common_chat_format_example(get_chat_templates().default_template, ctx_server.params_base.use_jinja).c_str());
|
||||
get_chat_templates().default_template->source().c_str(),
|
||||
common_chat_format_example(*get_chat_templates().default_template, ctx_server.params_base.use_jinja).c_str());
|
||||
|
||||
ctx_server.queue_tasks.on_new_task(std::bind(
|
||||
&server_context::process_single_task, &ctx_server, std::placeholders::_1));
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue