Merge branch 'master' into gg/llama-kv-cache

ggml-ci
This commit is contained in:
Georgi Gerganov 2025-02-06 10:04:33 +02:00
commit 0f1c1cab2c
No known key found for this signature in database
GPG key ID: 449E073F9DC10735
83 changed files with 3593 additions and 1410 deletions

View file

@@ -24,15 +24,16 @@
#include <string>
#include <vector>
#include "chat-template.hpp"
#include "common.h"
#include "json.hpp"
#include "linenoise.cpp/linenoise.h"
#include "llama-cpp.h"
#include "chat-template.hpp"
#include "log.h"
#if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__)) || defined(_WIN32)
[[noreturn]] static void sigint_handler(int) {
printf("\n\033[0m");
printf("\n" LOG_COL_DEFAULT);
exit(0); // not ideal, but it's the only way to guarantee exit in all cases
}
#endif
@@ -847,7 +848,15 @@ static int apply_chat_template(const common_chat_template & tmpl, LlamaData & ll
});
}
try {
auto result = tmpl.apply(messages, /* tools= */ json(), append);
minja::chat_template_inputs tmpl_inputs;
tmpl_inputs.messages = messages;
tmpl_inputs.add_generation_prompt = append;
minja::chat_template_options tmpl_opts;
tmpl_opts.use_bos_token = false;
tmpl_opts.use_eos_token = false;
auto result = tmpl.apply(tmpl_inputs, tmpl_opts);
llama_data.fmtted.resize(result.size() + 1);
memcpy(llama_data.fmtted.data(), result.c_str(), result.size() + 1);
return result.size();
@@ -890,7 +899,7 @@ static int check_context_size(const llama_context_ptr & ctx, const llama_batch &
const int n_ctx = llama_n_ctx(ctx.get());
const int n_ctx_used = llama_kv_self_used_cells(ctx.get());
if (n_ctx_used + batch.n_tokens > n_ctx) {
printf("\033[0m\n");
printf(LOG_COL_DEFAULT "\n");
printe("context size exceeded\n");
return 1;
}
@@ -953,7 +962,7 @@ static int generate(LlamaData & llama_data, const std::string & prompt, std::str
batch = llama_batch_get_one(&new_token_id, 1);
}
printf("\033[0m");
printf(LOG_COL_DEFAULT);
return 0;
}
@@ -962,7 +971,7 @@ static int read_user_input(std::string & user_input) {
#ifdef WIN32
printf(
"\r%*s"
"\r\033[0m%s",
"\r" LOG_COL_DEFAULT "%s",
get_terminal_width(), " ", prompt_prefix);
std::getline(std::cin, user_input);
@@ -999,7 +1008,7 @@ static int generate_response(LlamaData & llama_data, const std::string & prompt,
const bool stdout_a_terminal) {
// Set response color
if (stdout_a_terminal) {
printf("\033[33m");
printf(LOG_COL_YELLOW);
}
if (generate(llama_data, prompt, response)) {
@@ -1008,7 +1017,7 @@ static int generate_response(LlamaData & llama_data, const std::string & prompt,
}
// End response with color reset and newline
printf("\n%s", stdout_a_terminal ? "\033[0m" : "");
printf("\n%s", stdout_a_terminal ? LOG_COL_DEFAULT : "");
return 0;
}