Merge branch 'master' into gg/llama-kv-cache

ggml-ci

commit 0f1c1cab2c
83 changed files with 3593 additions and 1410 deletions
@@ -24,15 +24,16 @@
 #include <string>
 #include <vector>
 
+#include "chat-template.hpp"
 #include "common.h"
 #include "json.hpp"
 #include "linenoise.cpp/linenoise.h"
 #include "llama-cpp.h"
-#include "chat-template.hpp"
+#include "log.h"
 
 #if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__)) || defined(_WIN32)
 [[noreturn]] static void sigint_handler(int) {
-    printf("\n\033[0m");
+    printf("\n" LOG_COL_DEFAULT);
     exit(0); // not ideal, but it's the only way to guarantee exit in all cases
 }
 #endif
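The hunk above swaps raw ANSI escape literals for color macros pulled in via "log.h". A minimal sketch of the assumption this relies on, namely that LOG_COL_DEFAULT and LOG_COL_YELLOW are plain string literals so adjacent-literal concatenation such as "\n" LOG_COL_DEFAULT still works; the exact definitions in log.h may differ:

// Hedged sketch -- assumed shape of the color macros provided by "log.h";
// the values mirror the escape sequences removed in this diff.
#include <cstdio>

#define LOG_COL_DEFAULT "\033[0m"   // reset terminal attributes
#define LOG_COL_YELLOW  "\033[33m"  // yellow foreground

int main() {
    // Adjacent string literals are concatenated at compile time,
    // so this is equivalent to printf("\n\033[0m").
    printf("\n" LOG_COL_DEFAULT);
    printf(LOG_COL_YELLOW "sample response" LOG_COL_DEFAULT "\n");
    return 0;
}
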
@@ -847,7 +848,15 @@ static int apply_chat_template(const common_chat_template & tmpl, LlamaData & ll
         });
     }
     try {
-        auto result = tmpl.apply(messages, /* tools= */ json(), append);
+        minja::chat_template_inputs tmpl_inputs;
+        tmpl_inputs.messages = messages;
+        tmpl_inputs.add_generation_prompt = append;
+
+        minja::chat_template_options tmpl_opts;
+        tmpl_opts.use_bos_token = false;
+        tmpl_opts.use_eos_token = false;
+
+        auto result = tmpl.apply(tmpl_inputs, tmpl_opts);
         llama_data.fmtted.resize(result.size() + 1);
         memcpy(llama_data.fmtted.data(), result.c_str(), result.size() + 1);
         return result.size();
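This hunk moves from the convenience overload tmpl.apply(messages, tools, append) to minja's explicit chat_template_inputs / chat_template_options structs, which makes the BOS/EOS handling explicit. Below is a minimal sketch of the same call pattern in isolation; the helper name render_prompt and the json parameter type are illustrative, and only the fields that appear in the diff are assumed to exist:

#include <string>

#include "chat-template.hpp"   // minja chat template machinery
#include "common.h"            // common_chat_template
#include "json.hpp"            // nlohmann::json

using json = nlohmann::json;

// Hypothetical helper that renders a prompt the way the diff above does.
static std::string render_prompt(const common_chat_template & tmpl,
                                 const json & messages, bool append) {
    minja::chat_template_inputs tmpl_inputs;
    tmpl_inputs.messages              = messages; // array of {role, content} objects (type assumed)
    tmpl_inputs.add_generation_prompt = append;   // ask for a trailing assistant header

    minja::chat_template_options tmpl_opts;
    tmpl_opts.use_bos_token = false;              // BOS/EOS are left to tokenization later
    tmpl_opts.use_eos_token = false;

    return tmpl.apply(tmpl_inputs, tmpl_opts);    // rendered prompt as std::string
}
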
@@ -890,7 +899,7 @@ static int check_context_size(const llama_context_ptr & ctx, const llama_batch &
     const int n_ctx = llama_n_ctx(ctx.get());
     const int n_ctx_used = llama_kv_self_used_cells(ctx.get());
     if (n_ctx_used + batch.n_tokens > n_ctx) {
-        printf("\033[0m\n");
+        printf(LOG_COL_DEFAULT "\n");
         printe("context size exceeded\n");
         return 1;
     }
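For context on this hunk (the part that touches the gg/llama-kv-cache surface), check_context_size compares the number of KV-cache cells already in use against the context size before a new batch is accepted. A rough sketch of the same guard wrapped around a decode call; llama_decode comes from the public llama.h API, and the helper name and error handling here are illustrative only:

#include <cstdio>

#include "llama.h"

// Hedged sketch: refuse to decode a batch that would overflow the context.
static int decode_checked(llama_context * ctx, llama_batch & batch) {
    const int n_ctx      = llama_n_ctx(ctx);
    const int n_ctx_used = llama_kv_self_used_cells(ctx);
    if (n_ctx_used + batch.n_tokens > n_ctx) {
        fprintf(stderr, "context size exceeded\n");
        return 1;
    }
    return llama_decode(ctx, batch) == 0 ? 0 : 1;
}
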
@@ -953,7 +962,7 @@ static int generate(LlamaData & llama_data, const std::string & prompt, std::str
         batch = llama_batch_get_one(&new_token_id, 1);
     }
 
-    printf("\033[0m");
+    printf(LOG_COL_DEFAULT);
     return 0;
 }
 
@@ -962,7 +971,7 @@ static int read_user_input(std::string & user_input) {
 #ifdef WIN32
     printf(
         "\r%*s"
-        "\r\033[0m%s",
+        "\r" LOG_COL_DEFAULT "%s",
         get_terminal_width(), " ", prompt_prefix);
 
     std::getline(std::cin, user_input);
@@ -999,7 +1008,7 @@ static int generate_response(LlamaData & llama_data, const std::string & prompt,
                              const bool stdout_a_terminal) {
     // Set response color
     if (stdout_a_terminal) {
-        printf("\033[33m");
+        printf(LOG_COL_YELLOW);
     }
 
     if (generate(llama_data, prompt, response)) {
@@ -1008,7 +1017,7 @@ static int generate_response(LlamaData & llama_data, const std::string & prompt,
     }
 
     // End response with color reset and newline
-    printf("\n%s", stdout_a_terminal ? "\033[0m" : "");
+    printf("\n%s", stdout_a_terminal ? LOG_COL_DEFAULT : "");
     return 0;
 }
 