main: add need_insert_eot
This commit is contained in:
parent
20fc3804bf
commit
d7a877e244
2 changed files with 23 additions and 4 deletions
|
@ -1409,7 +1409,9 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
|
||||||
"halt generation at PROMPT, return control in interactive mode\n"
|
"halt generation at PROMPT, return control in interactive mode\n"
|
||||||
"can be specified more than once for multiple prompts" });
|
"can be specified more than once for multiple prompts" });
|
||||||
options.push_back({ "main", "-sp, --special", "special tokens output enabled (default: %s)", params.special ? "true" : "false" });
|
options.push_back({ "main", "-sp, --special", "special tokens output enabled (default: %s)", params.special ? "true" : "false" });
|
||||||
options.push_back({ "main", "-cnv, --conversation", "run in conversation mode (does not print special tokens and suffix/prefix, use default chat template) (default: %s)", params.conversation ? "true" : "false" });
|
options.push_back({ "main", "-cnv, --conversation", "run in conversation mode, does not print special tokens and suffix/prefix\n"
|
||||||
|
"if suffix/prefix are not specified, default chat template will be used\n"
|
||||||
|
"(default: %s)", params.conversation ? "true" : "false" });
|
||||||
options.push_back({ "main infill", "-i, --interactive", "run in interactive mode (default: %s)", params.interactive ? "true" : "false" });
|
options.push_back({ "main infill", "-i, --interactive", "run in interactive mode (default: %s)", params.interactive ? "true" : "false" });
|
||||||
options.push_back({ "main infill", "-if, --interactive-first", "run in interactive mode and wait for input right away (default: %s)", params.interactive_first ? "true" : "false" });
|
options.push_back({ "main infill", "-if, --interactive-first", "run in interactive mode and wait for input right away (default: %s)", params.interactive_first ? "true" : "false" });
|
||||||
options.push_back({ "main infill", "-mli, --multiline-input", "allows you to write or paste multiple lines without ending each in '\\'" });
|
options.push_back({ "main infill", "-mli, --multiline-input", "allows you to write or paste multiple lines without ending each in '\\'" });
|
||||||
|
@ -1453,6 +1455,7 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
|
||||||
options.push_back({ "main", " --cfg-scale N", "strength of guidance (default: %.1f, 1.0 = disable)", (double)sparams.cfg_scale });
|
options.push_back({ "main", " --cfg-scale N", "strength of guidance (default: %.1f, 1.0 = disable)", (double)sparams.cfg_scale });
|
||||||
options.push_back({ "main", " --chat-template JINJA_TEMPLATE",
|
options.push_back({ "main", " --chat-template JINJA_TEMPLATE",
|
||||||
"set custom jinja chat template (default: template taken from model's metadata)\n"
|
"set custom jinja chat template (default: template taken from model's metadata)\n"
|
||||||
|
"if suffix/prefix are specified, template will be disabled\n"
|
||||||
"only commonly used templates are accepted:\n"
|
"only commonly used templates are accepted:\n"
|
||||||
"https://github.com/ggerganov/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template" });
|
"https://github.com/ggerganov/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template" });
|
||||||
options.push_back({ "grammar" });
|
options.push_back({ "grammar" });
|
||||||
|
|
|
@ -37,7 +37,8 @@ static gpt_params * g_params;
|
||||||
static std::vector<llama_token> * g_input_tokens;
|
static std::vector<llama_token> * g_input_tokens;
|
||||||
static std::ostringstream * g_output_ss;
|
static std::ostringstream * g_output_ss;
|
||||||
static std::vector<llama_token> * g_output_tokens;
|
static std::vector<llama_token> * g_output_tokens;
|
||||||
static bool is_interacting = false;
|
static bool is_interacting = false;
|
||||||
|
static bool need_insert_eot = false;
|
||||||
|
|
||||||
static bool file_exists(const std::string & path) {
|
static bool file_exists(const std::string & path) {
|
||||||
std::ifstream f(path.c_str());
|
std::ifstream f(path.c_str());
|
||||||
|
@ -99,7 +100,8 @@ static void write_logfile(
|
||||||
static void sigint_handler(int signo) {
|
static void sigint_handler(int signo) {
|
||||||
if (signo == SIGINT) {
|
if (signo == SIGINT) {
|
||||||
if (!is_interacting && g_params->interactive) {
|
if (!is_interacting && g_params->interactive) {
|
||||||
is_interacting = true;
|
is_interacting = true;
|
||||||
|
need_insert_eot = true;
|
||||||
} else {
|
} else {
|
||||||
console::cleanup();
|
console::cleanup();
|
||||||
printf("\n");
|
printf("\n");
|
||||||
|
@ -224,7 +226,14 @@ int main(int argc, char ** argv) {
|
||||||
__func__, n_ctx_train, n_ctx);
|
__func__, n_ctx_train, n_ctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG_TEE("%s: chat template example: %s\n", __func__, llama_chat_format_example(model, params.chat_template).c_str());
|
// print chat template example in conversation mode
|
||||||
|
if (params.conversation) {
|
||||||
|
if (params.enable_chat_template) {
|
||||||
|
LOG_TEE("%s: chat template example: %s\n", __func__, llama_chat_format_example(model, params.chat_template).c_str());
|
||||||
|
} else {
|
||||||
|
LOG_TEE("%s: in-suffix/prefix is specified, chat template will be disabled\n", __func__);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// print system information
|
// print system information
|
||||||
{
|
{
|
||||||
|
@ -885,6 +894,13 @@ int main(int argc, char ** argv) {
|
||||||
|
|
||||||
LOG("input tokens: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, line_inp).c_str());
|
LOG("input tokens: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, line_inp).c_str());
|
||||||
|
|
||||||
|
// if the user stops generation mid-way, we must add EOT to finish the model's last response
|
||||||
|
if (need_insert_eot && format_chat) {
|
||||||
|
llama_token eot = llama_token_eot(model);
|
||||||
|
embd_inp.push_back(eot == -1 ? llama_token_eos(model) : eot);
|
||||||
|
need_insert_eot = false;
|
||||||
|
}
|
||||||
|
|
||||||
embd_inp.insert(embd_inp.end(), line_pfx.begin(), line_pfx.end());
|
embd_inp.insert(embd_inp.end(), line_pfx.begin(), line_pfx.end());
|
||||||
embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end());
|
embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end());
|
||||||
embd_inp.insert(embd_inp.end(), line_sfx.begin(), line_sfx.end());
|
embd_inp.insert(embd_inp.end(), line_sfx.begin(), line_sfx.end());
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue