update implementation
This commit is contained in:
parent
0dc9dcff13
commit
7c60721217
8 changed files with 120 additions and 65 deletions
|
@ -1,5 +1,9 @@
|
||||||
--clean-interface --interactive-first --keep -1
|
--clean-interface
|
||||||
--in-prefix-bos --in-prefix "\n\n### Instruction:\n\n" -r "### Instruction:\n\n"
|
--interactive-first
|
||||||
--in-suffix "\n\n### Response:\n\n"
|
--keep -1
|
||||||
|
--ins-prefix-bos
|
||||||
|
--ins-prefix "\n\n### Instruction:\n\n"
|
||||||
|
--ins-suffix "\n\n### Response:\n\n"
|
||||||
|
--reverse-prompt "### Instruction:\n\n"
|
||||||
|
|
||||||
-p "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n"
|
-p "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n"
|
||||||
|
|
|
@ -1,7 +1,10 @@
|
||||||
--interactive-first --keep -1
|
--interactive-first
|
||||||
--in-prefix-bos
|
--keep -1
|
||||||
--in-prefix "\nUser: " -r "User: "
|
--ins-prefix-bos
|
||||||
--in-suffix "\nBob: "
|
--ins-prefix "\nUser: "
|
||||||
|
--ins-suffix "\nBob: "
|
||||||
|
--reverse-prompt "User: "
|
||||||
|
--rm-trailing-space-workaround
|
||||||
|
|
||||||
-p "Transcript of a dialog, where the User interacts with an Assistant named Bob. Bob is helpful, kind, honest, good at writing, and never fails to answer the User's requests immediately and with precision.
|
-p "Transcript of a dialog, where the User interacts with an Assistant named Bob. Bob is helpful, kind, honest, good at writing, and never fails to answer the User's requests immediately and with precision.
|
||||||
User: Hello, Bob.
|
User: Hello, Bob.
|
||||||
|
|
|
@ -1,4 +1,7 @@
|
||||||
--interactive-first --keep -1
|
--interactive-first
|
||||||
--in-prefix-bos
|
--keep -1
|
||||||
--in-prefix "\n### Human: " --reverse-prompt "### Human: "
|
--ins-prefix-bos
|
||||||
--in-suffix "\n### Assistant: "
|
--ins-prefix "\n### Human: "
|
||||||
|
--ins-suffix "\n### Assistant: "
|
||||||
|
--reverse-prompt "### Human: "
|
||||||
|
--rm-trailing-space-workaround
|
||||||
|
|
|
@ -1,4 +1,8 @@
|
||||||
--interactive-first --keep -1
|
--interactive-first
|
||||||
--in-prefix-bos
|
--keep -1
|
||||||
--in-prefix "\n### Human: " --reverse-prompt "### Human: "
|
--ins-prefix-bos
|
||||||
--in-suffix "\n### Assistant: " --stop-prompt "### Assistant: "
|
--ins-prefix "\n### Human: "
|
||||||
|
--ins-suffix "\n### Assistant: "
|
||||||
|
--reverse-prompt "### Human: "
|
||||||
|
--stop-prompt "### Assistant: "
|
||||||
|
--rm-trailing-space-workaround
|
||||||
|
|
|
@ -1,5 +1,9 @@
|
||||||
--interactive-first --keep -1
|
--interactive-first
|
||||||
--in-prefix-bos --in-prefix "\n### Human: " -r "### Human: "
|
--keep -1
|
||||||
--in-suffix "\n### Assistant: "
|
--ins-prefix-bos
|
||||||
|
--ins-prefix "\n### Human: "
|
||||||
|
--ins-suffix "\n### Assistant: "
|
||||||
|
--reverse-prompt "### Human: "
|
||||||
|
--rm-trailing-space-workaround
|
||||||
|
|
||||||
-p "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions."
|
-p "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions."
|
||||||
|
|
|
@ -231,18 +231,18 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
|
||||||
"--clean-interface "
|
"--clean-interface "
|
||||||
"--interactive-first "
|
"--interactive-first "
|
||||||
"--keep -1 "
|
"--keep -1 "
|
||||||
"--in-prefix-bos "
|
"--ins-prefix-bos "
|
||||||
"--in-prefix \"\\n\\n### Instruction:\\n\\n\" "
|
"--ins-prefix \"\\n\\n### Instruction:\\n\\n\" "
|
||||||
"--in-suffix \"\\n\\n### Response:\\n\\n\" "
|
"--ins-suffix \"\\n\\n### Response:\\n\\n\" "
|
||||||
"-r \"### Instruction:\\n\\n\" "
|
"-r \"### Instruction:\\n\\n\" "
|
||||||
"\n\n");
|
"\n\n");
|
||||||
// params.instruct = true;
|
// params.instruct = true;
|
||||||
params.clean_interface = true;
|
params.clean_interface = true;
|
||||||
params.interactive_start = true;
|
params.interactive_start = true;
|
||||||
params.n_keep = -1;
|
params.n_keep = -1;
|
||||||
params.input_prefix_bos = true;
|
params.instruct_prefix_bos = true;
|
||||||
params.input_prefix = "\n\n### Instruction:\n\n";
|
params.instruct_prefix = "\n\n### Instruction:\n\n";
|
||||||
params.input_suffix = "\n\n### Response:\n\n";
|
params.instruct_suffix = "\n\n### Response:\n\n";
|
||||||
params.antiprompt.push_back("### Instruction:\n\n");
|
params.antiprompt.push_back("### Instruction:\n\n");
|
||||||
} else if (arg == "--color") {
|
} else if (arg == "--color") {
|
||||||
params.use_color = true;
|
params.use_color = true;
|
||||||
|
@ -268,6 +268,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
params.stopprompt.push_back(args[i]);
|
params.stopprompt.push_back(args[i]);
|
||||||
|
} else if (arg == "--rm-trailing-space-workaround") {
|
||||||
|
params.rm_trailing_space_workaround = true;
|
||||||
} else if (arg == "--perplexity") {
|
} else if (arg == "--perplexity") {
|
||||||
params.perplexity = true;
|
params.perplexity = true;
|
||||||
} else if (arg == "--ignore-eos") {
|
} else if (arg == "--ignore-eos") {
|
||||||
|
@ -283,22 +285,28 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
|
||||||
exit(0);
|
exit(0);
|
||||||
} else if (arg == "--random-prompt") {
|
} else if (arg == "--random-prompt") {
|
||||||
params.random_prompt = true;
|
params.random_prompt = true;
|
||||||
} else if (arg == "--in-prefix-bos") {
|
|
||||||
params.input_prefix_bos = true;
|
|
||||||
} else if (arg == "--in-prefix") {
|
} else if (arg == "--in-prefix") {
|
||||||
if (++i >= args_c) {
|
if (++i >= args_c) {
|
||||||
invalid_param = true;
|
invalid_param = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
params.input_prefix = args[i];
|
params.input_prefix = args[i];
|
||||||
} else if (arg == "--in-suffix-bos") {
|
} else if (arg == "--ins-prefix-bos") {
|
||||||
params.input_suffix_bos = true;
|
params.instruct_prefix_bos = true;
|
||||||
} else if (arg == "--in-suffix") {
|
} else if (arg == "--ins-prefix") {
|
||||||
if (++i >= args_c) {
|
if (++i >= args_c) {
|
||||||
invalid_param = true;
|
invalid_param = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
params.input_suffix = args[i];
|
params.instruct_prefix = args[i];
|
||||||
|
} else if (arg == "--ins-suffix-bos") {
|
||||||
|
params.instruct_suffix_bos = true;
|
||||||
|
} else if (arg == "--ins-suffix") {
|
||||||
|
if (++i >= args_c) {
|
||||||
|
invalid_param = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
params.instruct_suffix = args[i];
|
||||||
} else {
|
} else {
|
||||||
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
|
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
|
||||||
gpt_print_usage(argv[0], default_params);
|
gpt_print_usage(argv[0], default_params);
|
||||||
|
@ -332,10 +340,11 @@ void gpt_print_usage(char * argv_0, const gpt_params & params) {
|
||||||
fprintf(stderr, " -p PROMPT, --prompt PROMPT\n");
|
fprintf(stderr, " -p PROMPT, --prompt PROMPT\n");
|
||||||
fprintf(stderr, " prompt to start generation with (default: empty)\n");
|
fprintf(stderr, " prompt to start generation with (default: empty)\n");
|
||||||
fprintf(stderr, " --random-prompt start with a randomized prompt.\n");
|
fprintf(stderr, " --random-prompt start with a randomized prompt.\n");
|
||||||
fprintf(stderr, " --in-prefix-bos append bos token to input prefix.\n");
|
|
||||||
fprintf(stderr, " --in-prefix STRING string to prefix user inputs with (default: empty)\n");
|
fprintf(stderr, " --in-prefix STRING string to prefix user inputs with (default: empty)\n");
|
||||||
fprintf(stderr, " --in-suffix-bos append bos token to input suffix.\n");
|
fprintf(stderr, " --ins-prefix STRING (instruct) prefix user inputs with tokenized string (default: empty)\n");
|
||||||
fprintf(stderr, " --in-suffix STRING string to suffix user inputs with (default: empty)\n");
|
fprintf(stderr, " --ins-prefix-bos (instruct) prepend bos token to instruct prefix.\n");
|
||||||
|
fprintf(stderr, " --ins-suffix STRING (instruct) suffix user inputs with tokenized string (default: empty)\n");
|
||||||
|
fprintf(stderr, " --ins-suffix-bos (instruct) prepend bos token to instruct suffix.\n");
|
||||||
fprintf(stderr, " -f FNAME, --file FNAME\n");
|
fprintf(stderr, " -f FNAME, --file FNAME\n");
|
||||||
fprintf(stderr, " prompt file to start generation.\n");
|
fprintf(stderr, " prompt file to start generation.\n");
|
||||||
fprintf(stderr, " -n N, --n_predict N number of tokens to predict (default: %d, -1 = infinity)\n", params.n_predict);
|
fprintf(stderr, " -n N, --n_predict N number of tokens to predict (default: %d, -1 = infinity)\n", params.n_predict);
|
||||||
|
|
|
@ -32,12 +32,16 @@ struct gpt_params {
|
||||||
std::string model = "models/lamma-7B/ggml-model.bin"; // model path
|
std::string model = "models/lamma-7B/ggml-model.bin"; // model path
|
||||||
std::string prompt = "";
|
std::string prompt = "";
|
||||||
std::string input_prefix = ""; // string to prefix user inputs with
|
std::string input_prefix = ""; // string to prefix user inputs with
|
||||||
bool input_prefix_bos = false; // append bos token to input prefix
|
|
||||||
std::string input_suffix = ""; // string to suffix user inputs with
|
std::string instruct_prefix = ""; // prefix user inputs with tokenized string
|
||||||
bool input_suffix_bos = false; // append bos token to input suffix
|
bool instruct_prefix_bos = false; // prepend bos token to instruct prefix
|
||||||
|
std::string instruct_suffix = ""; // suffix user inputs with tokenized string
|
||||||
|
bool instruct_suffix_bos = false; // prepend bos token to instruct suffix
|
||||||
|
|
||||||
std::vector<std::string> antiprompt; // string upon seeing which more user input is prompted
|
std::vector<std::string> antiprompt; // string upon seeing which more user input is prompted
|
||||||
std::vector<std::string> stopprompt; // string upon seeing which more user input is prompted (without adding prefixes and suffixes)
|
std::vector<std::string> stopprompt; // string upon seeing which more user input is prompted (without adding instruct prefixes and suffixes)
|
||||||
|
|
||||||
|
bool rm_trailing_space_workaround = false; // workaround for removing trailing space from reverse/stop prompts
|
||||||
|
|
||||||
bool memory_f16 = true; // use f16 instead of f32 for memory kv
|
bool memory_f16 = true; // use f16 instead of f32 for memory kv
|
||||||
bool random_prompt = false; // do not randomize prompt if none provided
|
bool random_prompt = false; // do not randomize prompt if none provided
|
||||||
|
|
|
@ -85,6 +85,8 @@ int main(int argc, char ** argv) {
|
||||||
params.prompt = gpt_random_prompt(rng);
|
params.prompt = gpt_random_prompt(rng);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool instruct_mode = params.instruct_prefix.empty() && params.instruct_suffix.empty();
|
||||||
|
|
||||||
// params.prompt = R"(// this function checks if the number n is prime
|
// params.prompt = R"(// this function checks if the number n is prime
|
||||||
//bool is_prime(int n) {)";
|
//bool is_prime(int n) {)";
|
||||||
|
|
||||||
|
@ -154,10 +156,12 @@ int main(int argc, char ** argv) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// prefix & suffix for instruct mode
|
// prefix & suffix for instruct mode
|
||||||
const auto inp_pfx = ::llama_tokenize(ctx, params.input_prefix, params.input_prefix_bos);
|
const auto inp_pfx = ::llama_tokenize(ctx, params.instruct_prefix, params.instruct_prefix_bos);
|
||||||
std::string input_suffix = params.input_suffix;
|
std::string instruct_suffix = params.instruct_suffix;
|
||||||
if (input_suffix.back() == ' ') { input_suffix.pop_back(); } // (remove trailing space workaround)
|
if (params.rm_trailing_space_workaround) {
|
||||||
const auto inp_sfx = ::llama_tokenize(ctx, input_suffix, params.input_suffix_bos);
|
if (instruct_suffix.back() == ' ') { instruct_suffix.pop_back(); }
|
||||||
|
}
|
||||||
|
const auto inp_sfx = ::llama_tokenize(ctx, instruct_suffix, params.instruct_suffix_bos);
|
||||||
|
|
||||||
// enable interactive mode if reverse prompt or interactive start is specified
|
// enable interactive mode if reverse prompt or interactive start is specified
|
||||||
if (params.antiprompt.size() != 0 || params.stopprompt.size() != 0 || params.interactive_start) {
|
if (params.antiprompt.size() != 0 || params.stopprompt.size() != 0 || params.interactive_start) {
|
||||||
|
@ -209,10 +213,13 @@ int main(int argc, char ** argv) {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!params.input_prefix.empty()) {
|
if (!params.input_prefix.empty()) {
|
||||||
fprintf(stderr, "Input prefix %s: '%s'\n", params.input_prefix_bos ? "(with bos token)" : "", params.input_prefix.c_str());
|
fprintf(stderr, "Input prefix: '%s'\n", params.input_prefix.c_str());
|
||||||
}
|
}
|
||||||
if (!params.input_suffix.empty()) {
|
if (!params.instruct_prefix.empty()) {
|
||||||
fprintf(stderr, "Input suffix %s: '%s'\n", params.input_suffix_bos ? "(with bos token)" : "", params.input_suffix.c_str());
|
fprintf(stderr, "Instruct prefix %s: '%s'\n", params.instruct_prefix_bos ? "(with bos token)" : "", params.instruct_prefix.c_str());
|
||||||
|
}
|
||||||
|
if (!params.instruct_suffix.empty()) {
|
||||||
|
fprintf(stderr, "Instruct suffix %s: '%s'\n", params.instruct_suffix_bos ? "(with bos token)" : "", params.instruct_suffix.c_str());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fprintf(stderr, "sampling: temp = %f, top_k = %d, top_p = %f, repeat_last_n = %i, repeat_penalty = %f\n",
|
fprintf(stderr, "sampling: temp = %f, top_k = %d, top_p = %f, repeat_last_n = %i, repeat_penalty = %f\n",
|
||||||
|
@ -232,9 +239,9 @@ int main(int argc, char ** argv) {
|
||||||
);
|
);
|
||||||
if (params.multiline_mode) {
|
if (params.multiline_mode) {
|
||||||
#if defined (_WIN32)
|
#if defined (_WIN32)
|
||||||
fprintf(stderr, " - Press Ctrl+Z and Return (EOF) to return control to LLaMa.\n\n");
|
fprintf(stderr, " - [MULTILINE MODE] Press Ctrl+Z and Return (EOF) to return control to LLaMa.\n\n");
|
||||||
#else
|
#else
|
||||||
fprintf(stderr, " - Press Ctrl+D (EOF) to return control to LLaMa.\n\n");
|
fprintf(stderr, " - [MULTILINE MODE] Press Ctrl+D (EOF) to return control to LLaMa.\n\n");
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
@ -320,7 +327,7 @@ int main(int argc, char ** argv) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// replace end of text token with newline token when in interactive mode
|
// replace end of text token with newline token when in interactive mode
|
||||||
if (id == llama_token_eos() && params.interactive && params.input_prefix.empty()) {
|
if (id == llama_token_eos() && params.interactive && instruct_mode) {
|
||||||
id = llama_token_newline.front();
|
id = llama_token_newline.front();
|
||||||
if (params.antiprompt.size() != 0) {
|
if (params.antiprompt.size() != 0) {
|
||||||
// tokenize and inject first reverse prompt
|
// tokenize and inject first reverse prompt
|
||||||
|
@ -377,8 +384,10 @@ int main(int argc, char ** argv) {
|
||||||
antiprompt.is_stop_prompt = false;
|
antiprompt.is_stop_prompt = false;
|
||||||
// Check if each of the reverse prompts appears at the end of the output.
|
// Check if each of the reverse prompts appears at the end of the output.
|
||||||
for (std::string & prompt : params.antiprompt) {
|
for (std::string & prompt : params.antiprompt) {
|
||||||
antiprompt.trailing_space = prompt.back() == ' ';
|
if (params.rm_trailing_space_workaround) {
|
||||||
antiprompt.len = prompt.length() - (antiprompt.trailing_space ? 1 : 0);
|
antiprompt.trailing_space = prompt.back() == ' ';
|
||||||
|
antiprompt.len = prompt.length() - (antiprompt.trailing_space ? 1 : 0);
|
||||||
|
}
|
||||||
if (last_output.find(prompt.c_str(), last_output.length() - antiprompt.len, antiprompt.len) != std::string::npos) {
|
if (last_output.find(prompt.c_str(), last_output.length() - antiprompt.len, antiprompt.len) != std::string::npos) {
|
||||||
is_interacting = true;
|
is_interacting = true;
|
||||||
antiprompt.any = true;
|
antiprompt.any = true;
|
||||||
|
@ -389,8 +398,10 @@ int main(int argc, char ** argv) {
|
||||||
}
|
}
|
||||||
if (!antiprompt.any) {
|
if (!antiprompt.any) {
|
||||||
for (std::string & prompt : params.stopprompt) {
|
for (std::string & prompt : params.stopprompt) {
|
||||||
antiprompt.trailing_space = prompt.back() == ' ';
|
if (params.rm_trailing_space_workaround) {
|
||||||
antiprompt.len = prompt.length() - (antiprompt.trailing_space ? 1 : 0);
|
antiprompt.trailing_space = prompt.back() == ' ';
|
||||||
|
antiprompt.len = prompt.length() - (antiprompt.trailing_space ? 1 : 0);
|
||||||
|
}
|
||||||
if (last_output.find(prompt.c_str(), last_output.length() - antiprompt.len, antiprompt.len) != std::string::npos) {
|
if (last_output.find(prompt.c_str(), last_output.length() - antiprompt.len, antiprompt.len) != std::string::npos) {
|
||||||
is_interacting = true;
|
is_interacting = true;
|
||||||
antiprompt.any = true;
|
antiprompt.any = true;
|
||||||
|
@ -406,21 +417,26 @@ int main(int argc, char ** argv) {
|
||||||
if (n_past > 0 && is_interacting)
|
if (n_past > 0 && is_interacting)
|
||||||
{
|
{
|
||||||
std::string buffer;
|
std::string buffer;
|
||||||
if (!params.clean_interface && !params.input_prefix.empty() && !antiprompt.any) {
|
if (!params.clean_interface && !params.instruct_prefix.empty() && !antiprompt.any) {
|
||||||
// avoid printing again user's new line (TODO: try to revert enter press and print newline)
|
// avoid printing again user's new line (TODO: try to revert enter press and print newline)
|
||||||
int i = params.input_prefix.front() == '\n' ? 1 : 0;
|
int i = params.instruct_prefix.front() == '\n' ? 1 : 0;
|
||||||
for (; i < inp_pfx.size(); i++) {
|
for (; i < inp_pfx.size(); i++) {
|
||||||
printf("%s", llama_token_to_str(ctx, inp_pfx[i]));
|
printf("%s", llama_token_to_str(ctx, inp_pfx[i]));
|
||||||
}
|
}
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
}
|
}
|
||||||
if (antiprompt.any && antiprompt.trailing_space) {
|
if (params.rm_trailing_space_workaround) {
|
||||||
// add back removed trailing space to buffer(workaround)
|
// add only if not stopprompt (as stopprompt could be used to pause
|
||||||
buffer += ' ';
|
// assistant and then continue without input - adding back trailing
|
||||||
if (!params.clean_interface) {
|
// space may mess it up.)
|
||||||
printf("%s", buffer.c_str());
|
if (!antiprompt.is_stop_prompt && antiprompt.any && antiprompt.trailing_space) {
|
||||||
|
// add back removed trailing space to buffer(workaround)
|
||||||
|
buffer += ' ';
|
||||||
|
if (!params.clean_interface) {
|
||||||
|
printf("%s", buffer.c_str());
|
||||||
|
}
|
||||||
|
fflush(stdout);
|
||||||
}
|
}
|
||||||
fflush(stdout);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// potentially set color to indicate we are taking user input
|
// potentially set color to indicate we are taking user input
|
||||||
|
@ -435,6 +451,11 @@ int main(int argc, char ** argv) {
|
||||||
printf("\n> ");
|
printf("\n> ");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!params.input_prefix.empty()) {
|
||||||
|
buffer += params.input_prefix;
|
||||||
|
printf("%s", buffer.c_str());
|
||||||
|
}
|
||||||
|
|
||||||
if (!get_input_text(buffer, !params.multiline_mode)) {
|
if (!get_input_text(buffer, !params.multiline_mode)) {
|
||||||
// input stream is bad
|
// input stream is bad
|
||||||
return 1;
|
return 1;
|
||||||
|
@ -446,13 +467,16 @@ int main(int argc, char ** argv) {
|
||||||
// done taking input, reset color
|
// done taking input, reset color
|
||||||
set_console_color(con_st, CONSOLE_COLOR_DEFAULT);
|
set_console_color(con_st, CONSOLE_COLOR_DEFAULT);
|
||||||
|
|
||||||
if (!params.clean_interface && !params.input_suffix.empty() && !antiprompt.is_stop_prompt) {
|
if (!params.clean_interface && !params.instruct_suffix.empty() && !antiprompt.is_stop_prompt) {
|
||||||
// avoid printing again user's new line (TODO: try to revert enter press and print newline)
|
// avoid printing again user's new line (TODO: try to revert enter press and print newline)
|
||||||
int i = params.input_suffix.front() == '\n' ? 1 : 0;
|
int i = params.instruct_suffix.front() == '\n' ? 1 : 0;
|
||||||
for (; i < inp_sfx.size(); i++) {
|
for (; i < inp_sfx.size(); i++) {
|
||||||
printf("%s", llama_token_to_str(ctx, inp_sfx[i]));
|
printf("%s", llama_token_to_str(ctx, inp_sfx[i]));
|
||||||
}
|
}
|
||||||
// we won't add back removed trailing space here (workaround)
|
// if (remove trailing space workaround) {
|
||||||
|
// We won't add back removed trailing space here, because assistant continues here,
|
||||||
|
// and it may mess up it's output (remove trailing space workaround).
|
||||||
|
// }
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -461,7 +485,7 @@ int main(int argc, char ** argv) {
|
||||||
if (buffer.length() > 1) {
|
if (buffer.length() > 1) {
|
||||||
|
|
||||||
// insert input prefix
|
// insert input prefix
|
||||||
if (!params.input_prefix.empty() && !antiprompt.any) {
|
if (!params.instruct_prefix.empty() && !antiprompt.any) {
|
||||||
n_consumed = embd_inp.size();
|
n_consumed = embd_inp.size();
|
||||||
embd_inp.insert(embd_inp.end(), inp_pfx.begin(), inp_pfx.end());
|
embd_inp.insert(embd_inp.end(), inp_pfx.begin(), inp_pfx.end());
|
||||||
}
|
}
|
||||||
|
@ -470,7 +494,7 @@ int main(int argc, char ** argv) {
|
||||||
embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end());
|
embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end());
|
||||||
|
|
||||||
// insert response suffix
|
// insert response suffix
|
||||||
if (!params.input_suffix.empty() && !antiprompt.is_stop_prompt) {
|
if (!params.instruct_suffix.empty() && !antiprompt.is_stop_prompt) {
|
||||||
embd_inp.insert(embd_inp.end(), inp_sfx.begin(), inp_sfx.end());
|
embd_inp.insert(embd_inp.end(), inp_sfx.begin(), inp_sfx.end());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -487,7 +511,7 @@ int main(int argc, char ** argv) {
|
||||||
|
|
||||||
// end of text token
|
// end of text token
|
||||||
if (!embd.empty() && embd.back() == llama_token_eos()) {
|
if (!embd.empty() && embd.back() == llama_token_eos()) {
|
||||||
if (params.interactive && !params.input_prefix.empty()) {
|
if (instruct_mode) {
|
||||||
is_interacting = true;
|
is_interacting = true;
|
||||||
} else {
|
} else {
|
||||||
fprintf(stderr, " [end of text]\n");
|
fprintf(stderr, " [end of text]\n");
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue