update implementation
This commit is contained in:
parent
0dc9dcff13
commit
7c60721217
8 changed files with 120 additions and 65 deletions
|
@ -1,5 +1,9 @@
|
|||
--clean-interface --interactive-first --keep -1
|
||||
--in-prefix-bos --in-prefix "\n\n### Instruction:\n\n" -r "### Instruction:\n\n"
|
||||
--in-suffix "\n\n### Response:\n\n"
|
||||
--clean-interface
|
||||
--interactive-first
|
||||
--keep -1
|
||||
--ins-prefix-bos
|
||||
--ins-prefix "\n\n### Instruction:\n\n"
|
||||
--ins-suffix "\n\n### Response:\n\n"
|
||||
--reverse-prompt "### Instruction:\n\n"
|
||||
|
||||
-p "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n"
|
||||
|
|
|
@ -1,7 +1,10 @@
|
|||
--interactive-first --keep -1
|
||||
--in-prefix-bos
|
||||
--in-prefix "\nUser: " -r "User: "
|
||||
--in-suffix "\nBob: "
|
||||
--interactive-first
|
||||
--keep -1
|
||||
--ins-prefix-bos
|
||||
--ins-prefix "\nUser: "
|
||||
--ins-suffix "\nBob: "
|
||||
--reverse-prompt "User: "
|
||||
--rm-trailing-space-workaround
|
||||
|
||||
-p "Transcript of a dialog, where the User interacts with an Assistant named Bob. Bob is helpful, kind, honest, good at writing, and never fails to answer the User's requests immediately and with precision.
|
||||
User: Hello, Bob.
|
||||
|
|
|
@ -1,4 +1,7 @@
|
|||
--interactive-first --keep -1
|
||||
--in-prefix-bos
|
||||
--in-prefix "\n### Human: " --reverse-prompt "### Human: "
|
||||
--in-suffix "\n### Assistant: "
|
||||
--interactive-first
|
||||
--keep -1
|
||||
--ins-prefix-bos
|
||||
--ins-prefix "\n### Human: "
|
||||
--ins-suffix "\n### Assistant: "
|
||||
--reverse-prompt "### Human: "
|
||||
--rm-trailing-space-workaround
|
||||
|
|
|
@ -1,4 +1,8 @@
|
|||
--interactive-first --keep -1
|
||||
--in-prefix-bos
|
||||
--in-prefix "\n### Human: " --reverse-prompt "### Human: "
|
||||
--in-suffix "\n### Assistant: " --stop-prompt "### Assistant: "
|
||||
--interactive-first
|
||||
--keep -1
|
||||
--ins-prefix-bos
|
||||
--ins-prefix "\n### Human: "
|
||||
--ins-suffix "\n### Assistant: "
|
||||
--reverse-prompt "### Human: "
|
||||
--stop-prompt "### Assistant: "
|
||||
--rm-trailing-space-workaround
|
||||
|
|
|
@ -1,5 +1,9 @@
|
|||
--interactive-first --keep -1
|
||||
--in-prefix-bos --in-prefix "\n### Human: " -r "### Human: "
|
||||
--in-suffix "\n### Assistant: "
|
||||
--interactive-first
|
||||
--keep -1
|
||||
--ins-prefix-bos
|
||||
--ins-prefix "\n### Human: "
|
||||
--ins-suffix "\n### Assistant: "
|
||||
--reverse-prompt "### Human: "
|
||||
--rm-trailing-space-workaround
|
||||
|
||||
-p "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions."
|
||||
|
|
|
@ -231,18 +231,18 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
|
|||
"--clean-interface "
|
||||
"--interactive-first "
|
||||
"--keep -1 "
|
||||
"--in-prefix-bos "
|
||||
"--in-prefix \"\\n\\n### Instruction:\\n\\n\" "
|
||||
"--in-suffix \"\\n\\n### Response:\\n\\n\" "
|
||||
"--ins-prefix-bos "
|
||||
"--ins-prefix \"\\n\\n### Instruction:\\n\\n\" "
|
||||
"--ins-suffix \"\\n\\n### Response:\\n\\n\" "
|
||||
"-r \"### Instruction:\\n\\n\" "
|
||||
"\n\n");
|
||||
// params.instruct = true;
|
||||
params.clean_interface = true;
|
||||
params.interactive_start = true;
|
||||
params.n_keep = -1;
|
||||
params.input_prefix_bos = true;
|
||||
params.input_prefix = "\n\n### Instruction:\n\n";
|
||||
params.input_suffix = "\n\n### Response:\n\n";
|
||||
params.instruct_prefix_bos = true;
|
||||
params.instruct_prefix = "\n\n### Instruction:\n\n";
|
||||
params.instruct_suffix = "\n\n### Response:\n\n";
|
||||
params.antiprompt.push_back("### Instruction:\n\n");
|
||||
} else if (arg == "--color") {
|
||||
params.use_color = true;
|
||||
|
@ -268,6 +268,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
|
|||
break;
|
||||
}
|
||||
params.stopprompt.push_back(args[i]);
|
||||
} else if (arg == "--rm-trailing-space-workaround") {
|
||||
params.rm_trailing_space_workaround = true;
|
||||
} else if (arg == "--perplexity") {
|
||||
params.perplexity = true;
|
||||
} else if (arg == "--ignore-eos") {
|
||||
|
@ -283,22 +285,28 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
|
|||
exit(0);
|
||||
} else if (arg == "--random-prompt") {
|
||||
params.random_prompt = true;
|
||||
} else if (arg == "--in-prefix-bos") {
|
||||
params.input_prefix_bos = true;
|
||||
} else if (arg == "--in-prefix") {
|
||||
if (++i >= args_c) {
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
params.input_prefix = args[i];
|
||||
} else if (arg == "--in-suffix-bos") {
|
||||
params.input_suffix_bos = true;
|
||||
} else if (arg == "--in-suffix") {
|
||||
} else if (arg == "--ins-prefix-bos") {
|
||||
params.instruct_prefix_bos = true;
|
||||
} else if (arg == "--ins-prefix") {
|
||||
if (++i >= args_c) {
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
params.input_suffix = args[i];
|
||||
params.instruct_prefix = args[i];
|
||||
} else if (arg == "--ins-suffix-bos") {
|
||||
params.instruct_suffix_bos = true;
|
||||
} else if (arg == "--ins-suffix") {
|
||||
if (++i >= args_c) {
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
params.instruct_suffix = args[i];
|
||||
} else {
|
||||
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
|
||||
gpt_print_usage(argv[0], default_params);
|
||||
|
@ -332,10 +340,11 @@ void gpt_print_usage(char * argv_0, const gpt_params & params) {
|
|||
fprintf(stderr, " -p PROMPT, --prompt PROMPT\n");
|
||||
fprintf(stderr, " prompt to start generation with (default: empty)\n");
|
||||
fprintf(stderr, " --random-prompt start with a randomized prompt.\n");
|
||||
fprintf(stderr, " --in-prefix-bos append bos token to input prefix.\n");
|
||||
fprintf(stderr, " --in-prefix STRING string to prefix user inputs with (default: empty)\n");
|
||||
fprintf(stderr, " --in-suffix-bos append bos token to input suffix.\n");
|
||||
fprintf(stderr, " --in-suffix STRING string to suffix user inputs with (default: empty)\n");
|
||||
fprintf(stderr, " --ins-prefix STRING (instruct) prefix user inputs with tokenized string (default: empty)\n");
|
||||
fprintf(stderr, " --ins-prefix-bos (instruct) prepend bos token to instruct prefix.\n");
|
||||
fprintf(stderr, " --ins-suffix STRING (instruct) suffix user inputs with tokenized string (default: empty)\n");
|
||||
fprintf(stderr, " --ins-suffix-bos (instruct) prepend bos token to instruct suffix.\n");
|
||||
fprintf(stderr, " -f FNAME, --file FNAME\n");
|
||||
fprintf(stderr, " prompt file to start generation.\n");
|
||||
fprintf(stderr, " -n N, --n_predict N number of tokens to predict (default: %d, -1 = infinity)\n", params.n_predict);
|
||||
|
|
|
@ -32,12 +32,16 @@ struct gpt_params {
|
|||
std::string model = "models/lamma-7B/ggml-model.bin"; // model path
|
||||
std::string prompt = "";
|
||||
std::string input_prefix = ""; // string to prefix user inputs with
|
||||
bool input_prefix_bos = false; // append bos token to input prefix
|
||||
std::string input_suffix = ""; // string to suffix user inputs with
|
||||
bool input_suffix_bos = false; // append bos token to input suffix
|
||||
|
||||
std::string instruct_prefix = ""; // prefix user inputs with tokenized string
|
||||
bool instruct_prefix_bos = false; // prepend bos token to instruct prefix
|
||||
std::string instruct_suffix = ""; // suffix user inputs with tokenized string
|
||||
bool instruct_suffix_bos = false; // prepend bos token to instruct suffix
|
||||
|
||||
std::vector<std::string> antiprompt; // string upon seeing which more user input is prompted
|
||||
std::vector<std::string> stopprompt; // string upon seeing which more user input is prompted (without adding prefixes and suffixes)
|
||||
std::vector<std::string> stopprompt; // string upon seeing which more user input is prompted (without adding instruct prefixes and suffixes)
|
||||
|
||||
bool rm_trailing_space_workaround = false; // workaround for removing trailing space from reverse/stop prompts
|
||||
|
||||
bool memory_f16 = true; // use f16 instead of f32 for memory kv
|
||||
bool random_prompt = false; // do not randomize prompt if none provided
|
||||
|
|
|
@ -85,6 +85,8 @@ int main(int argc, char ** argv) {
|
|||
params.prompt = gpt_random_prompt(rng);
|
||||
}
|
||||
|
||||
bool instruct_mode = params.instruct_prefix.empty() && params.instruct_suffix.empty();
|
||||
|
||||
// params.prompt = R"(// this function checks if the number n is prime
|
||||
//bool is_prime(int n) {)";
|
||||
|
||||
|
@ -154,10 +156,12 @@ int main(int argc, char ** argv) {
|
|||
}
|
||||
|
||||
// prefix & suffix for instruct mode
|
||||
const auto inp_pfx = ::llama_tokenize(ctx, params.input_prefix, params.input_prefix_bos);
|
||||
std::string input_suffix = params.input_suffix;
|
||||
if (input_suffix.back() == ' ') { input_suffix.pop_back(); } // (remove trailing space workaround)
|
||||
const auto inp_sfx = ::llama_tokenize(ctx, input_suffix, params.input_suffix_bos);
|
||||
const auto inp_pfx = ::llama_tokenize(ctx, params.instruct_prefix, params.instruct_prefix_bos);
|
||||
std::string instruct_suffix = params.instruct_suffix;
|
||||
if (params.rm_trailing_space_workaround) {
|
||||
if (instruct_suffix.back() == ' ') { instruct_suffix.pop_back(); }
|
||||
}
|
||||
const auto inp_sfx = ::llama_tokenize(ctx, instruct_suffix, params.instruct_suffix_bos);
|
||||
|
||||
// enable interactive mode if reverse prompt or interactive start is specified
|
||||
if (params.antiprompt.size() != 0 || params.stopprompt.size() != 0 || params.interactive_start) {
|
||||
|
@ -209,10 +213,13 @@ int main(int argc, char ** argv) {
|
|||
}
|
||||
|
||||
if (!params.input_prefix.empty()) {
|
||||
fprintf(stderr, "Input prefix %s: '%s'\n", params.input_prefix_bos ? "(with bos token)" : "", params.input_prefix.c_str());
|
||||
fprintf(stderr, "Input prefix: '%s'\n", params.input_prefix.c_str());
|
||||
}
|
||||
if (!params.input_suffix.empty()) {
|
||||
fprintf(stderr, "Input suffix %s: '%s'\n", params.input_suffix_bos ? "(with bos token)" : "", params.input_suffix.c_str());
|
||||
if (!params.instruct_prefix.empty()) {
|
||||
fprintf(stderr, "Instruct prefix %s: '%s'\n", params.instruct_prefix_bos ? "(with bos token)" : "", params.instruct_prefix.c_str());
|
||||
}
|
||||
if (!params.instruct_suffix.empty()) {
|
||||
fprintf(stderr, "Instruct suffix %s: '%s'\n", params.instruct_suffix_bos ? "(with bos token)" : "", params.instruct_suffix.c_str());
|
||||
}
|
||||
}
|
||||
fprintf(stderr, "sampling: temp = %f, top_k = %d, top_p = %f, repeat_last_n = %i, repeat_penalty = %f\n",
|
||||
|
@ -232,9 +239,9 @@ int main(int argc, char ** argv) {
|
|||
);
|
||||
if (params.multiline_mode) {
|
||||
#if defined (_WIN32)
|
||||
fprintf(stderr, " - Press Ctrl+Z and Return (EOF) to return control to LLaMa.\n\n");
|
||||
fprintf(stderr, " - [MULTILINE MODE] Press Ctrl+Z and Return (EOF) to return control to LLaMa.\n\n");
|
||||
#else
|
||||
fprintf(stderr, " - Press Ctrl+D (EOF) to return control to LLaMa.\n\n");
|
||||
fprintf(stderr, " - [MULTILINE MODE] Press Ctrl+D (EOF) to return control to LLaMa.\n\n");
|
||||
#endif
|
||||
}
|
||||
else {
|
||||
|
@ -320,7 +327,7 @@ int main(int argc, char ** argv) {
|
|||
}
|
||||
|
||||
// replace end of text token with newline token when in interactive mode
|
||||
if (id == llama_token_eos() && params.interactive && params.input_prefix.empty()) {
|
||||
if (id == llama_token_eos() && params.interactive && instruct_mode) {
|
||||
id = llama_token_newline.front();
|
||||
if (params.antiprompt.size() != 0) {
|
||||
// tokenize and inject first reverse prompt
|
||||
|
@ -377,8 +384,10 @@ int main(int argc, char ** argv) {
|
|||
antiprompt.is_stop_prompt = false;
|
||||
// Check if each of the reverse prompts appears at the end of the output.
|
||||
for (std::string & prompt : params.antiprompt) {
|
||||
if (params.rm_trailing_space_workaround) {
|
||||
antiprompt.trailing_space = prompt.back() == ' ';
|
||||
antiprompt.len = prompt.length() - (antiprompt.trailing_space ? 1 : 0);
|
||||
}
|
||||
if (last_output.find(prompt.c_str(), last_output.length() - antiprompt.len, antiprompt.len) != std::string::npos) {
|
||||
is_interacting = true;
|
||||
antiprompt.any = true;
|
||||
|
@ -389,8 +398,10 @@ int main(int argc, char ** argv) {
|
|||
}
|
||||
if (!antiprompt.any) {
|
||||
for (std::string & prompt : params.stopprompt) {
|
||||
if (params.rm_trailing_space_workaround) {
|
||||
antiprompt.trailing_space = prompt.back() == ' ';
|
||||
antiprompt.len = prompt.length() - (antiprompt.trailing_space ? 1 : 0);
|
||||
}
|
||||
if (last_output.find(prompt.c_str(), last_output.length() - antiprompt.len, antiprompt.len) != std::string::npos) {
|
||||
is_interacting = true;
|
||||
antiprompt.any = true;
|
||||
|
@ -406,15 +417,19 @@ int main(int argc, char ** argv) {
|
|||
if (n_past > 0 && is_interacting)
|
||||
{
|
||||
std::string buffer;
|
||||
if (!params.clean_interface && !params.input_prefix.empty() && !antiprompt.any) {
|
||||
if (!params.clean_interface && !params.instruct_prefix.empty() && !antiprompt.any) {
|
||||
// avoid printing again user's new line (TODO: try to revert enter press and print newline)
|
||||
int i = params.input_prefix.front() == '\n' ? 1 : 0;
|
||||
int i = params.instruct_prefix.front() == '\n' ? 1 : 0;
|
||||
for (; i < inp_pfx.size(); i++) {
|
||||
printf("%s", llama_token_to_str(ctx, inp_pfx[i]));
|
||||
}
|
||||
fflush(stdout);
|
||||
}
|
||||
if (antiprompt.any && antiprompt.trailing_space) {
|
||||
if (params.rm_trailing_space_workaround) {
|
||||
// add only if not stopprompt (as stopprompt could be used to pause
|
||||
// assistant and then continue without input - adding back trailing
|
||||
// space may mess it up.)
|
||||
if (!antiprompt.is_stop_prompt && antiprompt.any && antiprompt.trailing_space) {
|
||||
// add back removed trailing space to buffer(workaround)
|
||||
buffer += ' ';
|
||||
if (!params.clean_interface) {
|
||||
|
@ -422,6 +437,7 @@ int main(int argc, char ** argv) {
|
|||
}
|
||||
fflush(stdout);
|
||||
}
|
||||
}
|
||||
|
||||
// potentially set color to indicate we are taking user input
|
||||
set_console_color(con_st, CONSOLE_COLOR_USER_INPUT);
|
||||
|
@ -435,6 +451,11 @@ int main(int argc, char ** argv) {
|
|||
printf("\n> ");
|
||||
}
|
||||
|
||||
if (!params.input_prefix.empty()) {
|
||||
buffer += params.input_prefix;
|
||||
printf("%s", buffer.c_str());
|
||||
}
|
||||
|
||||
if (!get_input_text(buffer, !params.multiline_mode)) {
|
||||
// input stream is bad
|
||||
return 1;
|
||||
|
@ -446,13 +467,16 @@ int main(int argc, char ** argv) {
|
|||
// done taking input, reset color
|
||||
set_console_color(con_st, CONSOLE_COLOR_DEFAULT);
|
||||
|
||||
if (!params.clean_interface && !params.input_suffix.empty() && !antiprompt.is_stop_prompt) {
|
||||
if (!params.clean_interface && !params.instruct_suffix.empty() && !antiprompt.is_stop_prompt) {
|
||||
// avoid printing again user's new line (TODO: try to revert enter press and print newline)
|
||||
int i = params.input_suffix.front() == '\n' ? 1 : 0;
|
||||
int i = params.instruct_suffix.front() == '\n' ? 1 : 0;
|
||||
for (; i < inp_sfx.size(); i++) {
|
||||
printf("%s", llama_token_to_str(ctx, inp_sfx[i]));
|
||||
}
|
||||
// we won't add back removed trailing space here (workaround)
|
||||
// if (remove trailing space workaround) {
|
||||
// We won't add back removed trailing space here, because assistant continues here,
|
||||
// and it may mess up it's output (remove trailing space workaround).
|
||||
// }
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
|
@ -461,7 +485,7 @@ int main(int argc, char ** argv) {
|
|||
if (buffer.length() > 1) {
|
||||
|
||||
// insert input prefix
|
||||
if (!params.input_prefix.empty() && !antiprompt.any) {
|
||||
if (!params.instruct_prefix.empty() && !antiprompt.any) {
|
||||
n_consumed = embd_inp.size();
|
||||
embd_inp.insert(embd_inp.end(), inp_pfx.begin(), inp_pfx.end());
|
||||
}
|
||||
|
@ -470,7 +494,7 @@ int main(int argc, char ** argv) {
|
|||
embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end());
|
||||
|
||||
// insert response suffix
|
||||
if (!params.input_suffix.empty() && !antiprompt.is_stop_prompt) {
|
||||
if (!params.instruct_suffix.empty() && !antiprompt.is_stop_prompt) {
|
||||
embd_inp.insert(embd_inp.end(), inp_sfx.begin(), inp_sfx.end());
|
||||
}
|
||||
|
||||
|
@ -487,7 +511,7 @@ int main(int argc, char ** argv) {
|
|||
|
||||
// end of text token
|
||||
if (!embd.empty() && embd.back() == llama_token_eos()) {
|
||||
if (params.interactive && !params.input_prefix.empty()) {
|
||||
if (instruct_mode) {
|
||||
is_interacting = true;
|
||||
} else {
|
||||
fprintf(stderr, " [end of text]\n");
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue