From 4ab3f47cbca37c1b29d21b57a75f39a2c5af5e3b Mon Sep 17 00:00:00 2001 From: Yann Follet Date: Wed, 20 Dec 2023 03:24:26 +0000 Subject: [PATCH] add the --no-display-prompt parameter; combined with --log-disable, it displays only the generated tokens --- common/common.cpp | 6 +++++- common/common.h | 1 + examples/main/main.cpp | 8 +++++++- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index 93d5483e4..669920c84 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -592,6 +592,8 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) { params.numa = true; } else if (arg == "--verbose-prompt") { params.verbose_prompt = true; + } else if (arg == "--no-display-prompt") { + params.display_prompt = false; } else if (arg == "-r" || arg == "--reverse-prompt") { if (++i >= argc) { invalid_param = true; @@ -904,7 +906,8 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { printf(" Not recommended since this is both slower and uses more VRAM.\n"); #endif // GGML_USE_CUBLAS #endif - printf(" --verbose-prompt print prompt before generation\n"); + printf(" --verbose-prompt print a verbose prompt before generation (default: %s)\n", params.verbose_prompt ? "true" : "false"); + printf(" --no-display-prompt don't print prompt at generation (default: %s)\n", !params.display_prompt ? "true" : "false"); printf(" -dkvc, --dump-kv-cache\n"); printf(" verbose print of the KV cache\n"); printf(" -nkvo, --no-kv-offload\n"); @@ -1539,6 +1542,7 @@ void dump_non_result_info_yaml(FILE * stream, const gpt_params & params, const l fprintf(stream, "min_p: %f # default: 0.0\n", sparams.min_p); fprintf(stream, "typical_p: %f # default: 1.0\n", sparams.typical_p); fprintf(stream, "verbose_prompt: %s # default: false\n", params.verbose_prompt ? "true" : "false"); + fprintf(stream, "display_prompt: %s # default: true\n", params.display_prompt ? 
"true" : "false"); } // diff --git a/common/common.h b/common/common.h index e87ce1133..4a80d5b81 100644 --- a/common/common.h +++ b/common/common.h @@ -122,6 +122,7 @@ struct gpt_params { bool use_mlock = false; // use mlock to keep model in memory bool numa = false; // attempt optimizations that help on some NUMA systems bool verbose_prompt = false; // print prompt tokens before generation + bool display_prompt = true; // print prompt before generation bool infill = false; // use infill mode bool dump_kv_cache = false; // dump the KV cache contents for debugging purposes bool no_kv_offload = false; // disable KV offloading diff --git a/examples/main/main.cpp b/examples/main/main.cpp index c096f110b..65bf94e00 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -462,6 +462,7 @@ int main(int argc, char ** argv) { bool is_antiprompt = false; bool input_echo = true; + bool display = true; bool need_to_save_session = !path_session.empty() && n_matching_session_tokens < embd_inp.size(); int n_past = 0; @@ -476,6 +477,7 @@ int main(int argc, char ** argv) { // the first thing we will do is to output the prompt, so set color accordingly console::set_display(console::prompt); + display = params.display_prompt; std::vector embd; std::vector embd_guidance; @@ -664,7 +666,7 @@ int main(int argc, char ** argv) { } // display text - if (input_echo) { + if (input_echo && display) { for (auto id : embd) { const std::string token_str = llama_token_to_piece(ctx, id); printf("%s", token_str.c_str()); @@ -681,6 +683,8 @@ int main(int argc, char ** argv) { // reset color to default if there is no pending user input if (input_echo && (int) embd_inp.size() == n_consumed) { console::set_display(console::reset); + display = true; + } // if not currently processing queued inputs; @@ -753,6 +757,7 @@ int main(int argc, char ** argv) { // color user input only console::set_display(console::user_input); + display = params.display_prompt; std::string line; bool another_line = 
true; @@ -763,6 +768,7 @@ int main(int argc, char ** argv) { // done taking input, reset color console::set_display(console::reset); + display = true; // Add tokens to embd only if the input buffer is non-empty // Entering a empty line lets the user pass control back