Merge 35b0bf0585 into c4fe84fb0d
This commit is contained in: d2c2630307
4 changed files with 25 additions and 4 deletions
examples/common.cpp

@@ -199,6 +199,14 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
                 break;
             }
             params.input_prefix = argv[i];
+        } else if (arg == "--forceendtoken") {
+            params.forceendtoken = true;
+        } else if (arg == "--eot_token") {
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            params.eot_token = argv[i];
         } else {
             fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
             gpt_print_usage(argc, argv, default_params);
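The two new branches follow the parser's existing conventions: --forceendtoken is a bare switch that sets a boolean, while --eot_token consumes the next argv slot as its value and flags invalid_param when none follows. A minimal standalone sketch of that pattern (hypothetical program, not part of the patch):

// parse_sketch.cpp: illustrative only; names mirror the patch.
#include <cstdio>
#include <string>

int main(int argc, char ** argv) {
    std::string eot_token = "[end of text]"; // default from gpt_params
    bool forceendtoken = true; // the patch defaults this to true, so the
                               // switch only reaffirms it as written
    for (int i = 1; i < argc; i++) {
        std::string arg = argv[i];
        if (arg == "--forceendtoken") {
            forceendtoken = true;            // bare flag, no value
        } else if (arg == "--eot_token") {
            if (++i >= argc) {               // value flag: read the next slot
                fprintf(stderr, "error: missing value for %s\n", arg.c_str());
                return 1;
            }
            eot_token = argv[i];
        }
    }
    fprintf(stderr, "forceendtoken=%d eot_token=%s\n", forceendtoken, eot_token.c_str());
    return 0;
}

Note that because forceendtoken already defaults to true in gpt_params (see the header change below), passing --forceendtoken has no effect unless that default changes.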
examples/common.h

@@ -36,6 +36,7 @@ struct gpt_params {

     std::string lora_adapter = ""; // lora adapter path
     std::string lora_base = ""; // base model path for the lora adapter
+    std::string eot_token = "[end of text]";

     bool memory_f16 = true; // use f16 instead of f32 for memory kv
     bool random_prompt = false; // do not randomize prompt if none provided
@@ -52,6 +53,7 @@ struct gpt_params {
     bool use_mlock = false; // use mlock to keep model in memory
     bool mem_test = false; // compute maximum memory usage
     bool verbose_prompt = false; // print prompt tokens before generation
+    bool forceendtoken = true; // force printing the end-of-text token after generation
 };

 bool gpt_params_parse(int argc, char ** argv, gpt_params & params);
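A quick check of the new defaults (hypothetical test, not in the patch): a freshly constructed gpt_params already carries the marker text and prints it, because forceendtoken starts out true.

#include <cassert>
#include <string>

struct gpt_params_sketch {   // reduced stand-in for the struct above
    std::string eot_token = "[end of text]";
    bool forceendtoken = true;
};

int main() {
    gpt_params_sketch p;
    assert(p.forceendtoken);                // printing is on by default
    assert(p.eot_token == "[end of text]"); // default marker text
    return 0;
}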
examples/main/main.cpp

@@ -245,7 +245,8 @@ int main(int argc, char ** argv) {
            " - Press Ctrl+C to interject at any time.\n"
 #endif
            " - Press Return to return control to LLaMa.\n"
-           " - If you want to submit another line, end your input in '\\'.\n\n");
+           " - If you want to submit another line, end your input in '\\'.\n"
+           "[model ready]\n");
         is_interacting = params.interactive_start;
     }
@@ -388,6 +389,9 @@ int main(int argc, char ** argv) {
                 is_antiprompt = true;
                 set_console_color(con_st, CONSOLE_COLOR_USER_INPUT);
                 fflush(stdout);
+                if (params.forceendtoken) {
+                    fprintf(stderr, "%s\n", params.eot_token.c_str());
+                }
                 break;
             }
         }
@@ -470,10 +474,13 @@ int main(int argc, char ** argv) {

         // end of text token
         if (!embd.empty() && embd.back() == llama_token_eos()) {
+            if (params.forceendtoken || !params.instruct) {
+                fprintf(stderr, "%s\n", params.eot_token.c_str());
+            }
             if (params.instruct) {
                 is_interacting = true;
-            } else {
-                fprintf(stderr, " [end of text]\n");
+            }
+            else {
                 break;
             }
         }
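Two details are worth noting here. First, the marker now prints whenever forceendtoken is set, and also in plain (non-instruct) mode, which preserves the old " [end of text]" behavior while letting --eot_token customize the text. Second, because eot_token is user-supplied, it must never be passed to fprintf as the format string itself; a '%' inside the token would be parsed as a conversion specifier, so the "%s" form above is the safe pattern. A reduced sketch of the gating (hypothetical stand-in struct, not the real gpt_params):

#include <cstdio>
#include <string>

struct params_t {                // reduced stand-in for gpt_params
    std::string eot_token = "[end of text]";
    bool forceendtoken = true;
    bool instruct = false;
};

// Mirrors the post-patch end-of-text handling: print the marker when forced
// or when not in instruct mode; instruct mode hands control back to the user.
bool on_eos(const params_t & p) {
    if (p.forceendtoken || !p.instruct) {
        // "%s" keeps a '%' inside eot_token from being read as a format spec
        fprintf(stderr, "%s\n", p.eot_token.c_str());
    }
    return p.instruct;           // true -> keep interacting, false -> break
}

int main() {
    params_t p;
    p.instruct = true;
    bool keep_going = on_eos(p); // prints the marker, stays interactive
    return keep_going ? 0 : 1;
}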
@@ -481,6 +488,9 @@ int main(int argc, char ** argv) {
         // In interactive mode, respect the maximum number of tokens and drop back to user input when reached.
         if (params.interactive && n_remain <= 0 && params.n_predict != -1) {
             n_remain = params.n_predict;
+            if (params.forceendtoken) {
+                fprintf(stderr, "%s\n", params.eot_token.c_str());
+            }
             is_interacting = true;
         }
     }
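For context, the budget works as follows: n_remain is decremented once per generated token, and when it reaches zero in interactive mode the patch refills it, prints the marker, and returns control to the user. A compressed sketch of that flow (hypothetical driver loop, not the real generation loop):

#include <cstdio>

int main() {
    const int n_predict = 4;     // per-turn budget (params.n_predict)
    int  n_remain       = n_predict;
    bool is_interacting = false;

    for (int tok = 0; tok < 10; tok++) {        // stand-in generation loop
        --n_remain;
        if (n_remain <= 0 && n_predict != -1) {
            n_remain = n_predict;               // refill for the next turn
            fprintf(stderr, "[end of text]\n"); // marker, as with forceendtoken
            is_interacting = true;              // drop back to user input
            break;
        }
    }
    return is_interacting ? 0 : 1;
}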
llama.cpp

@@ -938,6 +938,7 @@ static void llama_model_load_internal(

         fprintf(stderr, "%s: mem required  = %7.2f MB (+ %7.2f MB per state)\n", __func__,
                 mem_required / 1024.0 / 1024.0, mem_required_state / 1024.0 / 1024.0);
+        fprintf(stderr, "[model loading]\n");
     }

     // create the ggml context
@@ -1749,7 +1750,7 @@ struct llama_context * llama_init_from_file(
             unsigned percentage = (unsigned) (100 * progress);
             while (percentage > *cur_percentage_p) {
                 ++*cur_percentage_p;
-                fprintf(stderr, ".");
+                fprintf(stderr, "[percentage] %u%%\n", *cur_percentage_p);
                 fflush(stderr);
                 if (percentage >= 100) {
                     fprintf(stderr, "\n");
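The dot-per-percent progress indicator becomes one full log line per percentage point, up to 100 lines per model load, which is easier for a wrapping front-end to parse than a stream of dots. A standalone sketch of the callback arithmetic (hypothetical wiring; the real callback receives progress in [0, 1]):

#include <cstdio>

// Emits one "[percentage] N%" line for each point gained since the last call.
static void report_progress(float progress, unsigned * cur_percentage_p) {
    unsigned percentage = (unsigned) (100 * progress);
    while (percentage > *cur_percentage_p) {
        ++*cur_percentage_p;
        fprintf(stderr, "[percentage] %u%%\n", *cur_percentage_p);
        fflush(stderr);
    }
}

int main() {
    unsigned cur = 0;
    report_progress(0.25f, &cur);  // prints 1%..25%, one line each
    report_progress(1.00f, &cur);  // prints 26%..100%
    return 0;
}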