From 99eafe908fe1ceed856d9718ef6327cd9f5a0266 Mon Sep 17 00:00:00 2001
From: Jeffersoncgo
Date: Wed, 19 Apr 2023 08:01:35 -0400
Subject: [PATCH] more_responsive

---
 examples/common.cpp    |  8 ++++++++
 examples/common.h      |  2 ++
 examples/main/main.cpp | 13 +++++++++++--
 llama.cpp              |  2 +-
 4 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/examples/common.cpp b/examples/common.cpp
index a0b6f10ad..f87c18b76 100644
--- a/examples/common.cpp
+++ b/examples/common.cpp
@@ -199,6 +199,14 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
                 break;
             }
             params.input_prefix = argv[i];
+        } else if (arg == "--forceendtoken") {
+            params.forceendtoken = true;
+        } else if (arg == "--eot_token") {
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            params.eot_token = argv[i];
         } else {
             fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
             gpt_print_usage(argc, argv, default_params);
diff --git a/examples/common.h b/examples/common.h
index cbbc2dfab..c21369224 100644
--- a/examples/common.h
+++ b/examples/common.h
@@ -36,6 +36,7 @@ struct gpt_params {
 
     std::string lora_adapter = ""; // lora adapter path
     std::string lora_base = "";    // base model path for the lora adapter
+    std::string eot_token = "[end of text]"; // marker printed when generation stops
 
     bool memory_f16 = true;     // use f16 instead of f32 for memory kv
     bool random_prompt = false; // do not randomize prompt if none provided
@@ -52,6 +53,7 @@ struct gpt_params {
     bool use_mlock = false;      // use mlock to keep model in memory
     bool mem_test = false;       // compute maximum memory usage
     bool verbose_prompt = false; // print prompt tokens before generation
+    bool forceendtoken = true;   // always print the end-of-text marker when generation stops
 };
 
 bool gpt_params_parse(int argc, char ** argv, gpt_params & params);
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index b7b3c4196..4d9f62106 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -377,6 +377,9 @@ int main(int argc, char ** argv) {
                         is_antiprompt = true;
                         set_console_color(con_st, CONSOLE_COLOR_USER_INPUT);
                         fflush(stdout);
+                        if (params.forceendtoken) {
+                            fprintf(stderr, "%s\n", params.eot_token.c_str());
+                        }
                         break;
                     }
                 }
@@ -459,10 +462,13 @@ int main(int argc, char ** argv) {
 
             // end of text token
             if (!embd.empty() && embd.back() == llama_token_eos()) {
+                if (params.forceendtoken || !params.instruct) {
+                    fprintf(stderr, "%s\n", params.eot_token.c_str());
+                }
                 if (params.instruct) {
                     is_interacting = true;
-                } else {
-                    fprintf(stderr, " [end of text]\n");
+                }
+                else {
                     break;
                 }
             }
@@ -470,6 +476,9 @@ int main(int argc, char ** argv) {
             // In interactive mode, respect the maximum number of tokens and drop back to user input when reached.
             if (params.interactive && n_remain <= 0 && params.n_predict != -1) {
                 n_remain = params.n_predict;
+                if (params.forceendtoken) {
+                    fprintf(stderr, "%s\n", params.eot_token.c_str());
+                }
                 is_interacting = true;
             }
         }
diff --git a/llama.cpp b/llama.cpp
index 3ff5dc1e1..dfec05910 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1711,7 +1711,7 @@ struct llama_context * llama_init_from_file(
         unsigned percentage = (unsigned) (100 * progress);
         while (percentage > *cur_percentage_p) {
             ++*cur_percentage_p;
-            fprintf(stderr, ".");
+            fprintf(stderr, "[percentage] %u%%\n", *cur_percentage_p);
             fflush(stderr);
             if (percentage >= 100) {
                 fprintf(stderr, "\n");
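
Below is a minimal, self-contained sketch of the behavior this patch adds, shown outside the full main.cpp flow. print_eot is a hypothetical helper introduced here only for illustration; the patch itself inlines the same check at each stopping point. The explicit "%s" format matters: passing the user-supplied --eot_token text directly as the format string would make fprintf misinterpret any '%' it contains.

#include <cstdio>
#include <string>

// Reduced to the two fields this patch adds to gpt_params.
struct gpt_params {
    std::string eot_token = "[end of text]"; // marker text, set via --eot_token
    bool forceendtoken    = true;            // print the marker whenever generation stops
};

// Hypothetical helper, not part of the patch: prints the end-of-text
// marker to stderr when forceendtoken is enabled.
static void print_eot(const gpt_params & params) {
    if (params.forceendtoken) {
        fprintf(stderr, "%s\n", params.eot_token.c_str());
    }
}

int main() {
    gpt_params params;
    params.eot_token = "<|endoftext|>"; // as if passed via --eot_token
    print_eot(params);                  // prints "<|endoftext|>" to stderr
    return 0;
}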