From 5880a9d7888e71a7003ecf839c528c2695e39003 Mon Sep 17 00:00:00 2001 From: Matvey Soloviev Date: Mon, 20 Mar 2023 02:51:11 +0100 Subject: [PATCH] Fix color codes emitting mid-UTF8 code. --- main.cpp | 59 +++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 46 insertions(+), 13 deletions(-) diff --git a/main.cpp b/main.cpp index c005d17cc..32055f742 100644 --- a/main.cpp +++ b/main.cpp @@ -28,6 +28,36 @@ #define ANSI_COLOR_RESET "\x1b[0m" #define ANSI_BOLD "\x1b[1m" +/* Keep track of current color of output, and emit ANSI code if it changes. */ +enum console_state { + CONSOLE_STATE_DEFAULT=0, + CONSOLE_STATE_PROMPT, + CONSOLE_STATE_USER_INPUT +}; + +static console_state con_st = CONSOLE_STATE_DEFAULT; +static bool con_use_color = false; + +void set_console_state(console_state new_st) +{ + if (!con_use_color) return; + // only emit color code if state changed + if (new_st != con_st) { + con_st = new_st; + switch(con_st) { + case CONSOLE_STATE_DEFAULT: + printf(ANSI_COLOR_RESET); + return; + case CONSOLE_STATE_PROMPT: + printf(ANSI_COLOR_YELLOW); + return; + case CONSOLE_STATE_USER_INPUT: + printf(ANSI_BOLD ANSI_COLOR_GREEN); + return; + } + } +} + static const int EOS_TOKEN_ID = 2; // determine number of model parts based on the dimension @@ -749,7 +779,7 @@ static bool is_interacting = false; #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) || defined (_WIN32) void sigint_handler(int signo) { - printf(ANSI_COLOR_RESET); + set_console_state(CONSOLE_STATE_DEFAULT); printf("\n"); // this also force flush stdout. if (signo == SIGINT) { if (!is_interacting) { @@ -808,6 +838,10 @@ int main(int argc, char ** argv) { params.prompt = gpt_random_prompt(rng); } + // save choice to use color for later + // (note for later: this is a slightly awkward choice) + con_use_color = params.use_color; + // params.prompt = R"(// this function checks if the number n is prime //bool is_prime(int n) {)"; @@ -931,10 +965,8 @@ int main(int argc, char ** argv) { int remaining_tokens = params.n_predict; - // set the color for the prompt which will be output initially - if (params.use_color) { - printf(ANSI_COLOR_YELLOW); - } + // the first thing we will do is to output the prompt, so set color accordingly + set_console_state(CONSOLE_STATE_PROMPT); while (remaining_tokens > 0 || params.interactive) { // predict @@ -1008,8 +1040,8 @@ int main(int argc, char ** argv) { fflush(stdout); } // reset color to default if we there is no pending user input - if (!input_noecho && params.use_color && (int)embd_inp.size() == input_consumed) { - printf(ANSI_COLOR_RESET); + if (!input_noecho && (int)embd_inp.size() == input_consumed) { + set_console_state(CONSOLE_STATE_DEFAULT); } // in interactive mode, and not currently processing queued inputs; @@ -1024,6 +1056,9 @@ int main(int argc, char ** argv) { } } if (is_interacting) { + // potentially set color to indicate we are taking user input + set_console_state(CONSOLE_STATE_USER_INPUT); + if (params.instruct) { input_consumed = embd_inp.size(); embd_inp.insert(embd_inp.end(), inp_pfx.begin(), inp_pfx.end()); @@ -1031,8 +1066,6 @@ int main(int argc, char ** argv) { printf("\n> "); } - // currently being interactive - if (params.use_color) printf(ANSI_BOLD ANSI_COLOR_GREEN); std::string buffer; std::string line; bool another_line = true; @@ -1045,7 +1078,9 @@ int main(int argc, char ** argv) { } buffer += line + '\n'; // Append the line to the result } while (another_line); - if (params.use_color) printf(ANSI_COLOR_RESET); + + // done taking input, reset color + set_console_state(CONSOLE_STATE_DEFAULT); std::vector line_inp = ::llama_tokenize(vocab, buffer, false); embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end()); @@ -1096,9 +1131,7 @@ int main(int argc, char ** argv) { ggml_free(model.ctx); - if (params.use_color) { - printf(ANSI_COLOR_RESET); - } + set_console_state(CONSOLE_STATE_DEFAULT); return 0; }