Fix color codes being emitted in the middle of a UTF-8 sequence.

This commit is contained in:
Matvey Soloviev 2023-03-20 02:51:11 +01:00
parent da5303c1ea
commit 5880a9d788

View file

@ -28,6 +28,36 @@
#define ANSI_COLOR_RESET "\x1b[0m" #define ANSI_COLOR_RESET "\x1b[0m"
#define ANSI_BOLD "\x1b[1m" #define ANSI_BOLD "\x1b[1m"
/* Output states of the console. set_console_state() tracks the current one
   and emits the matching ANSI code when it changes. */
enum console_state {
    CONSOLE_STATE_DEFAULT    = 0, // attributes reset (ANSI_COLOR_RESET)
    CONSOLE_STATE_PROMPT     = 1, // prompt text (ANSI_COLOR_YELLOW)
    CONSOLE_STATE_USER_INPUT = 2  // interactive user input (ANSI_BOLD + ANSI_COLOR_GREEN)
};
// Console state most recently applied by set_console_state(); begins as the default state.
static console_state con_st{CONSOLE_STATE_DEFAULT};
// Master switch for ANSI color output; stays off until main() copies params.use_color into it.
static bool con_use_color{false};
/* Switch the console to `new_st`.
 *
 * Does nothing when color output is disabled (con_use_color is false) or when
 * `new_st` is already the current state; in both cases con_st is left untouched.
 * Otherwise con_st is updated and the ANSI sequence associated with the new
 * state is written to stdout via printf. A value that matches none of the known
 * states updates con_st but prints nothing.
 */
void set_console_state(console_state new_st)
{
    // Bail out early: colors are off, or there is no state transition to announce.
    if (!con_use_color || new_st == con_st) {
        return;
    }

    con_st = new_st;

    // Look up the escape sequence first, then print it in a single place.
    const char * escape_code = nullptr;
    switch (new_st) {
        case CONSOLE_STATE_DEFAULT:
            escape_code = ANSI_COLOR_RESET;
            break;
        case CONSOLE_STATE_PROMPT:
            escape_code = ANSI_COLOR_YELLOW;
            break;
        case CONSOLE_STATE_USER_INPUT:
            escape_code = ANSI_BOLD ANSI_COLOR_GREEN;
            break;
    }

    if (escape_code != nullptr) {
        printf("%s", escape_code);
    }
}
static const int EOS_TOKEN_ID = 2; static const int EOS_TOKEN_ID = 2;
// determine number of model parts based on the dimension // determine number of model parts based on the dimension
@ -749,7 +779,7 @@ static bool is_interacting = false;
#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) || defined (_WIN32) #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) || defined (_WIN32)
void sigint_handler(int signo) { void sigint_handler(int signo) {
printf(ANSI_COLOR_RESET); set_console_state(CONSOLE_STATE_DEFAULT);
printf("\n"); // this also force flush stdout. printf("\n"); // this also force flush stdout.
if (signo == SIGINT) { if (signo == SIGINT) {
if (!is_interacting) { if (!is_interacting) {
@ -808,6 +838,10 @@ int main(int argc, char ** argv) {
params.prompt = gpt_random_prompt(rng); params.prompt = gpt_random_prompt(rng);
} }
// save choice to use color for later
// (note for later: this is a slightly awkward choice)
con_use_color = params.use_color;
// params.prompt = R"(// this function checks if the number n is prime // params.prompt = R"(// this function checks if the number n is prime
//bool is_prime(int n) {)"; //bool is_prime(int n) {)";
@ -931,10 +965,8 @@ int main(int argc, char ** argv) {
int remaining_tokens = params.n_predict; int remaining_tokens = params.n_predict;
// set the color for the prompt which will be output initially // the first thing we will do is to output the prompt, so set color accordingly
if (params.use_color) { set_console_state(CONSOLE_STATE_PROMPT);
printf(ANSI_COLOR_YELLOW);
}
while (remaining_tokens > 0 || params.interactive) { while (remaining_tokens > 0 || params.interactive) {
// predict // predict
@ -1008,8 +1040,8 @@ int main(int argc, char ** argv) {
fflush(stdout); fflush(stdout);
} }
// reset color to default if there is no pending user input // reset color to default if there is no pending user input
if (!input_noecho && params.use_color && (int)embd_inp.size() == input_consumed) { if (!input_noecho && (int)embd_inp.size() == input_consumed) {
printf(ANSI_COLOR_RESET); set_console_state(CONSOLE_STATE_DEFAULT);
} }
// in interactive mode, and not currently processing queued inputs; // in interactive mode, and not currently processing queued inputs;
@ -1024,6 +1056,9 @@ int main(int argc, char ** argv) {
} }
} }
if (is_interacting) { if (is_interacting) {
// potentially set color to indicate we are taking user input
set_console_state(CONSOLE_STATE_USER_INPUT);
if (params.instruct) { if (params.instruct) {
input_consumed = embd_inp.size(); input_consumed = embd_inp.size();
embd_inp.insert(embd_inp.end(), inp_pfx.begin(), inp_pfx.end()); embd_inp.insert(embd_inp.end(), inp_pfx.begin(), inp_pfx.end());
@ -1031,8 +1066,6 @@ int main(int argc, char ** argv) {
printf("\n> "); printf("\n> ");
} }
// currently being interactive
if (params.use_color) printf(ANSI_BOLD ANSI_COLOR_GREEN);
std::string buffer; std::string buffer;
std::string line; std::string line;
bool another_line = true; bool another_line = true;
@ -1045,7 +1078,9 @@ int main(int argc, char ** argv) {
} }
buffer += line + '\n'; // Append the line to the result buffer += line + '\n'; // Append the line to the result
} while (another_line); } while (another_line);
if (params.use_color) printf(ANSI_COLOR_RESET);
// done taking input, reset color
set_console_state(CONSOLE_STATE_DEFAULT);
std::vector<gpt_vocab::id> line_inp = ::llama_tokenize(vocab, buffer, false); std::vector<gpt_vocab::id> line_inp = ::llama_tokenize(vocab, buffer, false);
embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end()); embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end());
@ -1096,9 +1131,7 @@ int main(int argc, char ** argv) {
ggml_free(model.ctx); ggml_free(model.ctx);
if (params.use_color) { set_console_state(CONSOLE_STATE_DEFAULT);
printf(ANSI_COLOR_RESET);
}
return 0; return 0;
} }