Fix color codes emitting mid-UTF8 code.

This commit is contained in:
Matvey Soloviev 2023-03-20 02:51:11 +01:00
parent da5303c1ea
commit 5880a9d788

View file

@ -28,6 +28,36 @@
#define ANSI_COLOR_RESET "\x1b[0m"
#define ANSI_BOLD "\x1b[1m"
/* Keep track of current color of output, and emit ANSI code if it changes. */
enum console_state {
CONSOLE_STATE_DEFAULT=0,
CONSOLE_STATE_PROMPT,
CONSOLE_STATE_USER_INPUT
};
static console_state con_st = CONSOLE_STATE_DEFAULT;
static bool con_use_color = false;
void set_console_state(console_state new_st)
{
if (!con_use_color) return;
// only emit color code if state changed
if (new_st != con_st) {
con_st = new_st;
switch(con_st) {
case CONSOLE_STATE_DEFAULT:
printf(ANSI_COLOR_RESET);
return;
case CONSOLE_STATE_PROMPT:
printf(ANSI_COLOR_YELLOW);
return;
case CONSOLE_STATE_USER_INPUT:
printf(ANSI_BOLD ANSI_COLOR_GREEN);
return;
}
}
}
static const int EOS_TOKEN_ID = 2;
// determine number of model parts based on the dimension
@ -749,7 +779,7 @@ static bool is_interacting = false;
#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) || defined (_WIN32)
void sigint_handler(int signo) {
printf(ANSI_COLOR_RESET);
set_console_state(CONSOLE_STATE_DEFAULT);
printf("\n"); // this also force flush stdout.
if (signo == SIGINT) {
if (!is_interacting) {
@ -808,6 +838,10 @@ int main(int argc, char ** argv) {
params.prompt = gpt_random_prompt(rng);
}
// save choice to use color for later
// (note for later: this is a slightly awkward choice)
con_use_color = params.use_color;
// params.prompt = R"(// this function checks if the number n is prime
//bool is_prime(int n) {)";
@ -931,10 +965,8 @@ int main(int argc, char ** argv) {
int remaining_tokens = params.n_predict;
// set the color for the prompt which will be output initially
if (params.use_color) {
printf(ANSI_COLOR_YELLOW);
}
// the first thing we will do is to output the prompt, so set color accordingly
set_console_state(CONSOLE_STATE_PROMPT);
while (remaining_tokens > 0 || params.interactive) {
// predict
@ -1008,8 +1040,8 @@ int main(int argc, char ** argv) {
fflush(stdout);
}
// reset color to default if we there is no pending user input
if (!input_noecho && params.use_color && (int)embd_inp.size() == input_consumed) {
printf(ANSI_COLOR_RESET);
if (!input_noecho && (int)embd_inp.size() == input_consumed) {
set_console_state(CONSOLE_STATE_DEFAULT);
}
// in interactive mode, and not currently processing queued inputs;
@ -1024,6 +1056,9 @@ int main(int argc, char ** argv) {
}
}
if (is_interacting) {
// potentially set color to indicate we are taking user input
set_console_state(CONSOLE_STATE_USER_INPUT);
if (params.instruct) {
input_consumed = embd_inp.size();
embd_inp.insert(embd_inp.end(), inp_pfx.begin(), inp_pfx.end());
@ -1031,8 +1066,6 @@ int main(int argc, char ** argv) {
printf("\n> ");
}
// currently being interactive
if (params.use_color) printf(ANSI_BOLD ANSI_COLOR_GREEN);
std::string buffer;
std::string line;
bool another_line = true;
@ -1045,7 +1078,9 @@ int main(int argc, char ** argv) {
}
buffer += line + '\n'; // Append the line to the result
} while (another_line);
if (params.use_color) printf(ANSI_COLOR_RESET);
// done taking input, reset color
set_console_state(CONSOLE_STATE_DEFAULT);
std::vector<gpt_vocab::id> line_inp = ::llama_tokenize(vocab, buffer, false);
embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end());
@ -1096,9 +1131,7 @@ int main(int argc, char ** argv) {
ggml_free(model.ctx);
if (params.use_color) {
printf(ANSI_COLOR_RESET);
}
set_console_state(CONSOLE_STATE_DEFAULT);
return 0;
}