From 910fb8b683b3d12f5f79225e7427b93511c6f324 Mon Sep 17 00:00:00 2001 From: KerfuffleV2 Date: Sat, 10 Jun 2023 07:44:16 -0600 Subject: [PATCH] Fix issue where interactive mode crashes when input exceeds ctx size Closes #1768 --- examples/common.cpp | 3 +++ examples/common.h | 3 ++- examples/main/main.cpp | 11 +++++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/examples/common.cpp b/examples/common.cpp index f5d886acf..df69f2736 100644 --- a/examples/common.cpp +++ b/examples/common.cpp @@ -632,6 +632,9 @@ void console_set_color(console_state & con_st, console_color_t color) { case CONSOLE_COLOR_USER_INPUT: fprintf(con_st.out, ANSI_BOLD ANSI_COLOR_GREEN); break; + case CONSOLE_COLOR_ERROR: + fprintf(con_st.out, ANSI_BOLD ANSI_COLOR_RED); + break; } con_st.color = color; fflush(con_st.out); diff --git a/examples/common.h b/examples/common.h index 826e2ae59..6fedb414a 100644 --- a/examples/common.h +++ b/examples/common.h @@ -112,7 +112,8 @@ struct llama_context * llama_init_from_gpt_params(const gpt_params & params); enum console_color_t { CONSOLE_COLOR_DEFAULT=0, CONSOLE_COLOR_PROMPT, - CONSOLE_COLOR_USER_INPUT + CONSOLE_COLOR_USER_INPUT, + CONSOLE_COLOR_ERROR }; struct console_state { diff --git a/examples/main/main.cpp b/examples/main/main.cpp index de63faa3e..02aa06db2 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -331,6 +331,17 @@ int main(int argc, char ** argv) { while ((n_remain != 0 && !is_antiprompt) || params.interactive) { // predict if (embd.size() > 0) { + auto max_embd_size = n_ctx - 2; + // Ensure the input doesn't exceed the context size by truncating embd if necessary. + if ((int)embd.size() > max_embd_size) { + auto skipped_tokens = embd.size() - max_embd_size; + console_set_color(con_st, CONSOLE_COLOR_ERROR); + printf("<<input too long: skipped %zu token%s>>", skipped_tokens, skipped_tokens != 1 ? 
"s" : ""); + console_set_color(con_st, CONSOLE_COLOR_DEFAULT); + fflush(stdout); + embd.resize(max_embd_size); + } + // infinite text generation via context swapping // if we run out of context: // - take the n_keep first tokens from the original prompt (via n_past)