diff --git a/main.cpp b/main.cpp index 8e95c23d5..59a839550 100644 --- a/main.cpp +++ b/main.cpp @@ -1055,7 +1055,7 @@ int main(int argc, char ** argv) { } // end of text token - if (embd.back() == 2) { + if (embd.size() && embd.back() == 2) { if (params.interactive) { is_interacting = true; } else { @@ -1063,6 +1063,12 @@ int main(int argc, char ** argv) { break; } } + + // In interactive mode, respect the maximum number of tokens and drop back to user input when reached. + if (params.interactive && remaining_tokens <= 0) { + remaining_tokens = params.n_predict; + is_interacting = true; + } } #if defined (_WIN32)