diff --git a/examples/main/main.cpp b/examples/main/main.cpp index c6dfd5fbb..cb8747c2b 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -798,6 +798,7 @@ int main(int argc, char ** argv) { } // In interactive mode, respect the maximum number of tokens and drop back to user input when reached. + // This logic is skipped when n_predict is negative (-1 = infinite, -2 = stop at context size): the n_predict >= 0 check keeps this branch from resetting n_remain and setting is_interacting. if (params.interactive && n_remain <= 0 && params.n_predict >= 0) { n_remain = params.n_predict; is_interacting = true;