From dd459b604ffb4c829b4f51a34988ac70f8e4209f Mon Sep 17 00:00:00 2001 From: Johnman Date: Sun, 19 Mar 2023 22:59:16 +0100 Subject: [PATCH] Reset token budget after every user intervention. In interactive mode, every time the model has to respond to user input, it has an increasingly reduced token budget, eventually generating only a few words before stopping. The token budget in interactive mode should apply to every batch of tokens after user intervention, not globally. --- main.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.cpp b/main.cpp index c005d17cc..0a2488711 100644 --- a/main.cpp +++ b/main.cpp @@ -1054,11 +1054,11 @@ int main(int argc, char ** argv) { embd_inp.insert(embd_inp.end(), inp_sfx.begin(), inp_sfx.end()); } - remaining_tokens -= line_inp.size(); + remaining_tokens = params.n_predict - line_inp.size(); input_noecho = true; // do not echo this again + is_interacting = false; } - is_interacting = false; } // end of text token