Reset token budget after every user intervention.
In interactive mode, each time the model has to respond to user input it is left with an ever-smaller token budget, until it eventually generates only a few words before stopping. The token budget in interactive mode should apply to each batch of tokens generated after a user intervention, not to the session as a whole.
parent da5303c1ea
commit dd459b604f
1 changed file with 2 additions and 2 deletions
main.cpp
@@ -1054,11 +1054,11 @@ int main(int argc, char ** argv) {
                         embd_inp.insert(embd_inp.end(), inp_sfx.begin(), inp_sfx.end());
                     }
 
-                    remaining_tokens -= line_inp.size();
+                    remaining_tokens = params.n_predict - line_inp.size();
 
                     input_noecho = true; // do not echo this again
-                    is_interacting = false;
                 }
+                is_interacting = false;
             }
 
         // end of text token
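For illustration, the following is a minimal, self-contained sketch of the two budgeting strategies, not code from the repository. The names remaining_tokens, params.n_predict, and line_inp mirror the diff above; the interaction loop, the turn sizes, and everything else are invented for the example, and the model's own generated tokens (which also consume budget in the real loop) are ignored.

// budget_sketch.cpp -- illustrative only, not part of the repository.
// Compares the old behaviour (one global budget that only shrinks) with the
// new behaviour (budget reset after every user intervention).
#include <cstdio>
#include <vector>

int main() {
    const int n_predict = 16;                      // stands in for params.n_predict
    const std::vector<int> turn_sizes = {4, 6, 3}; // tokens typed by the user per turn (made up)

    // Old behaviour: remaining_tokens is set once and only ever decremented,
    // so later turns start with less and less room to generate.
    int remaining_tokens = n_predict;
    std::printf("global budget:\n");
    for (int line_inp_size : turn_sizes) {
        remaining_tokens -= line_inp_size;
        std::printf("  at most %d tokens left for this response\n",
                    remaining_tokens > 0 ? remaining_tokens : 0);
    }

    // New behaviour: the budget is re-armed after every intervention, so each
    // response gets a fresh n_predict minus the tokens the user just entered.
    std::printf("per-intervention budget:\n");
    for (int line_inp_size : turn_sizes) {
        remaining_tokens = n_predict - line_inp_size;
        std::printf("  at most %d tokens left for this response\n", remaining_tokens);
    }
    return 0;
}

Compiled with any C++11 compiler, the first loop prints 12, 6, 3 (the shrinking budget the commit message describes), while the second prints 12, 10, 13 (a fresh budget per response).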