From dd459b604ffb4c829b4f51a34988ac70f8e4209f Mon Sep 17 00:00:00 2001
From: Johnman <tjohnman@github>
Date: Sun, 19 Mar 2023 22:59:16 +0100
Subject: [PATCH] Reset token budget after every user intervention.

In interactive mode, every time the model has to respond to user input
it has an increasingly reduced token budget, eventually generating only
a few words before stopping. The token budget in interactive mode
should apply to every batch of tokens after user intervention, not
globally.
---
 main.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/main.cpp b/main.cpp
index c005d17cc..0a2488711 100644
--- a/main.cpp
+++ b/main.cpp
@@ -1054,11 +1054,11 @@ int main(int argc, char ** argv) {
                     embd_inp.insert(embd_inp.end(), inp_sfx.begin(), inp_sfx.end());
                 }
 
-                remaining_tokens -= line_inp.size();
+                remaining_tokens = params.n_predict - line_inp.size();
 
                 input_noecho = true; // do not echo this again
+                is_interacting = false;
             }
-            is_interacting = false;
         }
 
         // end of text token