Reset token budget after every user intervention.

In interactive mode, each time the model has to respond to user input
it gets a progressively smaller token budget, eventually generating only
a few words before stopping. The token budget in interactive mode should
apply to each batch of tokens generated after a user intervention, not globally.
This commit is contained in:
Johnman 2023-03-19 22:59:16 +01:00
parent da5303c1ea
commit dd459b604f

View file

@@ -1054,12 +1054,12 @@ int main(int argc, char ** argv) {
embd_inp.insert(embd_inp.end(), inp_sfx.begin(), inp_sfx.end());
}
remaining_tokens -= line_inp.size();
remaining_tokens = params.n_predict - line_inp.size();
input_noecho = true; // do not echo this again
}
is_interacting = false;
}
}
// end of text token
if (embd.back() == EOS_TOKEN_ID) {