grammars: fix resampling logic regression (#7424)

This commit is contained in:
Olivier Chafik 2024-05-21 20:40:00 +01:00 committed by GitHub
parent fcf6538ba6
commit e402de364b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 9 additions and 8 deletions

View file

@ -707,7 +707,7 @@ int main(int argc, char ** argv) {
const llama_token id = llama_sampling_sample(ctx_sampling, ctx, ctx_guidance);
llama_sampling_accept(ctx_sampling, ctx, id, true);
llama_sampling_accept(ctx_sampling, ctx, id, /* apply_grammar= */ true);
LOG("last: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, ctx_sampling->prev).c_str());
@ -728,7 +728,7 @@ int main(int argc, char ** argv) {
// push the prompt in the sampling context in order to apply repetition penalties later
// for the prompt, we don't apply grammar rules
llama_sampling_accept(ctx_sampling, ctx, embd_inp[n_consumed], false);
llama_sampling_accept(ctx_sampling, ctx, embd_inp[n_consumed], /* apply_grammar= */ false);
++n_consumed;
if ((int) embd.size() >= params.n_batch) {