grammars: fix resampling logic regression (#7424)

2024-05-21 20:40:00 +01:00 · 2024-05-21 20:40:00 +01:00 · e402de364b
commit e402de364b
parent fcf6538ba6
2 changed files with 9 additions and 8 deletions
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -707,7 +707,7 @@ int main(int argc, char ** argv) {

            const llama_token id = llama_sampling_sample(ctx_sampling, ctx, ctx_guidance);

-            llama_sampling_accept(ctx_sampling, ctx, id, true);
+            llama_sampling_accept(ctx_sampling, ctx, id, /* apply_grammar= */ true);

            LOG("last: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, ctx_sampling->prev).c_str());

@@ -728,7 +728,7 @@ int main(int argc, char ** argv) {

                // push the prompt in the sampling context in order to apply repetition penalties later
                // for the prompt, we don't apply grammar rules
-                llama_sampling_accept(ctx_sampling, ctx, embd_inp[n_consumed], false);
+                llama_sampling_accept(ctx_sampling, ctx, embd_inp[n_consumed], /* apply_grammar= */ false);

                ++n_consumed;
                if ((int) embd.size() >= params.n_batch) {