minor : comments + rename

ggml-ci
2023-10-16 18:17:31 +03:00 · 2023-10-16 18:17:31 +03:00 · 373d782d42
commit 373d782d42
parent 1c626e2fe1
4 changed files with 13 additions and 7 deletions
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -612,8 +612,14 @@ int main(int argc, char ** argv) {
            LOG("embd_inp.size(): %d, n_consumed: %d\n", (int) embd_inp.size(), n_consumed);
            while ((int) embd_inp.size() > n_consumed) {
                embd.push_back(embd_inp[n_consumed]);
+
+                // GG: I'm not sure it's a good idea to push the prompt tokens into the sampling context
+                //     Most likely will remove this in the future to avoid exposing "prev"
+                //     Same thing is done in "server". If we stop pushing the prompt tokens, then the repetition
+                //     penalty will be applied only based on the tokens generated by the model.
                ctx_sampling->prev.erase(ctx_sampling->prev.begin());
                ctx_sampling->prev.push_back(embd_inp[n_consumed]);
+
                ++n_consumed;
                if ((int) embd.size() >= params.n_batch) {
                    break;