pass correct max number of tokens to llama_tokenize

This commit is contained in:
xaedes 2023-09-14 03:04:04 +02:00
parent 7f378a7561
commit f627e2fe9c
No known key found for this signature in database
GPG key ID: 30030EDD817EA2B1

View file

@@ -1266,10 +1266,10 @@ size_t tokenize_file(
// tokenize all data at once
out_tokens.resize(buf.size());
int n_tokens = llama_tokenize(lctx, buf.data(), out_tokens.data(), buf.size(), false);
int n_tokens = llama_tokenize(lctx, buf.data(), out_tokens.data(), out_tokens.size(), false);
if (n_tokens < 0) {
out_tokens.resize(-n_tokens);
n_tokens = llama_tokenize(lctx, buf.data(), out_tokens.data(), buf.size(), false);
n_tokens = llama_tokenize(lctx, buf.data(), out_tokens.data(), out_tokens.size(), false);
}
if (n_tokens >= 0) {
out_tokens.resize(n_tokens);
@@ -1362,13 +1362,13 @@ size_t tokenize_file(
int n_tokens = llama_tokenize(lctx,
buf_sample.data(),
tok_sample.data(),
sample_size, false);
tok_sample.size(), false);
if (n_tokens < 0) {
tok_sample.resize(-n_tokens);
n_tokens = llama_tokenize(lctx,
buf_sample.data(),
tok_sample.data(),
sample_size, false);
tok_sample.size(), false);
GGML_ASSERT(n_tokens >= 0);
}
GGML_ASSERT(n_tokens <= tok_sample.size());