From c62a39d91eb6af72536300a304cd99cdcc75b7b6 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Wed, 25 Sep 2024 20:36:38 +0300
Subject: [PATCH] embedding : parse special tokens

---
 examples/embedding/embedding.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp
index 18d651260..36e4f2e4d 100644
--- a/examples/embedding/embedding.cpp
+++ b/examples/embedding/embedding.cpp
@@ -135,7 +135,7 @@ int main(int argc, char ** argv) {
     // tokenize the prompts and trim
     std::vector<std::vector<int32_t>> inputs;
     for (const auto & prompt : prompts) {
-        auto inp = ::llama_tokenize(ctx, prompt, true, false);
+        auto inp = ::llama_tokenize(ctx, prompt, true, true);
         if (inp.size() > n_batch) {
             LOG_ERR("%s: number of tokens in input line (%lld) exceeds batch size (%lld), increase batch size and re-run\n",
                     __func__, (long long int) inp.size(), (long long int) n_batch);