fix perplexity after c-api refactor (#390)

* preallocate a buffer of fitting size for tokenization (utils.cpp)
* don't create a new std::string (especially here, where it's usually large)
2023-03-22 17:09:38 +01:00 · 2023-03-22 17:09:38 +01:00 · 56e659a0b2
commit 56e659a0b2
parent 40ea807a97
2 changed files with 4 additions and 2 deletions
--- a/utils.cpp
+++ b/utils.cpp
@@ -146,8 +146,10 @@ std::string gpt_random_prompt(std::mt19937 & rng) {

 // TODO: not great allocating this every time
 std::vector<llama_token> llama_tokenize(struct llama_context * ctx, const std::string & text, bool add_bos) {
-    std::vector<llama_token> res(8096);
+    // initialize to prompt number of chars, since n_tokens <= n_prompt_chars
+    std::vector<llama_token> res(text.size() + (int)add_bos);
    int n = llama_tokenize(ctx, text.c_str(), res.data(), res.size(), add_bos);
+    assert(n >= 0);
    res.resize(n);

    return res;