From 7b1b575fe8de8ece4122f7bd7a8242e63841ec70 Mon Sep 17 00:00:00 2001 From: Green Sky Date: Wed, 22 Mar 2023 12:56:42 +0100 Subject: [PATCH] preallocate a buffer of fitting size for tokenization (utils.cpp) --- utils.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/utils.cpp b/utils.cpp index 1679ae10a..3909c974f 100644 --- a/utils.cpp +++ b/utils.cpp @@ -146,8 +146,10 @@ std::string gpt_random_prompt(std::mt19937 & rng) { // TODO: not great allocating this every time std::vector<llama_token> llama_tokenize(struct llama_context * ctx, const std::string & text, bool add_bos) { - std::vector<llama_token> res(8096); + // initialize to prompt number of chars, since n_tokens <= n_prompt_chars + std::vector<llama_token> res(text.size() + (int)add_bos); int n = llama_tokenize(ctx, text.c_str(), res.data(), res.size(), add_bos); + assert(n >= 0); res.resize(n); return res