llama_tokenize should accept strings containing NUL now

This commit is contained in:
goerch 2023-09-14 21:26:08 +02:00
parent a90bf494c9
commit e41209a95f

View file

@@ -804,10 +804,10 @@ std::vector<llama_token> llama_tokenize(
// upper limit for the number of tokens
int n_tokens = text.length() + add_bos;
std::vector<llama_token> result(n_tokens);
-n_tokens = llama_tokenize(ctx, text.c_str(), text.length(), result.data(), result.size(), add_bos);
+n_tokens = llama_tokenize(ctx, text.data(), text.length(), result.data(), result.size(), add_bos);
if (n_tokens < 0) {
result.resize(-n_tokens);
-int check = llama_tokenize(ctx, text.c_str(), text.length(), result.data(), result.size(), add_bos);
+int check = llama_tokenize(ctx, text.data(), text.length(), result.data(), result.size(), add_bos);
GGML_ASSERT(check == -n_tokens);
} else {
result.resize(n_tokens);