llama : support NUL bytes in tokens

This commit is contained in:
Francis Couture-Harpin 2024-08-11 21:00:03 -04:00
parent 4134999e01
commit faaac59d16
7 changed files with 28 additions and 18 deletions

View file

@@ -561,7 +561,7 @@ static void load_vocab(const char * filename, const Config * config, struct llam
vocab->id_to_token.resize(n_vocab);
for (uint32_t i = 0; i < n_vocab; i++) {
-        std::string word = gguf_get_arr_str(ctx, token_idx, i);
+        std::string word(gguf_get_arr_str(ctx, token_idx, i), gguf_get_arr_str_n(ctx, token_idx, i));
vocab->token_to_id[word] = i;