llama : support NUL bytes in tokens

Francis Couture-Harpin 2024-08-11 21:00:03 -04:00
parent 4134999e01
commit faaac59d16
7 changed files with 28 additions and 18 deletions
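Every hunk below applies the same fix: a std::string built from a bare const char * stops at the first NUL byte, so GGUF strings containing '\0' were silently truncated. Constructing the string with the two-argument (pointer, length) constructor instead, fed by the *_str_n length accessors, keeps every byte. A minimal standalone sketch of the difference (not part of the commit; the byte data is made up for illustration):

    #include <cassert>
    #include <string>

    int main() {
        // hypothetical token bytes with an embedded NUL: 'a', '\0', 'b'
        const char buf[] = {'a', '\0', 'b'};

        std::string truncated(buf);          // stops at the first NUL -> "a"
        std::string full(buf, sizeof(buf));  // keeps all 3 bytes, NUL included

        assert(truncated.size() == 1);
        assert(full.size() == 3);
        return 0;
    }

In the diff, gguf_get_val_str_n and gguf_get_arr_str_n supply that explicit length for GGUF values and array elements.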


@@ -561,7 +561,7 @@ static void load_vocab(const char * filename, const Config * config, struct llam
     vocab->id_to_token.resize(n_vocab);
     for (uint32_t i = 0; i < n_vocab; i++) {
-        std::string word = gguf_get_arr_str(ctx, token_idx, i);
+        std::string word(gguf_get_arr_str(ctx, token_idx, i), gguf_get_arr_str_n(ctx, token_idx, i));
         vocab->token_to_id[word] = i;


@@ -12,7 +12,7 @@ static bool g_verbose = false;
 static std::string get_kv_str(struct gguf_context * ctx_gguf, const std::string & key){
     int id = gguf_find_key(ctx_gguf, key.c_str());
-    return id < 0 ? "" : std::string(gguf_get_val_str(ctx_gguf, id));
+    return id < 0 ? "" : std::string(gguf_get_val_str(ctx_gguf, id), gguf_get_val_str_n(ctx_gguf, id));
 }
 static float get_kv_f32(struct gguf_context * ctx_gguf, const std::string & key) {


@@ -225,7 +225,7 @@ static std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) {
     switch (type) {
         case GGUF_TYPE_STRING:
-            return gguf_get_val_str(ctx_gguf, i);
+            return std::string(gguf_get_val_str(ctx_gguf, i), gguf_get_val_str_n(ctx_gguf, i));
         case GGUF_TYPE_ARRAY:
             {
                 const enum gguf_type arr_type = gguf_get_arr_type(ctx_gguf, i);
@@ -235,7 +235,7 @@ static std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) {
                 ss << "[";
                 for (int j = 0; j < arr_n; j++) {
                     if (arr_type == GGUF_TYPE_STRING) {
-                        std::string val = gguf_get_arr_str(ctx_gguf, i, j);
+                        std::string val(gguf_get_arr_str(ctx_gguf, i, j), gguf_get_arr_str_n(ctx_gguf, i, j));
                         // escape quotes
                         replace_all(val, "\\", "\\\\");
                         replace_all(val, "\"", "\\\"");