llama: fixed n_vocab for no_vocab models

This commit is contained in:
Michael Podvitskiy 2024-09-16 18:30:28 +02:00
parent 23e0d70bac
commit a5e87bf438

View file

@@ -6061,8 +6061,13 @@ static void llm_load_vocab(
vocab.special_mask_id = -1;
vocab.linefeed_id = -1;
// read vocab size from metadata
ml.get_key(LLM_KV_VOCAB_SIZE, vocab.n_vocab);
return;
} else if (tokenizer_model == "llama") {
}
if (tokenizer_model == "llama") {
vocab.type = LLAMA_VOCAB_TYPE_SPM;
// default special tokens