From 69fc940d9a5dfcece5c7bd854b024c5280fda70f Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Tue, 14 Jan 2025 10:26:47 +0200
Subject: [PATCH 1/2] vocab : add dummy tokens for "no_vocab" type

ggml-ci
---
 src/llama-vocab.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
index 96b74e93a..06ef13fff 100644
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -1356,8 +1356,9 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {

             // read vocab size from metadata
             uint32_t n_tokens = 0;
-            if (!ml.get_key(LLM_KV_VOCAB_SIZE, n_tokens, false)) {
-                LLAMA_LOG_WARN("%s: there is no vocab_size in metadata\n", __func__);
+            if (ml.get_key(LLM_KV_VOCAB_SIZE, n_tokens, false)) {
+                LLAMA_LOG_WARN("%s: adding %d dummy tokens\n", __func__, n_tokens);
+                id_to_token.resize(n_tokens);
             }

             return;

From 0cf9a067994fbce9ef31827f7a98e66552410f3b Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Tue, 14 Jan 2025 10:36:18 +0200
Subject: [PATCH 2/2] vocab : minor [no ci]

---
 src/llama-vocab.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
index 06ef13fff..4969d2628 100644
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -1357,7 +1357,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
             // read vocab size from metadata
             uint32_t n_tokens = 0;
             if (ml.get_key(LLM_KV_VOCAB_SIZE, n_tokens, false)) {
-                LLAMA_LOG_WARN("%s: adding %d dummy tokens\n", __func__, n_tokens);
+                LLAMA_LOG_WARN("%s: adding %u dummy tokens\n", __func__, n_tokens);
                 id_to_token.resize(n_tokens);
             }
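
Note: the sketch below is a minimal standalone illustration of the control-flow change in PATCH 1/2 — the types and helper are hypothetical mocks, not llama.cpp's actual llama_model_loader or llama_vocab classes. The first patch inverts the guard: rather than only warning when the optional vocab_size key is missing, the loader now resizes id_to_token when the key IS present, so a "no_vocab" model still reports a nonzero token count and token-id lookups stay in range. The second patch changes the printf specifier from %d to %u to match the unsigned uint32_t argument.

    // sketch.cpp -- standalone illustration (hypothetical names, not llama.cpp API)
    #include <cstdint>
    #include <cstdio>
    #include <optional>
    #include <string>
    #include <vector>

    // stand-in for the optional GGUF metadata lookup (ml.get_key(..., false))
    static std::optional<uint32_t> get_vocab_size_key(bool present) {
        return present ? std::optional<uint32_t>(32000) : std::nullopt;
    }

    int main() {
        std::vector<std::string> id_to_token; // will hold dummy entries, as in the patch

        uint32_t n_tokens = 0;
        if (auto v = get_vocab_size_key(true)) { // key present -> add dummy tokens
            n_tokens = *v;
            // %u matches uint32_t, per PATCH 2/2
            std::printf("%s: adding %u dummy tokens\n", __func__, n_tokens);
            id_to_token.resize(n_tokens);
        }

        // downstream code can now index token ids 0..n_tokens-1 without going out of range
        std::printf("vocab size: %zu\n", id_to_token.size());
        return 0;
    }

Under this reading, the point of the dummy tokens is defensive: a model with tokenizer type "no_vocab" carries no token list, but code that queries the vocab size or indexes id_to_token keeps working against the size recorded in the metadata.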