From cc3fe18b43844d3a4a4663ee7fc89e9ae40a7b3e Mon Sep 17 00:00:00 2001 From: Michael Podvitskiy Date: Tue, 27 Feb 2024 14:34:29 +0100 Subject: [PATCH] vocab size as a part of a model metadata --- convert.py | 1 + gguf-py/gguf/constants.py | 2 ++ gguf-py/gguf/gguf_writer.py | 3 +++ llama.cpp | 4 +++- 4 files changed, 9 insertions(+), 1 deletion(-) diff --git a/convert.py b/convert.py index c15f8c47e..3dba04395 100755 --- a/convert.py +++ b/convert.py @@ -977,6 +977,7 @@ class OutputFile: name = str(params.path_model.parent).split('/')[-1] self.gguf.add_name (name) + self.gguf.add_vocab_size (params.n_vocab) self.gguf.add_context_length (params.n_ctx) self.gguf.add_embedding_length (params.n_embd) self.gguf.add_block_count (params.n_layer) diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index a62139811..3828253b0 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -32,6 +32,7 @@ class Keys: FILE_TYPE = "general.file_type" class LLM: + VOCAB_SIZE = "{arch}.vocab_size" CONTEXT_LENGTH = "{arch}.context_length" EMBEDDING_LENGTH = "{arch}.embedding_length" BLOCK_COUNT = "{arch}.block_count" @@ -711,6 +712,7 @@ KEY_GENERAL_SOURCE_HF_REPO = Keys.General.SOURCE_HF_REPO KEY_GENERAL_FILE_TYPE = Keys.General.FILE_TYPE # LLM +KEY_VOCAB_SIZE = Keys.LLM.VOCAB_SIZE KEY_CONTEXT_LENGTH = Keys.LLM.CONTEXT_LENGTH KEY_EMBEDDING_LENGTH = Keys.LLM.EMBEDDING_LENGTH KEY_BLOCK_COUNT = Keys.LLM.BLOCK_COUNT diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py index 801160832..4dc02e8ef 100644 --- a/gguf-py/gguf/gguf_writer.py +++ b/gguf-py/gguf/gguf_writer.py @@ -313,6 +313,9 @@ class GGUFWriter: self.data_alignment = alignment self.add_uint32(Keys.General.ALIGNMENT, alignment) + def add_vocab_size(self, size: int) -> None: + self.add_uint32(Keys.LLM.VOCAB_SIZE.format(arch=self.arch), size) + def add_context_length(self, length: int) -> None: self.add_uint32(Keys.LLM.CONTEXT_LENGTH.format(arch=self.arch), length) diff --git a/llama.cpp b/llama.cpp index 4f92488d1..b1411eec9 100644 --- a/llama.cpp +++ b/llama.cpp @@ -256,6 +256,7 @@ enum llm_kv { LLM_KV_GENERAL_SOURCE_URL, LLM_KV_GENERAL_SOURCE_HF_REPO, + LLM_KV_VOCAB_SIZE, LLM_KV_CONTEXT_LENGTH, LLM_KV_EMBEDDING_LENGTH, LLM_KV_BLOCK_COUNT, @@ -314,6 +315,7 @@ static const std::map LLM_KV_NAMES = { { LLM_KV_GENERAL_SOURCE_URL, "general.source.url" }, { LLM_KV_GENERAL_SOURCE_HF_REPO, "general.source.huggingface.repository" }, + { LLM_KV_VOCAB_SIZE, "%s.vocab_size" }, { LLM_KV_CONTEXT_LENGTH, "%s.context_length" }, { LLM_KV_EMBEDDING_LENGTH, "%s.embedding_length" }, { LLM_KV_BLOCK_COUNT, "%s.block_count" }, @@ -12485,7 +12487,7 @@ enum llama_rope_type llama_rope_type(const struct llama_model * model) { } int32_t llama_n_vocab(const struct llama_model * model) { - return model->vocab.id_to_token.size(); + return model->hparams.n_vocab; } int32_t llama_n_ctx_train(const struct llama_model * model) {