diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py index 28b060ed3..06fa9996d 100755 --- a/convert-hf-to-gguf.py +++ b/convert-hf-to-gguf.py @@ -408,6 +408,8 @@ class Model(ABC): if res is None: raise NotImplementedError(f"BPE pre-tokenizer was not recognized - update get_vocab_base_pre()") + return res + def _set_vocab_gpt2(self) -> None: tokens, toktypes, tokpre = self.get_vocab_base() self.gguf_writer.add_tokenizer_model("gpt2") diff --git a/llama.cpp b/llama.cpp index e05d10cdb..d8e691c18 100644 --- a/llama.cpp +++ b/llama.cpp @@ -4290,7 +4290,7 @@ static void llm_load_vocab( } if (tokenizer_pre.empty()) { - LLAMA_LOG_WARN("%s: missing tokenizer pre, using default tokenizer pre: 'default'", __func__); + LLAMA_LOG_WARN("%s: missing tokenizer pre, using default tokenizer pre: 'default'\n", __func__); vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT; } else if (tokenizer_pre == "default") { vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT; diff --git a/models/ggml-vocab-deepseek-coder.gguf b/models/ggml-vocab-deepseek-coder.gguf index 8ea17fa4d..60de2c4b5 100644 Binary files a/models/ggml-vocab-deepseek-coder.gguf and b/models/ggml-vocab-deepseek-coder.gguf differ diff --git a/models/ggml-vocab-deepseek-llm.gguf b/models/ggml-vocab-deepseek-llm.gguf index 1e087220f..be0931fdf 100644 Binary files a/models/ggml-vocab-deepseek-llm.gguf and b/models/ggml-vocab-deepseek-llm.gguf differ