diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index f500b3492..70ce29f72 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -3043,12 +3043,12 @@ class ChatGLMModel(Model):
         self.gguf_writer.add_tokenizer_pre(tokpre)
         self.gguf_writer.add_token_list(tokens)
         self.gguf_writer.add_token_types(toktypes)
+        self.gguf_writer.add_add_bos_token(False)
 
         special_vocab = gguf.SpecialVocab(dir_model, load_merges=False)
         special_vocab.chat_template = "chatglm4"
         special_vocab.merges = merges
         # only add special tokens when they were not already loaded from config.json
-        special_vocab._set_special_token("bos", tokenizer.get_added_vocab()["<|endoftext|>"])
         special_vocab._set_special_token("eos", tokenizer.get_added_vocab()["<|endoftext|>"])
         special_vocab._set_special_token("eot", tokenizer.get_added_vocab()["<|user|>"])
         # this one is usually not in config.json anyway
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index 1e0afe9d3..80c3478d2 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -104,7 +104,7 @@ class Keys:
         ADD_BOS              = "tokenizer.ggml.add_bos_token"
         ADD_EOS              = "tokenizer.ggml.add_eos_token"
         ADD_PREFIX           = "tokenizer.ggml.add_space_prefix"
-        REMOVE_EXTRA_WS = "tokenizer.ggml.remove_extra_whitespaces"
+        REMOVE_EXTRA_WS      = "tokenizer.ggml.remove_extra_whitespaces"
         PRECOMPILED_CHARSMAP = "tokenizer.ggml.precompiled_charsmap"
         HF_JSON              = "tokenizer.huggingface.json"
         RWKV                 = "tokenizer.rwkv.world"
diff --git a/gguf-py/pyproject.toml b/gguf-py/pyproject.toml
index 62129126b..36e63ee3b 100644
--- a/gguf-py/pyproject.toml
+++ b/gguf-py/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "gguf"
-version = "0.9.1"
+version = "0.9.0"
 description = "Read and write ML models in GGUF for GGML"
 authors = ["GGML <ggml@ggml.ai>"]
 packages = [
diff --git a/llama.cpp b/llama.cpp
index 4abdfa37a..2becfee0e 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -14745,10 +14745,10 @@ static std::vector<llama_vocab::id> llama_tokenize_internal(const llama_vocab &
                 }
                 // add prefix to chatglm3
                 if (vocab.type_pre == LLAMA_VOCAB_PRE_TYPE_CHATGLM3) {
-                    output.push_back(64790);
-                    output.push_back(64792);
-                    output.push_back(64795);
-                    output.push_back(30910);
+                    output.push_back(64790); // [gMASK]
+                    output.push_back(64792); // sop
+                    output.push_back(64795); // <|user|>
+                    output.push_back(30910); // \n
                     output.push_back(13);
                 }
 
@@ -14787,7 +14787,7 @@ static std::vector<llama_vocab::id> llama_tokenize_internal(const llama_vocab &
                 }
                 // add suffix to chatglm3
                 if (vocab.type_pre == LLAMA_VOCAB_PRE_TYPE_CHATGLM3) {
-                    output.push_back(64796);
+                    output.push_back(64796); // <|assistant|>
                 }
             } break;
         case LLAMA_VOCAB_TYPE_BPE:
@@ -14799,10 +14799,10 @@ static std::vector<llama_vocab::id> llama_tokenize_internal(const llama_vocab &
                 }
                 // add prefix to chatglm4
                 if (vocab.type_pre == LLAMA_VOCAB_PRE_TYPE_CHATGLM4) {
-                    output.push_back(151331);
-                    output.push_back(151333);
-                    output.push_back(151336);
-                    output.push_back(198);
+                    output.push_back(151331); // [gMASK]
+                    output.push_back(151333); // <sop>
+                    output.push_back(151336); // <|user|>
+                    output.push_back(198);    // \n
                 }
                 for (const auto & fragment : fragment_buffer) {
                     if (fragment.type == FRAGMENT_BUFFER_VARIANT_TYPE_RAW_TEXT) {
@@ -14823,7 +14823,7 @@ static std::vector<llama_vocab::id> llama_tokenize_internal(const llama_vocab &
                 }
                 // add suffix to chatglm4
                 if (vocab.type_pre == LLAMA_VOCAB_PRE_TYPE_CHATGLM4) {
-                    output.push_back(151337);
+                    output.push_back(151337); // <|assistant|>
                 }
             } break;
         case LLAMA_VOCAB_TYPE_WPM:
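
Side note on verifying a converted model (not part of the patch itself): the conversion now writes tokenizer.ggml.add_bos_token = False and no longer forces "<|endoftext|>" in as the BOS special token, so a quick sanity check is to read the tokenizer metadata back with gguf-py's GGUFReader. A minimal sketch, assuming a converted file at the hypothetical path glm-4-9b-chat.gguf:

    from gguf import GGUFReader

    reader = GGUFReader("glm-4-9b-chat.gguf")

    for key in ("tokenizer.ggml.add_bos_token",
                "tokenizer.ggml.bos_token_id",
                "tokenizer.ggml.eos_token_id"):
        field = reader.get_field(key)
        if field is None:
            print(key, "-> absent")
        else:
            # for scalar metadata fields, the last part holds the raw value
            print(key, "->", field.parts[-1][0])

For a ChatGLM4 conversion made after this change, add_bos_token should print 0, and bos_token_id is no longer pinned to the "<|endoftext|>" id by the converter (it may still appear if it comes from the model's own config), while eos_token_id should still resolve to "<|endoftext|>".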