diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index f500b3492..70ce29f72 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -3043,12 +3043,12 @@ class ChatGLMModel(Model):
         self.gguf_writer.add_tokenizer_pre(tokpre)
         self.gguf_writer.add_token_list(tokens)
         self.gguf_writer.add_token_types(toktypes)
+        self.gguf_writer.add_add_bos_token(False)
 
         special_vocab = gguf.SpecialVocab(dir_model, load_merges=False)
         special_vocab.chat_template = "chatglm4"
         special_vocab.merges = merges
         # only add special tokens when they were not already loaded from config.json
-        special_vocab._set_special_token("bos", tokenizer.get_added_vocab()["<|endoftext|>"])
         special_vocab._set_special_token("eos", tokenizer.get_added_vocab()["<|endoftext|>"])
         special_vocab._set_special_token("eot", tokenizer.get_added_vocab()["<|user|>"])
         # this one is usually not in config.json anyway
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index 1e0afe9d3..80c3478d2 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -104,7 +104,7 @@ class Keys:
         ADD_BOS              = "tokenizer.ggml.add_bos_token"
         ADD_EOS              = "tokenizer.ggml.add_eos_token"
         ADD_PREFIX           = "tokenizer.ggml.add_space_prefix"
-        REMOVE_EXTRA_WS = "tokenizer.ggml.remove_extra_whitespaces"
+        REMOVE_EXTRA_WS      = "tokenizer.ggml.remove_extra_whitespaces"
         PRECOMPILED_CHARSMAP = "tokenizer.ggml.precompiled_charsmap"
         HF_JSON              = "tokenizer.huggingface.json"
         RWKV                 = "tokenizer.rwkv.world"
diff --git a/gguf-py/pyproject.toml b/gguf-py/pyproject.toml
index 62129126b..36e63ee3b 100644
--- a/gguf-py/pyproject.toml
+++ b/gguf-py/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "gguf"
-version = "0.9.1"
+version = "0.9.0"
 description = "Read and write ML models in GGUF for GGML"
 authors = ["GGML <ggml@ggml.ai>"]
 packages = [
diff --git a/llama.cpp b/llama.cpp
index 4abdfa37a..2becfee0e 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -14745,10 +14745,10 @@ static std::vector<llama_vocab::id> llama_tokenize_internal(const llama_vocab &
                 }
                 // add prefix to chatglm3
                 if (vocab.type_pre == LLAMA_VOCAB_PRE_TYPE_CHATGLM3) {
-                    output.push_back(64790);
-                    output.push_back(64792);
-                    output.push_back(64795);
-                    output.push_back(30910);
+                    output.push_back(64790); // [gMASK]
+                    output.push_back(64792); // sop
+                    output.push_back(64795); // <|user|>
+                    output.push_back(30910); // \n
                     output.push_back(13);
                 }
 
@@ -14787,7 +14787,7 @@ static std::vector<llama_vocab::id> llama_tokenize_internal(const llama_vocab &
                 }
                 // add suffix to chatglm3
                 if (vocab.type_pre == LLAMA_VOCAB_PRE_TYPE_CHATGLM3) {
-                    output.push_back(64796);
+                    output.push_back(64796); // <|assistant|>
                 }
             } break;
         case LLAMA_VOCAB_TYPE_BPE:
@@ -14799,10 +14799,10 @@ static std::vector<llama_vocab::id> llama_tokenize_internal(const llama_vocab &
                 }
                 // add prefix to chatglm4
                 if (vocab.type_pre == LLAMA_VOCAB_PRE_TYPE_CHATGLM4) {
-                    output.push_back(151331);
-                    output.push_back(151333);
-                    output.push_back(151336);
-                    output.push_back(198);
+                    output.push_back(151331); // [gMASK]
+                    output.push_back(151333); // <sop>
+                    output.push_back(151336); // <|user|>
+                    output.push_back(198);    // \n
                 }
                 for (const auto & fragment : fragment_buffer) {
                     if (fragment.type == FRAGMENT_BUFFER_VARIANT_TYPE_RAW_TEXT) {
@@ -14823,7 +14823,7 @@ static std::vector<llama_vocab::id> llama_tokenize_internal(const llama_vocab &
                 }
                 // add suffix to chatglm4
                 if (vocab.type_pre == LLAMA_VOCAB_PRE_TYPE_CHATGLM4) {
-                    output.push_back(151337);
+                    output.push_back(151337); // <|assistant|>
                 }
             } break;
         case LLAMA_VOCAB_TYPE_WPM:
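
Side note on verifying a converted model (not part of the patch itself): the conversion now writes tokenizer.ggml.add_bos_token = False and no longer forces "<|endoftext|>" in as the BOS special token, so a quick sanity check is to read the tokenizer metadata back with gguf-py's GGUFReader. A minimal sketch, assuming a converted file at the hypothetical path glm-4-9b-chat.gguf:

    from gguf import GGUFReader

    reader = GGUFReader("glm-4-9b-chat.gguf")

    for key in ("tokenizer.ggml.add_bos_token",
                "tokenizer.ggml.bos_token_id",
                "tokenizer.ggml.eos_token_id"):
        field = reader.get_field(key)
        if field is None:
            print(key, "-> absent")
        else:
            # for scalar metadata fields, the last part holds the raw value
            print(key, "->", field.parts[-1][0])

For a ChatGLM4 conversion made after this change, add_bos_token should print 0, and bos_token_id is no longer pinned to the "<|endoftext|>" id by the converter (it may still appear if it comes from the model's own config), while eos_token_id should still resolve to "<|endoftext|>".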