diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index c5d2d0b78..78a2b1c67 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -1559,6 +1559,7 @@ class InternLM2Model(Model):
         self.gguf_writer.add_add_space_prefix(add_prefix)
 
         special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
+        print(special_vocab)
         old_eos = special_vocab.special_token_ids["eos"]
         if "chat" in os.path.basename(self.dir_model.absolute()):
             # For the chat model, we replace the eos with '<|im_end|>'.
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index e2d07a631..2a19817d9 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -795,7 +795,9 @@ int main(int argc, char ** argv) {
            }
 
            // deal with end of text token in interactive mode
-           if (llama_sampling_last(ctx_sampling) == llama_token_eos(model)) {
+           auto last_token = llama_sampling_last(ctx_sampling);
+           if (last_token == llama_token_eos(model) || last_token == 122753)
+           {
                LOG("found EOS token\n");
 
                if (params.interactive) {
@@ -920,7 +922,7 @@ int main(int argc, char ** argv) {
        }
 
        // end of text token
-       if (!embd.empty() && embd.back() == llama_token_eos(model) && !(params.instruct || params.interactive || params.chatml)) {
+       if (!embd.empty() && (embd.back() == llama_token_eos(model) || embd.back() == 122753) && !(params.instruct || params.interactive || params.chatml)) {
            LOG_TEE(" [end of text]\n");
            break;
        }
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index 4ab026482..ee26224d4 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -548,6 +548,7 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
     ],
     MODEL_ARCH.MINICPM: [
         MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT,
         MODEL_TENSOR.OUTPUT_NORM,
         MODEL_TENSOR.ROPE_FREQS,
         MODEL_TENSOR.ATTN_NORM,
diff --git a/llama.cpp b/llama.cpp
index 892d46fbc..8363233fa 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -4375,6 +4375,7 @@ static bool llm_load_tensors(
            case LLM_ARCH_MINICPM:
                {
                    model.tok_embd = ml.create_tensor(ctx_input, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab});
+                   model.output = ml.create_tensor(ctx_output_split, tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, false);
 
                    // output
                    {
@@ -8699,7 +8700,7 @@ struct llm_build_context {
        cb(cur, "lmhead_scaling", -1);
 
        // lm_head
-       cur = ggml_mul_mat(ctx0, model.tok_embd, cur);
+       cur = ggml_mul_mat(ctx0, model.output, cur);
        cb(cur, "result_output", -1);
 
        ggml_build_forward_expand(gf, cur);
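Note on the two examples/main/main.cpp hunks: both compare the sampled token against the bare literal 122753, presumably a model-specific end-of-turn token id (the patch itself does not say which token it corresponds to). A minimal sketch of how that duplicated check could be factored into a single helper; the names extra_stop_tokens and is_stop_token are hypothetical and not part of llama.cpp:

// Sketch only: one place for the EOS comparison plus any model-specific
// extra stop ids. 122753 is copied verbatim from the hunks above; its
// meaning (assumed here: a chat end-of-turn token) is not stated in the patch.
#include <cstdint>
#include <vector>

typedef std::int32_t llama_token; // same underlying type as llama.h's typedef

static const std::vector<llama_token> extra_stop_tokens = { 122753 };

static bool is_stop_token(llama_token tok, llama_token eos) {
    if (tok == eos) {
        return true;
    }
    for (const llama_token t : extra_stop_tokens) {
        if (tok == t) {
            return true;
        }
    }
    return false;
}

With such a helper, the first hunk would reduce to is_stop_token(last_token, llama_token_eos(model)) and the second to is_stop_token(embd.back(), llama_token_eos(model)), keeping the magic number in exactly one place.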