From e7c7ae8b6e36e7427f8ccedaaac331288991c7f0 Mon Sep 17 00:00:00 2001
From: teleprint-me <77757836+teleprint-me@users.noreply.github.com>
Date: Fri, 17 May 2024 03:18:50 -0400
Subject: [PATCH] patch: Add pre-tokenizer metadata to phi-2

Signed-off-by: teleprint-me <77757836+teleprint-me@users.noreply.github.com>
---
 convert-hf-to-gguf.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index 247a117fd..e9156a832 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -1723,6 +1723,7 @@ class Phi2Model(Model):
         n_head = self.find_hparam(["num_attention_heads", "n_head"])
 
         self.gguf_writer.add_name("Phi2")
+        self.gguf_writer.add_tokenizer_pre("gpt-2")
         self.gguf_writer.add_context_length(self.find_hparam(["n_positions", "max_position_embeddings"]))
 
         self.gguf_writer.add_embedding_length(n_embd)