From c7fdbd3735a005afe148f87f3fee238839135f21 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stanis=C5=82aw=20Szymczyk?=
Date: Sat, 14 Dec 2024 11:59:59 +0100
Subject: [PATCH] convert-hf : use zero value of sliding_window to distinguish Phi-4 from other PHI3 models

---
 convert_hf_to_gguf.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 2c4558e27..f99203f78 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -2254,9 +2254,11 @@ class Phi3MiniModel(Model):
         self.gguf_writer.add_rope_dimension_count(rope_dims)
         self.gguf_writer.add_rope_freq_base(self.find_hparam(["rope_theta"]))
         self.gguf_writer.add_file_type(self.ftype)
-        # handle null value of sliding_window (Phi-4 model)
-        if (sliding_window := self.hparams.get("sliding_window")) is not None:
-            self.gguf_writer.add_sliding_window(sliding_window)
+        sliding_window = self.hparams.get("sliding_window")
+        # use zero value of sliding_window to distinguish Phi-4 from other PHI3 models
+        if sliding_window is None:
+            sliding_window = 0
+        self.gguf_writer.add_sliding_window(sliding_window)
 
     def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
         n_embd = self.find_hparam(["hidden_size", "n_embd"])