From 2b1e5ea37b2a657a797dac7c84ac10bcb003fc2a Mon Sep 17 00:00:00 2001
From: Francis Couture-Harpin
Date: Sat, 11 May 2024 23:18:30 -0400
Subject: [PATCH] convert-hf: add missing ftype

---
 convert-hf-to-gguf.py | 4 ++++
 gguf-py/gguf/lazy.py  | 7 ++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index 0791fa0df..cc249f21a 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -1202,6 +1202,7 @@ class StableLMModel(Model):
         self.gguf_writer.add_head_count_kv(hparams["num_key_value_heads"])
         self.gguf_writer.add_parallel_residual(hparams["use_parallel_residual"] if "use_parallel_residual" in hparams else True)
         self.gguf_writer.add_layer_norm_eps(self.find_hparam(["layer_norm_eps", "norm_eps"]))
+        self.gguf_writer.add_file_type(self.ftype)
 
     _q_norms: list[dict[str, Tensor]] | None = None
     _k_norms: list[dict[str, Tensor]] | None = None
@@ -1578,6 +1579,7 @@ class QwenModel(Model):
         self.gguf_writer.add_rope_dimension_count(self.hparams["hidden_size"] // self.hparams["num_attention_heads"])
         self.gguf_writer.add_head_count(self.hparams["num_attention_heads"])
         self.gguf_writer.add_layer_norm_rms_eps(self.hparams["layer_norm_epsilon"])
+        self.gguf_writer.add_file_type(self.ftype)
 
 
 @Model.register("Qwen2ForCausalLM")
@@ -1815,6 +1817,7 @@ class PlamoModel(Model):
         self.gguf_writer.add_head_count(hparams["num_attention_heads"])
         self.gguf_writer.add_head_count_kv(5)  # hparams["num_key_value_heads"]) is wrong
         self.gguf_writer.add_layer_norm_rms_eps(hparams["rms_norm_eps"])
+        self.gguf_writer.add_file_type(self.ftype)
 
     def shuffle_attn_q_weight(self, data_torch):
         assert data_torch.size() == (5120, 5120)
@@ -1994,6 +1997,7 @@ in chat mode so that the conversation can end normally.")
         self.gguf_writer.add_head_count(self.hparams["num_attention_heads"])
         self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
         self.gguf_writer.add_head_count_kv(self.hparams["num_key_value_heads"])
+        self.gguf_writer.add_file_type(self.ftype)
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         num_heads = self.hparams["num_attention_heads"]
diff --git a/gguf-py/gguf/lazy.py b/gguf-py/gguf/lazy.py
index 0d87ceab2..1167335b8 100644
--- a/gguf-py/gguf/lazy.py
+++ b/gguf-py/gguf/lazy.py
@@ -174,7 +174,12 @@ class LazyBase(ABC, metaclass=LazyMeta):
             while _t._data is None:
                 lt = _t._lazy.popleft()
                 if lt._data is not None:
-                    raise ValueError(f"{lt} did not belong in the lazy queue")
+                    # Lazy tensor did not belong in the lazy queue.
+                    # Weirdly only happens with Bloom models...
+                    # likely because tensors aren't unique in the queue.
+                    # The final output is still the same as in eager mode,
+                    # so it's safe to ignore this.
+                    continue
                 assert lt._func is not None
                 lt._args = cls._recurse_apply(lt._args, already_eager_to_eager)
                 lt._data = lt._func(lt._args)
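
The continue in the lazy.py hunk is safe because the same lazy tensor can be enqueued more than once when several operations share it, so a later occurrence may already have been evaluated by the time it is popped. Below is a minimal, hypothetical Python sketch of that situation; it is not part of the patch and is much simplified from LazyBase (the Node class and evaluate function are invented for illustration only).

# Illustrative sketch, not part of the patch: a simplified work queue in the
# spirit of LazyBase, where a shared pending node can be enqueued twice.
# Evaluating the first occurrence fills in its data, so the second occurrence
# is skipped rather than treated as an error.
from collections import deque


class Node:
    def __init__(self, func=None, args=(), data=None):
        self.func = func    # how to compute this node's value
        self.args = args    # nodes this one depends on
        self.data = data    # None until evaluated


def evaluate(queue: deque) -> None:
    while queue:
        node = queue.popleft()
        if node.data is not None:
            # Already evaluated via an earlier occurrence in the queue;
            # skipping it does not change the final result.
            continue
        node.data = node.func(*(arg.data for arg in node.args))


leaf = Node(data=2)
shared = Node(func=lambda x: x + 1, args=(leaf,))
a = Node(func=lambda x: x * 10, args=(shared,))
b = Node(func=lambda x: x * 100, args=(shared,))

# `shared` ends up in the queue twice because both `a` and `b` depend on it.
work = deque([shared, a, shared, b])
evaluate(work)
print(a.data, b.data)  # 30 300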