convert-hf: add missing ftype
commit 2b1e5ea37b
parent d7e199e444

2 changed files with 10 additions and 1 deletion

@@ -1202,6 +1202,7 @@ class StableLMModel(Model):
         self.gguf_writer.add_head_count_kv(hparams["num_key_value_heads"])
         self.gguf_writer.add_parallel_residual(hparams["use_parallel_residual"] if "use_parallel_residual" in hparams else True)
         self.gguf_writer.add_layer_norm_eps(self.find_hparam(["layer_norm_eps", "norm_eps"]))
+        self.gguf_writer.add_file_type(self.ftype)

     _q_norms: list[dict[str, Tensor]] | None = None
     _k_norms: list[dict[str, Tensor]] | None = None
@@ -1578,6 +1579,7 @@ class QwenModel(Model):
         self.gguf_writer.add_rope_dimension_count(self.hparams["hidden_size"] // self.hparams["num_attention_heads"])
         self.gguf_writer.add_head_count(self.hparams["num_attention_heads"])
         self.gguf_writer.add_layer_norm_rms_eps(self.hparams["layer_norm_epsilon"])
+        self.gguf_writer.add_file_type(self.ftype)


 @Model.register("Qwen2ForCausalLM")
@@ -1815,6 +1817,7 @@ class PlamoModel(Model):
         self.gguf_writer.add_head_count(hparams["num_attention_heads"])
         self.gguf_writer.add_head_count_kv(5)  # hparams["num_key_value_heads"]) is wrong
         self.gguf_writer.add_layer_norm_rms_eps(hparams["rms_norm_eps"])
+        self.gguf_writer.add_file_type(self.ftype)

     def shuffle_attn_q_weight(self, data_torch):
         assert data_torch.size() == (5120, 5120)
@@ -1994,6 +1997,7 @@ in chat mode so that the conversation can end normally.")
         self.gguf_writer.add_head_count(self.hparams["num_attention_heads"])
         self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
         self.gguf_writer.add_head_count_kv(self.hparams["num_key_value_heads"])
+        self.gguf_writer.add_file_type(self.ftype)

     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         num_heads = self.hparams["num_attention_heads"]
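
For context, the self.gguf_writer.add_file_type(self.ftype) calls added above record the conversion's file type in the output's GGUF metadata under the "general.file_type" key. The sketch below is illustrative only: FileType and MiniWriter are made-up stand-ins rather than the gguf-py GGUFWriter API, and only the 0/1/7 values mirror llama.cpp's common file types (all F32, mostly F16, mostly Q8_0).

# Illustrative sketch, not gguf-py: what the ftype metadata boils down to.
from enum import IntEnum

class FileType(IntEnum):
    ALL_F32 = 0      # unquantized, 32-bit floats
    MOSTLY_F16 = 1   # most tensors stored as 16-bit floats
    MOSTLY_Q8_0 = 7  # most tensors stored as Q8_0

class MiniWriter:
    def __init__(self) -> None:
        self.kv: dict[str, int] = {}

    def add_file_type(self, ftype: int) -> None:
        # Recorded as a key-value metadata entry in the output file.
        self.kv["general.file_type"] = int(ftype)

writer = MiniWriter()
writer.add_file_type(FileType.MOSTLY_F16)
print(writer.kv)  # {'general.file_type': 1}
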
@@ -174,7 +174,12 @@ class LazyBase(ABC, metaclass=LazyMeta):
             while _t._data is None:
                 lt = _t._lazy.popleft()
                 if lt._data is not None:
-                    raise ValueError(f"{lt} did not belong in the lazy queue")
+                    # Lazy tensor did not belong in the lazy queue.
+                    # Weirdly only happens with Bloom models...
+                    # likely because tensors aren't unique in the queue.
+                    # The final output is still the same as in eager mode,
+                    # so it's safe to ignore this.
+                    continue
                 assert lt._func is not None
                 lt._args = cls._recurse_apply(lt._args, already_eager_to_eager)
                 lt._data = lt._func(lt._args)
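
The raise replaced above fired when a lazy tensor popped from the queue already had data; the change treats that case as benign and skips it. Below is a minimal, self-contained sketch of the pattern (illustrative only, not the LazyBase implementation; LazyNode and to_eager are made-up names) showing why skipping an already-materialized duplicate leaves the final result unchanged.

# Minimal sketch, not gguf-py's LazyBase: a queue of lazy nodes evaluated
# in order, where an already-materialized duplicate is simply skipped.
from collections import deque

class LazyNode:
    def __init__(self, func, args=()):
        self._func = func   # computation to run when this node is materialized
        self._args = args   # may reference earlier LazyNodes
        self._data = None   # filled in once evaluated

def to_eager(target, queue):
    # Evaluate queued nodes in order until the target has data.
    while target._data is None:
        lt = queue.popleft()
        if lt._data is not None:
            # Same node queued twice: it is already materialized,
            # so skipping it cannot change the final output.
            continue
        args = tuple(a._data if isinstance(a, LazyNode) else a for a in lt._args)
        lt._data = lt._func(*args)
    return target._data

a = LazyNode(lambda: 2)
b = LazyNode(lambda x: x * 3, (a,))
print(to_eager(b, deque([a, a, b])))  # 6 -- the duplicate of a is skipped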