convert_lora : MoE LoRA conversion support

* convert_lora : prefer safetensors, similarly to convert_hf
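Only one of the two changed files is reproduced below (the hunks come from the Model/LlamaModel classes of the HF converter); the convert_lora side of the change is not shown. As a hedged sketch of the "prefer safetensors" behaviour named in the message above — not this commit's actual code; the PEFT-style file names adapter_model.safetensors / adapter_model.bin and the helper name are assumptions — a LoRA converter might pick the adapter file like this:

import os

import torch
from safetensors.torch import load_file


def load_adapter_tensors(adapter_dir: str) -> dict[str, torch.Tensor]:
    # Hypothetical helper: prefer the safetensors adapter when it exists,
    # and only fall back to the pickled PyTorch file otherwise.
    st_path = os.path.join(adapter_dir, "adapter_model.safetensors")
    pt_path = os.path.join(adapter_dir, "adapter_model.bin")
    if os.path.isfile(st_path):
        return load_file(st_path)
    if os.path.isfile(pt_path):
        return torch.load(pt_path, map_location="cpu", weights_only=True)
    raise FileNotFoundError(f"no adapter weights found in {adapter_dir}")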
Francis Couture-Harpin 2024-07-09 18:26:38 -04:00
parent 916e95928b
commit 9d96328bdf
2 changed files with 218 additions and 59 deletions


@@ -373,9 +373,6 @@ class Model:
         except KeyError:
             raise NotImplementedError(f'Architecture {arch!r} not supported!') from None
 
-    def support_lora(self) -> bool:
-        return False
-
     # used for GPT-2 BPE and WordPiece vocabs
     def get_vocab_base(self) -> tuple[list[str], list[int], str]:
         tokens: list[str] = []
@@ -1415,9 +1412,9 @@ class LlamaModel(Model):
         n_head = self.hparams["num_attention_heads"]
         n_kv_head = self.hparams.get("num_key_value_heads")
 
-        if name.endswith(("q_proj.weight", "q_proj.bias", "q_proj.lora_B.weight")):
+        if name.endswith(("q_proj.weight", "q_proj.bias")):
             data_torch = LlamaModel.permute(data_torch, n_head, n_head)
-        if name.endswith(("k_proj.weight", "k_proj.bias", "k_proj.lora_B.weight")):
+        if name.endswith(("k_proj.weight", "k_proj.bias")):
             data_torch = LlamaModel.permute(data_torch, n_head, n_kv_head)
 
         # process the experts separately
@@ -1465,10 +1462,6 @@ class LlamaModel(Model):
             if len(experts) > 0:
                 raise ValueError(f"Unprocessed experts: {experts}")
 
-    def support_lora(self) -> bool:
-        # TODO: support lora conversion for MOE
-        return "num_local_experts" not in self.hparams
-
 
 @Model.register("BitnetForCausalLM")
 class BitnetModel(Model):
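The hunks above only remove the old LoRA hooks from the HF converter (support_lora and the lora_B.weight special cases in the q/k permutation); how MoE LoRA adapters are actually converted lives in the file not shown here. As a rough illustration of what "MoE LoRA conversion support" generally requires — stacking the per-expert LoRA matrices so they line up with the base model's stacked expert tensors — the sketch below uses Mixtral-style PEFT key names; the function and its key pattern are hypothetical, not this commit's code:

import torch


def stack_expert_lora(lora_tensors: dict[str, torch.Tensor], bid: int, proj: str,
                      side: str, n_experts: int) -> torch.Tensor:
    # Hypothetical sketch: gather one LoRA matrix (lora_A or lora_B) per expert of
    # block `bid` and stack them into a single (n_experts, rows, cols) tensor, so it
    # matches the shape of the stacked expert weights in the base GGUF model.
    keys = [
        f"base_model.model.model.layers.{bid}.block_sparse_moe.experts.{eid}.{proj}.{side}.weight"
        for eid in range(n_experts)
    ]
    return torch.stack([lora_tensors[k] for k in keys], dim=0)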