From c6396aa4bba8a508b262cc9fb8baada75f620db8 Mon Sep 17 00:00:00 2001
From: Victor Oluwadare <111367022+Victoran0@users.noreply.github.com>
Date: Tue, 8 Oct 2024 02:35:08 +0100
Subject: [PATCH] Added support for SFTTrainer checkpoint models and adapter
 models containing some non-LoRA weights

The previous code logs an "Unexpected name" error and calls sys.exit(1)
(lines 350-351 of the current version) if even a single weight in the
lora_model is not a lora_A, lora_B, or base layer weight.

This edit collects the names of all LoRA weights in the model before the
for loop at line 341 of the current version. From line 350 of the edited
version, the subsequent operations are performed only on LoRA and base
layer weights, ignoring any non-LoRA weights in the lora_model.

This should allow the script to extract LoRA weights and convert them to
GGUF for adapters that contain one or more non-LoRA weights.
---
 convert_lora_to_gguf.py | 51 +++++++++++++++++++++++++----------------
 1 file changed, 31 insertions(+), 20 deletions(-)

diff --git a/convert_lora_to_gguf.py b/convert_lora_to_gguf.py
index 439a78de1..61a945bd3 100755
--- a/convert_lora_to_gguf.py
+++ b/convert_lora_to_gguf.py
@@ -338,28 +338,39 @@ if __name__ == '__main__':
         def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
             tensor_map: dict[str, PartialLoraTensor] = {}
 
-            for name, tensor in lora_model.items():
-                if self.lazy:
-                    tensor = LazyTorchTensor.from_eager(tensor)
-                base_name = get_base_tensor_name(name)
-                is_lora_a = ".lora_A.weight" in name
-                is_lora_b = ".lora_B.weight" in name
-                if not is_lora_a and not is_lora_b:
-                    if ".base_layer.weight" in name:
-                        continue
-                    logger.error(f"Unexpected name '{name}': Not a lora_A or lora_B tensor")
-                    sys.exit(1)
+            # The following edits will enable conversion for: SFTTrainer checkpoint adapter models and other adapter models that contain weights besides LoRA weights
 
-                if base_name in tensor_map:
-                    if is_lora_a:
-                        tensor_map[base_name].A = tensor
+            # Here, we first get the items with the 'lora_' substring
+            lora_model_items_name = [name for name,_ in lora_model.items()]
+            lora_model_items_with_lora_tensors = [name for name in lora_model_items_name if 'lora_' in name]
+
+            for name, tensor in lora_model.items():
+
+                # Check for only LoRA finetuned weights and base layer weights
+                if (name in lora_model_items_with_lora_tensors) or (".base_layer.weight" in name):
+                    if self.lazy:
+                        tensor = LazyTorchTensor.from_eager(tensor)
+                    base_name = get_base_tensor_name(name)
+                    is_lora_a = ".lora_A.weight" in name
+                    is_lora_b = ".lora_B.weight" in name
+                    if not is_lora_a and not is_lora_b:
+                        if ".base_layer.weight" in name:
+                            continue
+
+                        # we will either have a lora weight or a base layer weight, this error becomes trivial
+                        # logger.error(f"Unexpected name '{name}': Not a lora_A or lora_B tensor")
+                        # sys.exit(1)
+
+                    if base_name in tensor_map:
+                        if is_lora_a:
+                            tensor_map[base_name].A = tensor
+                        else:
+                            tensor_map[base_name].B = tensor
                     else:
-                        tensor_map[base_name].B = tensor
-                else:
-                    if is_lora_a:
-                        tensor_map[base_name] = PartialLoraTensor(A=tensor)
-                    else:
-                        tensor_map[base_name] = PartialLoraTensor(B=tensor)
+                        if is_lora_a:
+                            tensor_map[base_name] = PartialLoraTensor(A=tensor)
+                        else:
+                            tensor_map[base_name] = PartialLoraTensor(B=tensor)
 
             for name, tensor in tensor_map.items():
                 assert tensor.A is not None
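
For reference, a minimal standalone sketch of the filtering rule the patch applies inside get_tensors(): only tensors whose names contain 'lora_' or '.base_layer.weight' are processed, and any other weight is skipped instead of aborting the conversion. The tensor names below are hypothetical examples, not code taken from convert_lora_to_gguf.py.

# Illustration only: the filtering idea from the patch, with made-up tensor names.
lora_model = {
    "base_model.model.layers.0.self_attn.q_proj.lora_A.weight": "A0",
    "base_model.model.layers.0.self_attn.q_proj.lora_B.weight": "B0",
    "base_model.model.layers.0.self_attn.q_proj.base_layer.weight": "W0",
    "base_model.model.score.weight": "extra",  # e.g. a non-LoRA weight in an SFTTrainer checkpoint
}

# Collect the names of the LoRA tensors once, before the loop (as the patch does)
lora_names = [name for name in lora_model if "lora_" in name]

for name, tensor in lora_model.items():
    if name in lora_names or ".base_layer.weight" in name:
        is_lora_a = ".lora_A.weight" in name
        is_lora_b = ".lora_B.weight" in name
        if not is_lora_a and not is_lora_b:
            continue  # base layer weight: recognised but not converted
        print(f"convert {name} as lora_{'A' if is_lora_a else 'B'}")
    else:
        print(f"skip non-LoRA weight: {name}")  # previously this caused sys.exit(1)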