From 406833d77921de64d704807582c3f422221eed67 Mon Sep 17 00:00:00 2001
From: Gabe Goodhart
Date: Wed, 4 Sep 2024 12:16:56 -0600
Subject: [PATCH] feat(convert_hf_to_gguf): Add registration and param setup
 for Granite

Branch: GraniteLM

Signed-off-by: Gabe Goodhart
---
 convert_hf_to_gguf.py | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index c7e6ae0ca..56e86011b 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -4080,6 +4080,34 @@ class ExaoneModel(Model):
         super().prepare_tensors()
 
 
+@Model.register("GraniteForCausalLM")
+class GraniteModel(Model):
+    """Conversion for IBM's GraniteForCausalLM"""
+    model_arch = gguf.MODEL_ARCH.GRANITE
+
+    def set_gguf_parameters(self):
+        """Granite uses standard llama parameters with the following differences:
+
+        - No head_dim support
+        - New multiplier params:
+            - attention_multiplier
+            - embedding_multiplier
+            - residual_multiplier
+            - logits_scaling
+        """
+        if head_dim := self.hparams.pop("head_dim", None):
+            logger.warning("Ignoring head_dim (%s) from config for Granite", head_dim)
+        super().set_gguf_parameters()
+        if attention_multiplier := self.hparams.get("attention_multiplier"):
+            self.gguf_writer.add_attention_multiplier(attention_multiplier)
+        if embedding_multiplier := self.hparams.get("embedding_multiplier"):
+            self.gguf_writer.add_embedding_multiplier(embedding_multiplier)
+        if residual_multiplier := self.hparams.get("residual_multiplier"):
+            self.gguf_writer.add_residual_multiplier(residual_multiplier)
+        if logits_scaling := self.hparams.get("logits_scaling"):
+            self.gguf_writer.add_logit_scale(logits_scaling)
+
+
 ###### CONVERSION LOGIC ######
 
 # tree of lazy tensors
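For readers skimming the patch: a minimal standalone sketch of the guard logic the new override uses. It assumes `hparams` is the parsed Granite config.json; the sample values are made up for illustration, not taken from any real model, and the `add_*_multiplier` writer methods are assumed to come from the companion gguf-py change:

    # Hypothetical Granite hparams; values are illustrative only.
    hparams = {
        "hidden_size": 4096,
        "num_attention_heads": 32,
        "head_dim": 128,                  # dropped: Granite conversion ignores it
        "attention_multiplier": 0.015625,
        "embedding_multiplier": 12.0,
        "residual_multiplier": 0.22,
        "logits_scaling": 8.0,
    }

    # head_dim is popped *before* the llama-family defaults run, so the base
    # class derives it from hidden_size / num_attention_heads instead.
    if head_dim := hparams.pop("head_dim", None):
        print(f"Ignoring head_dim ({head_dim}) from config for Granite")

    # Each multiplier is emitted only when its key is present, so older
    # configs without the new fields still convert cleanly. Note that
    # logits_scaling reuses the existing logit_scale GGUF key rather than
    # introducing a new one.
    for key in ("attention_multiplier", "embedding_multiplier",
                "residual_multiplier", "logits_scaling"):
        if value := hparams.get(key):
            print(f"writing {key} = {value}")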