From 65c5bb91abd6cdc00be7ed0d7c8d2d771a82f6d6 Mon Sep 17 00:00:00 2001
From: Gabe Goodhart
Date: Mon, 16 Sep 2024 08:56:56 -0600
Subject: [PATCH] fix(convert_hf_to_gguf/gguf-py): _multiplier -> _scale

The transformers names with _multiplier will now be converted to the
_scale equivalent during conversion.

Branch: GraniteLM

Signed-off-by: Gabe Goodhart
---
 convert_hf_to_gguf.py       | 20 +++++++++++---------
 gguf-py/gguf/constants.py   |  6 +++---
 gguf-py/gguf/gguf_writer.py | 12 ++++++------
 3 files changed, 20 insertions(+), 18 deletions(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 8530557d8..ff4c9226f 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -4090,20 +4090,22 @@ class GraniteModel(LlamaModel):
 
         - No head_dim support
         - New multiplier params:
-            - attention_multiplier
-            - embedding_multiplier
-            - residual_multiplier
+            - attention_scale
+            - embedding_scale
+            - residual_scale
         - logits_scaling
         """
         if head_dim := self.hparams.pop("head_dim", None):
             logger.warning("Ignoring head_dim (%s) from config for Granite", head_dim)
         super().set_gguf_parameters()
-        if attention_multiplier := self.hparams.get("attention_multiplier"):
-            self.gguf_writer.add_attention_multiplier(attention_multiplier)
-        if embedding_multiplier := self.hparams.get("embedding_multiplier"):
-            self.gguf_writer.add_embedding_multiplier(embedding_multiplier)
-        if residual_multiplier := self.hparams.get("residual_multiplier"):
-            self.gguf_writer.add_residual_multiplier(residual_multiplier)
+        # NOTE: Convert _multiplier params to _scale params for naming
+        #   consistency
+        if attention_scale := self.hparams.get("attention_multiplier"):
+            self.gguf_writer.add_attention_scale(attention_scale)
+        if embedding_scale := self.hparams.get("embedding_multiplier"):
+            self.gguf_writer.add_embedding_scale(embedding_scale)
+        if residual_scale := self.hparams.get("residual_multiplier"):
+            self.gguf_writer.add_residual_scale(residual_scale)
         if logits_scaling := self.hparams.get("logits_scaling"):
             self.gguf_writer.add_logit_scale(logits_scaling)
 
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index 88619094a..b36a60d49 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -97,8 +97,8 @@ class Keys:
         RESCALE_EVERY_N_LAYERS       = "{arch}.rescale_every_n_layers"
         TIME_MIX_EXTRA_DIM           = "{arch}.time_mix_extra_dim"
         TIME_DECAY_EXTRA_DIM         = "{arch}.time_decay_extra_dim"
-        RESIDUAL_MULTIPLIER          = "{arch}.residual_multiplier"
-        EMBEDDING_MULTIPLIER         = "{arch}.embedding_multiplier"
+        RESIDUAL_SCALE               = "{arch}.residual_scale"
+        EMBEDDING_SCALE              = "{arch}.embedding_scale"
 
     class Attention:
         HEAD_COUNT        = "{arch}.attention.head_count"
@@ -114,7 +114,7 @@ class Keys:
         KV_LORA_RANK      = "{arch}.attention.kv_lora_rank"
         REL_BUCKETS_COUNT = "{arch}.attention.relative_buckets_count"
         SLIDING_WINDOW    = "{arch}.attention.sliding_window"
-        MULTIPLIER        = "{arch}.attention.multiplier"
+        SCALE             = "{arch}.attention.scale"
 
     class Rope:
         DIMENSION_COUNT      = "{arch}.rope.dimension_count"
diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py
index aed56ac96..bd059b45c 100644
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@@ -679,11 +679,11 @@ class GGUFWriter:
     def add_time_decay_extra_dim(self, dim: int) -> None:
         self.add_uint32(Keys.LLM.TIME_DECAY_EXTRA_DIM.format(arch=self.arch), dim)
 
-    def add_residual_multiplier(self, value: float) -> None:
-        self.add_float32(Keys.LLM.RESIDUAL_MULTIPLIER.format(arch=self.arch), value)
+    def add_residual_scale(self, value: float) -> None:
+        self.add_float32(Keys.LLM.RESIDUAL_SCALE.format(arch=self.arch), value)
 
-    def add_embedding_multiplier(self, value: float) -> None:
-        self.add_float32(Keys.LLM.EMBEDDING_MULTIPLIER.format(arch=self.arch), value)
+    def add_embedding_scale(self, value: float) -> None:
+        self.add_float32(Keys.LLM.EMBEDDING_SCALE.format(arch=self.arch), value)
 
     def add_wkv_head_size(self, size: int) -> None:
         self.add_uint32(Keys.WKV.HEAD_SIZE.format(arch=self.arch), size)
@@ -709,8 +709,8 @@ class GGUFWriter:
     def add_sliding_window(self, value: int) -> None:
         self.add_uint32(Keys.Attention.SLIDING_WINDOW.format(arch=self.arch), value)
 
-    def add_attention_multiplier(self, value: float) -> None:
-        self.add_float32(Keys.Attention.MULTIPLIER.format(arch=self.arch), value)
+    def add_attention_scale(self, value: float) -> None:
+        self.add_float32(Keys.Attention.SCALE.format(arch=self.arch), value)
 
     def add_pooling_type(self, value: PoolingType) -> None:
         self.add_uint32(Keys.LLM.POOLING_TYPE.format(arch=self.arch), value.value)
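Editor's note (not part of the patch): the sketch below illustrates the resulting name mapping, assuming the Granite architecture string is "granite" and using purely hypothetical hparams values. After this change the converter still reads the Hugging Face _multiplier names from config.json but emits the renamed _scale GGUF keys through the writer methods shown above.

# Illustrative sketch only: hypothetical Granite hparams values and the
# GGUF metadata keys the renamed writer methods would emit for them,
# assuming the architecture string is "granite".
hparams = {
    "attention_multiplier": 0.0078125,  # hypothetical value
    "embedding_multiplier": 12.0,       # hypothetical value
    "residual_multiplier": 0.22,        # hypothetical value
    "logits_scaling": 8.0,              # hypothetical value
}

# HF config name -> GGUF key written by the corresponding writer method
key_map = {
    "attention_multiplier": "granite.attention.scale",  # add_attention_scale
    "embedding_multiplier": "granite.embedding_scale",  # add_embedding_scale
    "residual_multiplier":  "granite.residual_scale",   # add_residual_scale
    "logits_scaling":       "granite.logit_scale",      # add_logit_scale
}

for hf_name, gguf_key in key_map.items():
    if (value := hparams.get(hf_name)) is not None:
        print(f"{hf_name} -> {gguf_key} = {value}")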