fix(convert_hf_to_gguf/gguf-py): _multiplier -> _scale
The transformers hyperparameter names ending in _multiplier will now be converted to their _scale equivalents during conversion.

Branch: GraniteLM

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
This commit is contained in:
parent
0bdf04e7b5
commit
65c5bb91ab
3 changed files with 20 additions and 18 deletions
|
@ -4090,20 +4090,22 @@ class GraniteModel(LlamaModel):
|
||||||
|
|
||||||
- No head_dim support
|
- No head_dim support
|
||||||
- New multiplier params:
|
- New multiplier params:
|
||||||
- attention_multiplier
|
- attention_scale
|
||||||
- embedding_multiplier
|
- embedding_scale
|
||||||
- residual_multiplier
|
- residual_scale
|
||||||
- logits_scaling
|
- logits_scaling
|
||||||
"""
|
"""
|
||||||
if head_dim := self.hparams.pop("head_dim", None):
|
if head_dim := self.hparams.pop("head_dim", None):
|
||||||
logger.warning("Ignoring head_dim (%s) from config for Granite", head_dim)
|
logger.warning("Ignoring head_dim (%s) from config for Granite", head_dim)
|
||||||
super().set_gguf_parameters()
|
super().set_gguf_parameters()
|
||||||
if attention_multiplier := self.hparams.get("attention_multiplier"):
|
# NOTE: Convert _multiplier params to _scale params for naming
|
||||||
self.gguf_writer.add_attention_multiplier(attention_multiplier)
|
# consistency
|
||||||
if embedding_multiplier := self.hparams.get("embedding_multiplier"):
|
if attention_scale := self.hparams.get("attention_multiplier"):
|
||||||
self.gguf_writer.add_embedding_multiplier(embedding_multiplier)
|
self.gguf_writer.add_attention_scale(attention_scale)
|
||||||
if residual_multiplier := self.hparams.get("residual_multiplier"):
|
if embedding_scale := self.hparams.get("embedding_multiplier"):
|
||||||
self.gguf_writer.add_residual_multiplier(residual_multiplier)
|
self.gguf_writer.add_embedding_scale(embedding_scale)
|
||||||
|
if residual_scale := self.hparams.get("residual_multiplier"):
|
||||||
|
self.gguf_writer.add_residual_scale(residual_scale)
|
||||||
if logits_scaling := self.hparams.get("logits_scaling"):
|
if logits_scaling := self.hparams.get("logits_scaling"):
|
||||||
self.gguf_writer.add_logit_scale(logits_scaling)
|
self.gguf_writer.add_logit_scale(logits_scaling)
|
||||||
|
|
||||||
|
|
|
@ -97,8 +97,8 @@ class Keys:
|
||||||
RESCALE_EVERY_N_LAYERS = "{arch}.rescale_every_n_layers"
|
RESCALE_EVERY_N_LAYERS = "{arch}.rescale_every_n_layers"
|
||||||
TIME_MIX_EXTRA_DIM = "{arch}.time_mix_extra_dim"
|
TIME_MIX_EXTRA_DIM = "{arch}.time_mix_extra_dim"
|
||||||
TIME_DECAY_EXTRA_DIM = "{arch}.time_decay_extra_dim"
|
TIME_DECAY_EXTRA_DIM = "{arch}.time_decay_extra_dim"
|
||||||
RESIDUAL_MULTIPLIER = "{arch}.residual_multiplier"
|
RESIDUAL_SCALE = "{arch}.residual_scale"
|
||||||
EMBEDDING_MULTIPLIER = "{arch}.embedding_multiplier"
|
EMBEDDING_SCALE = "{arch}.embedding_scale"
|
||||||
|
|
||||||
class Attention:
|
class Attention:
|
||||||
HEAD_COUNT = "{arch}.attention.head_count"
|
HEAD_COUNT = "{arch}.attention.head_count"
|
||||||
|
@ -114,7 +114,7 @@ class Keys:
|
||||||
KV_LORA_RANK = "{arch}.attention.kv_lora_rank"
|
KV_LORA_RANK = "{arch}.attention.kv_lora_rank"
|
||||||
REL_BUCKETS_COUNT = "{arch}.attention.relative_buckets_count"
|
REL_BUCKETS_COUNT = "{arch}.attention.relative_buckets_count"
|
||||||
SLIDING_WINDOW = "{arch}.attention.sliding_window"
|
SLIDING_WINDOW = "{arch}.attention.sliding_window"
|
||||||
MULTIPLIER = "{arch}.attention.multiplier"
|
SCALE = "{arch}.attention.scale"
|
||||||
|
|
||||||
class Rope:
|
class Rope:
|
||||||
DIMENSION_COUNT = "{arch}.rope.dimension_count"
|
DIMENSION_COUNT = "{arch}.rope.dimension_count"
|
||||||
|
|
|
@ -679,11 +679,11 @@ class GGUFWriter:
|
||||||
def add_time_decay_extra_dim(self, dim: int) -> None:
|
def add_time_decay_extra_dim(self, dim: int) -> None:
|
||||||
self.add_uint32(Keys.LLM.TIME_DECAY_EXTRA_DIM.format(arch=self.arch), dim)
|
self.add_uint32(Keys.LLM.TIME_DECAY_EXTRA_DIM.format(arch=self.arch), dim)
|
||||||
|
|
||||||
def add_residual_multiplier(self, value: float) -> None:
|
def add_residual_scale(self, value: float) -> None:
|
||||||
self.add_float32(Keys.LLM.RESIDUAL_MULTIPLIER.format(arch=self.arch), value)
|
self.add_float32(Keys.LLM.RESIDUAL_SCALE.format(arch=self.arch), value)
|
||||||
|
|
||||||
def add_embedding_multiplier(self, value: float) -> None:
|
def add_embedding_scale(self, value: float) -> None:
|
||||||
self.add_float32(Keys.LLM.EMBEDDING_MULTIPLIER.format(arch=self.arch), value)
|
self.add_float32(Keys.LLM.EMBEDDING_SCALE.format(arch=self.arch), value)
|
||||||
|
|
||||||
def add_wkv_head_size(self, size: int) -> None:
|
def add_wkv_head_size(self, size: int) -> None:
|
||||||
self.add_uint32(Keys.WKV.HEAD_SIZE.format(arch=self.arch), size)
|
self.add_uint32(Keys.WKV.HEAD_SIZE.format(arch=self.arch), size)
|
||||||
|
@ -709,8 +709,8 @@ class GGUFWriter:
|
||||||
def add_sliding_window(self, value: int) -> None:
|
def add_sliding_window(self, value: int) -> None:
|
||||||
self.add_uint32(Keys.Attention.SLIDING_WINDOW.format(arch=self.arch), value)
|
self.add_uint32(Keys.Attention.SLIDING_WINDOW.format(arch=self.arch), value)
|
||||||
|
|
||||||
def add_attention_multiplier(self, value: float) -> None:
|
def add_attention_scale(self, value: float) -> None:
|
||||||
self.add_float32(Keys.Attention.MULTIPLIER.format(arch=self.arch), value)
|
self.add_float32(Keys.Attention.SCALE.format(arch=self.arch), value)
|
||||||
|
|
||||||
def add_pooling_type(self, value: PoolingType) -> None:
|
def add_pooling_type(self, value: PoolingType) -> None:
|
||||||
self.add_uint32(Keys.LLM.POOLING_TYPE.format(arch=self.arch), value.value)
|
self.add_uint32(Keys.LLM.POOLING_TYPE.format(arch=self.arch), value.value)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue