feat(gguf-py): Add Granite model and params to gguf-py
Branch: GraniteLM
Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
This commit is contained in:
parent
acb2c32c33
commit
5ebc5ef572
2 changed files with 27 additions and 0 deletions
|
@@ -97,6 +97,8 @@ class Keys:
|
||||||
RESCALE_EVERY_N_LAYERS = "{arch}.rescale_every_n_layers"
|
RESCALE_EVERY_N_LAYERS = "{arch}.rescale_every_n_layers"
|
||||||
TIME_MIX_EXTRA_DIM = "{arch}.time_mix_extra_dim"
|
TIME_MIX_EXTRA_DIM = "{arch}.time_mix_extra_dim"
|
||||||
TIME_DECAY_EXTRA_DIM = "{arch}.time_decay_extra_dim"
|
TIME_DECAY_EXTRA_DIM = "{arch}.time_decay_extra_dim"
|
||||||
|
RESIDUAL_MULTIPLIER = "{arch}.residual_multiplier"
|
||||||
|
EMBEDDING_MULTIPLIER = "{arch}.embedding_multiplier"
|
||||||
|
|
||||||
class Attention:
|
class Attention:
|
||||||
HEAD_COUNT = "{arch}.attention.head_count"
|
HEAD_COUNT = "{arch}.attention.head_count"
|
||||||
|
@@ -112,6 +114,7 @@ class Keys:
|
||||||
KV_LORA_RANK = "{arch}.attention.kv_lora_rank"
|
KV_LORA_RANK = "{arch}.attention.kv_lora_rank"
|
||||||
REL_BUCKETS_COUNT = "{arch}.attention.relative_buckets_count"
|
REL_BUCKETS_COUNT = "{arch}.attention.relative_buckets_count"
|
||||||
SLIDING_WINDOW = "{arch}.attention.sliding_window"
|
SLIDING_WINDOW = "{arch}.attention.sliding_window"
|
||||||
|
MULTIPLIER = "{arch}.attention.multiplier"
|
||||||
|
|
||||||
class Rope:
|
class Rope:
|
||||||
DIMENSION_COUNT = "{arch}.rope.dimension_count"
|
DIMENSION_COUNT = "{arch}.rope.dimension_count"
|
||||||
|
@@ -231,6 +234,7 @@ class MODEL_ARCH(IntEnum):
|
||||||
JAIS = auto()
|
JAIS = auto()
|
||||||
NEMOTRON = auto()
|
NEMOTRON = auto()
|
||||||
EXAONE = auto()
|
EXAONE = auto()
|
||||||
|
GRANITE = auto()
|
||||||
|
|
||||||
|
|
||||||
class MODEL_TENSOR(IntEnum):
|
class MODEL_TENSOR(IntEnum):
|
||||||
|
@@ -387,6 +391,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
|
||||||
MODEL_ARCH.JAIS: "jais",
|
MODEL_ARCH.JAIS: "jais",
|
||||||
MODEL_ARCH.NEMOTRON: "nemotron",
|
MODEL_ARCH.NEMOTRON: "nemotron",
|
||||||
MODEL_ARCH.EXAONE: "exaone",
|
MODEL_ARCH.EXAONE: "exaone",
|
||||||
|
MODEL_ARCH.GRANITE: "granite",
|
||||||
}
|
}
|
||||||
|
|
||||||
TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
|
TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
|
||||||
|
@@ -1224,6 +1229,19 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
||||||
MODEL_TENSOR.FFN_DOWN,
|
MODEL_TENSOR.FFN_DOWN,
|
||||||
MODEL_TENSOR.FFN_UP,
|
MODEL_TENSOR.FFN_UP,
|
||||||
],
|
],
|
||||||
|
MODEL_ARCH.GRANITE: [
|
||||||
|
MODEL_TENSOR.TOKEN_EMBD,
|
||||||
|
MODEL_TENSOR.OUTPUT_NORM,
|
||||||
|
MODEL_TENSOR.ATTN_NORM,
|
||||||
|
MODEL_TENSOR.ATTN_Q,
|
||||||
|
MODEL_TENSOR.ATTN_K,
|
||||||
|
MODEL_TENSOR.ATTN_V,
|
||||||
|
MODEL_TENSOR.ATTN_OUT,
|
||||||
|
MODEL_TENSOR.FFN_NORM,
|
||||||
|
MODEL_TENSOR.FFN_GATE,
|
||||||
|
MODEL_TENSOR.FFN_DOWN,
|
||||||
|
MODEL_TENSOR.FFN_UP,
|
||||||
|
],
|
||||||
# TODO
|
# TODO
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@@ -679,6 +679,12 @@ class GGUFWriter:
|
||||||
def add_time_decay_extra_dim(self, dim: int) -> None:
|
def add_time_decay_extra_dim(self, dim: int) -> None:
|
||||||
self.add_uint32(Keys.LLM.TIME_DECAY_EXTRA_DIM.format(arch=self.arch), dim)
|
self.add_uint32(Keys.LLM.TIME_DECAY_EXTRA_DIM.format(arch=self.arch), dim)
|
||||||
|
|
||||||
|
def add_residual_multiplier(self, value: float) -> None:
|
||||||
|
self.add_float32(Keys.LLM.RESIDUAL_MULTIPLIER.format(arch=self.arch), value)
|
||||||
|
|
||||||
|
def add_embedding_multiplier(self, value: float) -> None:
|
||||||
|
self.add_float32(Keys.LLM.EMBEDDING_MULTIPLIER.format(arch=self.arch), value)
|
||||||
|
|
||||||
def add_wkv_head_size(self, size: int) -> None:
|
def add_wkv_head_size(self, size: int) -> None:
|
||||||
self.add_uint32(Keys.WKV.HEAD_SIZE.format(arch=self.arch), size)
|
self.add_uint32(Keys.WKV.HEAD_SIZE.format(arch=self.arch), size)
|
||||||
|
|
||||||
|
@@ -703,6 +709,9 @@ class GGUFWriter:
|
||||||
def add_sliding_window(self, value: int) -> None:
|
def add_sliding_window(self, value: int) -> None:
|
||||||
self.add_uint32(Keys.Attention.SLIDING_WINDOW.format(arch=self.arch), value)
|
self.add_uint32(Keys.Attention.SLIDING_WINDOW.format(arch=self.arch), value)
|
||||||
|
|
||||||
|
def add_attention_multiplier(self, value: float) -> None:
|
||||||
|
self.add_float32(Keys.Attention.MULTIPLIER.format(arch=self.arch), value)
|
||||||
|
|
||||||
def add_pooling_type(self, value: PoolingType) -> None:
|
def add_pooling_type(self, value: PoolingType) -> None:
|
||||||
self.add_uint32(Keys.LLM.POOLING_TYPE.format(arch=self.arch), value.value)
|
self.add_uint32(Keys.LLM.POOLING_TYPE.format(arch=self.arch), value.value)
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue