llama : add OpenELM support (#7359)
* Initial OpenELM support (270M only so far)
* Fill out missing entries in llama_model_type_name
* fixup! Initial OpenELM support (270M only so far)
  Fix formatting
* llama : support all OpenELM models
* llama : add variable GQA and variable FFN sizes
  Some metadata keys can now also be arrays to support setting their value
  per-layer for models like OpenELM.
* llama : minor spacing changes
  Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
* llama : use std::array for per-layer hparams
* llama : fix save/load state
* llama : do not print hparams for vocab-only models
* llama : handle n_head == 0
* llama : use const ref for print_f and fix division by zero
* llama : fix t5 uses of n_head and n_ff
* llama : minor comment

---------

Co-authored-by: Francis Couture-Harpin <git@compilade.net>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
parent 6f63d646c1
commit d7fd29fff1
5 changed files with 676 additions and 176 deletions
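The gguf-py change below lets per-layer hyperparameters (n_head, n_head_kv, n_ff) be written as arrays rather than single scalars. As a rough sketch of how a conversion script might use the widened writer API, not taken from this commit: the path, architecture string, per-layer values, and finalization calls are illustrative assumptions.

from gguf import GGUFWriter

# Hypothetical example: path, arch string and per-layer values are made up.
writer = GGUFWriter("openelm-270m.gguf", "openelm")

writer.add_block_count(4)                                 # scalar -> uint32, as before
writer.add_head_count([12, 12, 16, 16])                   # per-layer n_head -> array
writer.add_head_count_kv([3, 3, 4, 4])                    # per-layer n_head_kv -> array
writer.add_feed_forward_length([768, 1024, 1280, 1536])   # per-layer n_ff -> array

writer.write_header_to_file()
writer.write_kv_data_to_file()
writer.close()

Scalar arguments still take the original add_uint32 path, so existing conversion scripts keep working unchanged; only sequences are stored as arrays.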
@@ -480,8 +480,11 @@ class GGUFWriter:
     def add_leading_dense_block_count(self, length: int) -> None:
         self.add_uint32(Keys.LLM.LEADING_DENSE_BLOCK_COUNT.format(arch=self.arch), length)

-    def add_feed_forward_length(self, length: int) -> None:
-        self.add_uint32(Keys.LLM.FEED_FORWARD_LENGTH.format(arch=self.arch), length)
+    def add_feed_forward_length(self, length: int | Sequence[int]) -> None:
+        if isinstance(length, int):
+            self.add_uint32(Keys.LLM.FEED_FORWARD_LENGTH.format(arch=self.arch), length)
+        else:
+            self.add_array(Keys.LLM.FEED_FORWARD_LENGTH.format(arch=self.arch), length)

     def add_expert_feed_forward_length(self, length: int) -> None:
         self.add_uint32(Keys.LLM.EXPERT_FEED_FORWARD_LENGTH.format(arch=self.arch), length)
@@ -495,11 +498,17 @@ class GGUFWriter:
     def add_decoder_start_token_id(self, id: int) -> None:
         self.add_uint32(Keys.LLM.DECODER_START_TOKEN_ID.format(arch=self.arch), id)

-    def add_head_count(self, count: int) -> None:
-        self.add_uint32(Keys.Attention.HEAD_COUNT.format(arch=self.arch), count)
+    def add_head_count(self, count: int | Sequence[int]) -> None:
+        if isinstance(count, int):
+            self.add_uint32(Keys.Attention.HEAD_COUNT.format(arch=self.arch), count)
+        else:
+            self.add_array(Keys.Attention.HEAD_COUNT.format(arch=self.arch), count)

-    def add_head_count_kv(self, count: int) -> None:
-        self.add_uint32(Keys.Attention.HEAD_COUNT_KV.format(arch=self.arch), count)
+    def add_head_count_kv(self, count: int | Sequence[int]) -> None:
+        if isinstance(count, int):
+            self.add_uint32(Keys.Attention.HEAD_COUNT_KV.format(arch=self.arch), count)
+        else:
+            self.add_array(Keys.Attention.HEAD_COUNT_KV.format(arch=self.arch), count)

     def add_key_length(self, length: int) -> None:
         self.add_uint32(Keys.Attention.KEY_LENGTH.format(arch=self.arch), length)
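The three widened methods repeat the same int-vs-sequence dispatch. A small sketch of how that shared pattern could be expressed as one helper; this helper is not part of gguf-py and only restates the logic above.

from typing import Sequence, Union

from gguf import GGUFWriter

def add_uint32_or_array(writer: GGUFWriter, key: str, value: Union[int, Sequence[int]]) -> None:
    # Scalars keep the original uint32 encoding; sequences are stored as
    # arrays, which is what enables per-layer values for models like OpenELM.
    if isinstance(value, int):
        writer.add_uint32(key, value)
    else:
        writer.add_array(key, value)

Keeping the scalar path intact means older models and readers are unaffected, while array-aware readers can pick up per-layer head counts and feed-forward lengths.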