llama : add OpenELM support (#7359)
* Initial OpenELM support (270M only so far) * Fill out missing entries in llama_model_type_name * fixup! Initial OpenELM support (270M only so far) Fix formatting * llama : support all OpenELM models * llama : add variable GQA and variable FFN sizes Some metadata keys can now also be arrays to support setting their value per-layer for models like OpenELM. * llama : minor spacing changes Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * llama : use std::array for per-layer hparams * llama : fix save/load state * llama : do not print hparams for vocab-only models * llama : handle n_head == 0 * llama : use const ref for print_f and fix division by zero * llama : fix t5 uses of n_head and n_ff * llama : minor comment --------- Co-authored-by: Francis Couture-Harpin <git@compilade.net> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
parent
6f63d646c1
commit
d7fd29fff1
5 changed files with 676 additions and 176 deletions
|
@ -160,6 +160,7 @@ class MODEL_ARCH(IntEnum):
|
|||
COMMAND_R = auto()
|
||||
DBRX = auto()
|
||||
OLMO = auto()
|
||||
OPENELM = auto()
|
||||
ARCTIC = auto()
|
||||
DEEPSEEK2 = auto()
|
||||
BITNET = auto()
|
||||
|
@ -285,6 +286,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
|
|||
MODEL_ARCH.COMMAND_R: "command-r",
|
||||
MODEL_ARCH.DBRX: "dbrx",
|
||||
MODEL_ARCH.OLMO: "olmo",
|
||||
MODEL_ARCH.OPENELM: "openelm",
|
||||
MODEL_ARCH.ARCTIC: "arctic",
|
||||
MODEL_ARCH.DEEPSEEK2: "deepseek2",
|
||||
MODEL_ARCH.BITNET: "bitnet",
|
||||
|
@ -861,6 +863,19 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|||
MODEL_TENSOR.FFN_DOWN,
|
||||
MODEL_TENSOR.FFN_UP,
|
||||
],
|
||||
MODEL_ARCH.OPENELM: [
|
||||
MODEL_TENSOR.TOKEN_EMBD,
|
||||
MODEL_TENSOR.OUTPUT_NORM,
|
||||
MODEL_TENSOR.ATTN_NORM,
|
||||
MODEL_TENSOR.ATTN_QKV,
|
||||
MODEL_TENSOR.ATTN_Q_NORM,
|
||||
MODEL_TENSOR.ATTN_K_NORM,
|
||||
MODEL_TENSOR.ATTN_OUT,
|
||||
MODEL_TENSOR.FFN_NORM,
|
||||
MODEL_TENSOR.FFN_GATE,
|
||||
MODEL_TENSOR.FFN_DOWN,
|
||||
MODEL_TENSOR.FFN_UP,
|
||||
],
|
||||
MODEL_ARCH.ARCTIC: [
|
||||
MODEL_TENSOR.TOKEN_EMBD,
|
||||
MODEL_TENSOR.OUTPUT_NORM,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue