llama : support all OpenELM models

* llama : add variable GQA and variable FFN sizes

Some metadata keys can now also be arrays to support setting
their value per-layer for models like OpenELM.
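
As a rough illustration of what the array-valued keys enable, here is a minimal sketch (not code from this commit): the per-block values below are made up, and the gguf-py writer methods are only assumed to accept sequences in addition to single ints after this change.

from gguf import GGUFWriter

# Hypothetical 4-block model with per-block GQA and FFN sizes, in the spirit of
# OpenELM's layer-wise scaling; the values are illustrative, not a real config.
writer = GGUFWriter("model.gguf", arch="openelm")
writer.add_block_count(4)
writer.add_head_count([12, 12, 16, 16])                   # attention heads per block (array instead of a single int)
writer.add_head_count_kv([3, 3, 4, 4])                    # KV heads per block -> per-layer GQA
writer.add_feed_forward_length([768, 1024, 1280, 1536])   # FFN size per block
# (adding tensors and writing the file would follow as usual)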
Francis Couture-Harpin 2024-06-30 23:13:48 -04:00
parent 51b2577dd4
commit c8cdb48d10
5 changed files with 247 additions and 188 deletions


@@ -869,8 +869,9 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.ATTN_K_NORM,
         MODEL_TENSOR.ATTN_OUT,
         MODEL_TENSOR.FFN_NORM,
-        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.FFN_GATE,
         MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
     ],
     MODEL_ARCH.ARCTIC: [
         MODEL_TENSOR.TOKEN_EMBD,