llama : add AWQ for llama, llama2, mpt, and mistral models (#4593)
* update: AWQ support for the llama-7b model
* update: change order
* update: benchmark results for llama2-7b
* update: mistral 7b v1 benchmark
* update: support 4 models
* fix: readme
* update: ready for PR
* update: readme
* fix: readme
* update: change import order
* black
* format code
* update: work for both mpt and awq-mpt
* update: readme
* Rename to llm_build_ffn_mpt_awq
* Formatted other files
* Fixed params count
* fix: remove code
* update: more detail for mpt
* fix: readme
* fix: readme
* update: change folder architecture
* fix: common.cpp
* fix: readme
* fix: remove ggml_repeat
* update: CI/CD
* update: CI/CD
* update: remove use_awq arg
* update: readme
* llama : adapt plamo to new ffn

ggml-ci

--------

Co-authored-by: Trần Đức Nam <v.namtd12@vinai.io>
Co-authored-by: Le Hoang Anh <v.anhlh33@vinai.io>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
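For context on the technique (not part of this diff): AWQ, Activation-aware Weight Quantization, scales the most salient weight channels up before quantization and folds the inverse scale into the incoming activations, which is an exact no-op in full precision but reduces quantization error on those channels. A minimal numpy sketch of that identity follows; all names are illustrative, not taken from this commit, which only adds the plumbing to convert and run pre-scaled AWQ checkpoints.

```python
# Minimal sketch of the per-channel scaling identity behind AWQ.
# Illustrative only: this commit adds model plumbing, not this math.
import numpy as np

rng = np.random.default_rng(0)
x = rng.standard_normal(64)          # activations feeding one linear layer
W = rng.standard_normal((128, 64))   # weight matrix of that layer
s = 1.0 + rng.random(64)             # positive per-input-channel scales

# Scaling weight columns up and activations down by the same factors
# is exact in full precision; quantization is then applied to (W * s),
# where the salient channels occupy more of the quantizer's range.
y_ref = W @ x
y_awq = (W * s) @ (x / s)
assert np.allclose(y_ref, y_awq)
```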
parent 879b690a9e
commit f6793491b5

8 changed files with 443 additions and 5 deletions
gguf-py/gguf/constants.py:

@@ -120,6 +120,7 @@ class MODEL_TENSOR(IntEnum):
     FFN_GATE     = auto()
     FFN_DOWN     = auto()
     FFN_UP       = auto()
+    FFN_ACT      = auto()
     FFN_GATE_EXP = auto()
     FFN_DOWN_EXP = auto()
     FFN_UP_EXP   = auto()

@@ -169,6 +170,7 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
     MODEL_TENSOR.FFN_GATE:     "blk.{bid}.ffn_gate",
     MODEL_TENSOR.FFN_DOWN:     "blk.{bid}.ffn_down",
     MODEL_TENSOR.FFN_UP:       "blk.{bid}.ffn_up",
+    MODEL_TENSOR.FFN_ACT:      "blk.{bid}.ffn",
     MODEL_TENSOR.FFN_GATE_EXP: "blk.{bid}.ffn_gate.{xid}",
     MODEL_TENSOR.FFN_DOWN_EXP: "blk.{bid}.ffn_down.{xid}",
     MODEL_TENSOR.FFN_UP_EXP:   "blk.{bid}.ffn_up.{xid}",

@@ -269,6 +271,7 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_NORM,
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.FFN_ACT,
     ],
     MODEL_ARCH.GPTJ: [
         MODEL_TENSOR.TOKEN_EMBD,
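As a quick usage check of the new mapping (a hypothetical snippet, assuming the gguf-py package from this repo is importable), the FFN_ACT entry resolves to a concrete per-block tensor name:

```python
# Hypothetical usage of the FFN_ACT entry added above, assuming
# gguf-py (this repo's Python package) is on the path.
from gguf.constants import MODEL_TENSOR, TENSOR_NAMES

# The "{bid}" placeholder is the block (layer) index.
name = TENSOR_NAMES[MODEL_TENSOR.FFN_ACT].format(bid=3)
print(name)  # -> blk.3.ffn
```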