names : for brevity "SHARED_EXP" -> "SHEXP"

Georgi Gerganov 2024-04-16 09:01:40 +03:00
parent 7355ca84b5
commit f88e6844a4
3 changed files with 136 additions and 136 deletions


@@ -148,7 +148,7 @@ class MODEL_TENSOR(IntEnum):
     ATTN_OUT_NORM      = auto()
     ATTN_ROT_EMBD      = auto()
     FFN_GATE_INP       = auto()
-    FFN_GATE_INP_SHARED_EXP = auto()
+    FFN_GATE_INP_SHEXP = auto()
     FFN_NORM           = auto()
     FFN_GATE           = auto()
     FFN_DOWN           = auto()
@@ -157,9 +157,9 @@ class MODEL_TENSOR(IntEnum):
     FFN_GATE_EXP       = auto()
     FFN_DOWN_EXP       = auto()
     FFN_UP_EXP         = auto()
-    FFN_GATE_SHARED_EXP = auto()
-    FFN_DOWN_SHARED_EXP = auto()
-    FFN_UP_SHARED_EXP   = auto()
+    FFN_GATE_SHEXP     = auto()
+    FFN_DOWN_SHEXP     = auto()
+    FFN_UP_SHEXP       = auto()
     ATTN_Q_NORM        = auto()
     ATTN_K_NORM        = auto()
     LAYER_OUT_NORM     = auto()
@@ -225,14 +225,14 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
     MODEL_TENSOR.ATTN_K_NORM:        "blk.{bid}.attn_k_norm",
     MODEL_TENSOR.ATTN_OUT_NORM:      "blk.{bid}.attn_output_norm",
     MODEL_TENSOR.FFN_GATE_INP:       "blk.{bid}.ffn_gate_inp",
-    MODEL_TENSOR.FFN_GATE_INP_SHARED_EXP: "blk.{bid}.ffn_gate_inp_shared_exp",
+    MODEL_TENSOR.FFN_GATE_INP_SHEXP: "blk.{bid}.ffn_gate_inp_shexp",
     MODEL_TENSOR.FFN_NORM:           "blk.{bid}.ffn_norm",
     MODEL_TENSOR.FFN_GATE:           "blk.{bid}.ffn_gate",
     MODEL_TENSOR.FFN_DOWN:           "blk.{bid}.ffn_down",
     MODEL_TENSOR.FFN_UP:             "blk.{bid}.ffn_up",
-    MODEL_TENSOR.FFN_GATE_SHARED_EXP: "blk.{bid}.ffn_gate_shared_exp",
-    MODEL_TENSOR.FFN_DOWN_SHARED_EXP: "blk.{bid}.ffn_down_shared_exp",
-    MODEL_TENSOR.FFN_UP_SHARED_EXP:   "blk.{bid}.ffn_up_shared_exp",
+    MODEL_TENSOR.FFN_GATE_SHEXP:     "blk.{bid}.ffn_gate_shexp",
+    MODEL_TENSOR.FFN_DOWN_SHEXP:     "blk.{bid}.ffn_down_shexp",
+    MODEL_TENSOR.FFN_UP_SHEXP:       "blk.{bid}.ffn_up_shexp",
     MODEL_TENSOR.FFN_ACT:            "blk.{bid}.ffn",
     MODEL_TENSOR.FFN_GATE_EXP:       "blk.{bid}.ffn_gate_exps",
     MODEL_TENSOR.FFN_DOWN_EXP:       "blk.{bid}.ffn_down_exps",
@@ -493,10 +493,10 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_GATE_EXP,
         MODEL_TENSOR.FFN_DOWN_EXP,
         MODEL_TENSOR.FFN_UP_EXP,
-        MODEL_TENSOR.FFN_GATE_INP_SHARED_EXP,
-        MODEL_TENSOR.FFN_GATE_SHARED_EXP,
-        MODEL_TENSOR.FFN_DOWN_SHARED_EXP,
-        MODEL_TENSOR.FFN_UP_SHARED_EXP,
+        MODEL_TENSOR.FFN_GATE_INP_SHEXP,
+        MODEL_TENSOR.FFN_GATE_SHEXP,
+        MODEL_TENSOR.FFN_DOWN_SHEXP,
+        MODEL_TENSOR.FFN_UP_SHEXP,
     ],
     MODEL_ARCH.PLAMO: [
         MODEL_TENSOR.TOKEN_EMBD,
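
Note: the TENSOR_NAMES entries above are plain format strings in which {bid} is the block index. Below is a minimal sketch of how the renamed shared-expert templates expand, using only str.format; the actual substitution happens inside the gguf-py mapping/writer code.

    # Expand the renamed shared-expert name templates for block 0.
    templates = {
        "FFN_GATE_INP_SHEXP": "blk.{bid}.ffn_gate_inp_shexp",
        "FFN_GATE_SHEXP":     "blk.{bid}.ffn_gate_shexp",
        "FFN_DOWN_SHEXP":     "blk.{bid}.ffn_down_shexp",
        "FFN_UP_SHEXP":       "blk.{bid}.ffn_up_shexp",
    }
    for key, tmpl in templates.items():
        print(key, "->", tmpl.format(bid=0))  # e.g. FFN_UP_SHEXP -> blk.0.ffn_up_shexp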


@@ -213,7 +213,7 @@ class TensorNameMap:
             "transformer.blocks.{bid}.ffn.router.layer", # dbrx
         ),

-        MODEL_TENSOR.FFN_GATE_INP_SHARED_EXP: (
+        MODEL_TENSOR.FFN_GATE_INP_SHEXP: (
             "model.layers.{bid}.mlp.shared_expert_gate", # qwen2moe
         ),
@@ -247,7 +247,7 @@ class TensorNameMap:
             "model.layers.{bid}.mlp.experts.up_proj", # qwen2moe (merged)
         ),

-        MODEL_TENSOR.FFN_UP_SHARED_EXP: (
+        MODEL_TENSOR.FFN_UP_SHEXP: (
             "model.layers.{bid}.mlp.shared_expert.up_proj", # qwen2moe
         ),
@@ -273,7 +273,7 @@ class TensorNameMap:
             "model.layers.{bid}.mlp.experts.gate_proj", # qwen2moe (merged)
         ),

-        MODEL_TENSOR.FFN_GATE_SHARED_EXP: (
+        MODEL_TENSOR.FFN_GATE_SHEXP: (
             "model.layers.{bid}.mlp.shared_expert.gate_proj", # qwen2moe
         ),
@@ -306,7 +306,7 @@ class TensorNameMap:
             "model.layers.{bid}.mlp.experts.down_proj", # qwen2moe (merged)
         ),

-        MODEL_TENSOR.FFN_DOWN_SHARED_EXP: (
+        MODEL_TENSOR.FFN_DOWN_SHEXP: (
             "model.layers.{bid}.mlp.shared_expert.down_proj", # qwen2moe
         ),
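
Note: each TensorNameMap entry above pairs a GGUF shared-expert tensor with its qwen2moe checkpoint name. The sketch below is a rough, hypothetical illustration of the lookup direction, not the real TensorNameMap class (which covers many architectures and handles template expansion itself); it uses only names that appear in the diff.

    # Map a qwen2moe checkpoint tensor name to the corresponding GGUF name.
    HF_TO_GGUF = {
        "model.layers.{bid}.mlp.shared_expert_gate":      "blk.{bid}.ffn_gate_inp_shexp",
        "model.layers.{bid}.mlp.shared_expert.gate_proj": "blk.{bid}.ffn_gate_shexp",
        "model.layers.{bid}.mlp.shared_expert.down_proj": "blk.{bid}.ffn_down_shexp",
        "model.layers.{bid}.mlp.shared_expert.up_proj":   "blk.{bid}.ffn_up_shexp",
    }

    def map_name(hf_name, bid):
        # Substitute the block index into each template and compare.
        for hf_tmpl, gguf_tmpl in HF_TO_GGUF.items():
            if hf_tmpl.format(bid=bid) == hf_name:
                return gguf_tmpl.format(bid=bid)
        return None

    print(map_name("model.layers.3.mlp.shared_expert.up_proj", 3))  # blk.3.ffn_up_shexp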


@@ -431,7 +431,7 @@ enum llm_tensor {
     LLM_TENSOR_ATTN_OUT_NORM,
     LLM_TENSOR_ATTN_ROT_EMBD,
     LLM_TENSOR_FFN_GATE_INP,
-    LLM_TENSOR_FFN_GATE_INP_SHARED_EXP,
+    LLM_TENSOR_FFN_GATE_INP_SHEXP,
     LLM_TENSOR_FFN_NORM,
     LLM_TENSOR_FFN_GATE,
     LLM_TENSOR_FFN_DOWN,
@@ -440,12 +440,12 @@ enum llm_tensor {
     LLM_TENSOR_FFN_DOWN_EXP,  // split experts for backward compatibility
     LLM_TENSOR_FFN_GATE_EXP,
     LLM_TENSOR_FFN_UP_EXP,
-    LLM_TENSOR_FFN_DOWN_SHARED_EXP,
-    LLM_TENSOR_FFN_GATE_SHARED_EXP,
-    LLM_TENSOR_FFN_UP_SHARED_EXP,
     LLM_TENSOR_FFN_DOWN_EXPS, // merged experts
     LLM_TENSOR_FFN_GATE_EXPS,
     LLM_TENSOR_FFN_UP_EXPS,
+    LLM_TENSOR_FFN_DOWN_SHEXP,
+    LLM_TENSOR_FFN_GATE_SHEXP,
+    LLM_TENSOR_FFN_UP_SHEXP,
     LLM_TENSOR_ATTN_Q_NORM,
     LLM_TENSOR_ATTN_K_NORM,
     LLM_TENSOR_LAYER_OUT_NORM,
@@ -759,10 +759,10 @@ static const std::map<llm_arch, std::map<llm_tensor, std::string>> LLM_TENSOR_NA
            { LLM_TENSOR_FFN_GATE_EXPS,           "blk.%d.ffn_gate_exps" },
            { LLM_TENSOR_FFN_DOWN_EXPS,           "blk.%d.ffn_down_exps" },
            { LLM_TENSOR_FFN_UP_EXPS,             "blk.%d.ffn_up_exps" },
-           { LLM_TENSOR_FFN_GATE_INP_SHARED_EXP, "blk.%d.ffn_gate_inp_shared_exp" },
-           { LLM_TENSOR_FFN_GATE_SHARED_EXP,     "blk.%d.ffn_gate_shared_exp" },
-           { LLM_TENSOR_FFN_DOWN_SHARED_EXP,     "blk.%d.ffn_down_shared_exp" },
-           { LLM_TENSOR_FFN_UP_SHARED_EXP,       "blk.%d.ffn_up_shared_exp" },
+           { LLM_TENSOR_FFN_GATE_INP_SHEXP,      "blk.%d.ffn_gate_inp_shexp" },
+           { LLM_TENSOR_FFN_GATE_SHEXP,          "blk.%d.ffn_gate_shexp" },
+           { LLM_TENSOR_FFN_DOWN_SHEXP,          "blk.%d.ffn_down_shexp" },
+           { LLM_TENSOR_FFN_UP_SHEXP,            "blk.%d.ffn_up_shexp" },
        },
    },
    {
@@ -1938,11 +1938,11 @@ struct llama_layer {
     struct ggml_tensor * ffn_down_exps;
     struct ggml_tensor * ffn_up_exps ;

-    // ff shared expert
-    struct ggml_tensor * ffn_gate_inp_shared_exp;
-    struct ggml_tensor * ffn_gate_shared_exp;
-    struct ggml_tensor * ffn_down_shared_exp;
-    struct ggml_tensor * ffn_up_shared_exp;
+    // ff shared expert (shexp)
+    struct ggml_tensor * ffn_gate_inp_shexp;
+    struct ggml_tensor * ffn_gate_shexp;
+    struct ggml_tensor * ffn_down_shexp;
+    struct ggml_tensor * ffn_up_shexp;

     // ff bias
     struct ggml_tensor * ffn_down_b; // b2
@@ -5205,10 +5205,10 @@ static bool llm_load_tensors(
                    layer.ffn_up_exps = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP_EXPS, "weight", i), { n_embd, n_ff_exp, n_expert});

                    // Shared expert branch
-                   layer.ffn_gate_inp_shared_exp = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_GATE_INP_SHARED_EXP, "weight", i), {n_embd});
-                   layer.ffn_gate_shared_exp     = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_GATE_SHARED_EXP,     "weight", i), {n_embd,   n_ff});
-                   layer.ffn_down_shared_exp     = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_DOWN_SHARED_EXP,     "weight", i), {  n_ff, n_embd});
-                   layer.ffn_up_shared_exp       = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP_SHARED_EXP,       "weight", i), {n_embd,   n_ff});
+                   layer.ffn_gate_inp_shexp = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_GATE_INP_SHEXP, "weight", i), {n_embd});
+                   layer.ffn_gate_shexp     = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_GATE_SHEXP,     "weight", i), {n_embd,   n_ff});
+                   layer.ffn_down_shexp     = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_DOWN_SHEXP,     "weight", i), {  n_ff, n_embd});
+                   layer.ffn_up_shexp       = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP_SHEXP,       "weight", i), {n_embd,   n_ff});
                }
            } break;
        case LLM_ARCH_PHI2:
@@ -8622,31 +8622,31 @@ struct llm_build_context {
                }
            }

-           ggml_tensor * gate_shared_exp = ggml_mul_mat(ctx0, model.layers[il].ffn_gate_inp_shared_exp, cur);
-           cb(gate_shared_exp, "ffn_moe_gate_inp_shared_exp", il);
-
-           // sigmoid
-           ggml_tensor * logits_shared_exp = ggml_silu(ctx0, gate_shared_exp);
-           cb(logits_shared_exp, "ffn_moe_logits_shared_exp", il);
-
-           ggml_tensor * probs_shared_exp = ggml_div(ctx0, logits_shared_exp, gate_shared_exp);
-           cb(probs_shared_exp, "ffn_moe_probs_shared_exp", il);
-
-           ggml_tensor * ffn_shared_exp = llm_build_ffn(ctx0, cur,
-               model.layers[il].ffn_up_shared_exp,   NULL,
-               model.layers[il].ffn_gate_shared_exp, NULL,
-               model.layers[il].ffn_down_shared_exp, NULL,
-               NULL,
-               LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
-           cb(ffn_shared_exp, "ffn_moe_shared_exp", il);
-
-           ggml_tensor * ffn_shared_exp_out = ggml_mul(ctx0, ffn_shared_exp, probs_shared_exp);
-           cb(ffn_shared_exp_out, "ffn_moe_shared_exp_out", il);
-
-           moe_out = ggml_add(ctx0, moe_out, ffn_shared_exp_out);
-           cb(moe_out, "ffn_out", il);
-
-           cur = moe_out;
+           // FFN shared expert
+           {
+               ggml_tensor * cur_gate_inp = ggml_mul_mat(ctx0, model.layers[il].ffn_gate_inp_shexp, cur);
+               cb(cur_gate_inp, "ffn_shexp_gate_inp", il);
+
+               // sigmoid
+               ggml_tensor * cur_gate = ggml_div(ctx0, ggml_silu(ctx0, cur_gate_inp), cur_gate_inp);
+               cb(cur_gate, "ffn_shexp_gate", il);
+
+               ggml_tensor * cur_ffn = llm_build_ffn(ctx0, cur,
+                       model.layers[il].ffn_up_shexp,   NULL,
+                       model.layers[il].ffn_gate_shexp, NULL,
+                       model.layers[il].ffn_down_shexp, NULL,
+                       NULL,
+                       LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
+               cb(cur_ffn, "ffn_shexp", il);
+
+               ggml_tensor * ffn_shexp_out = ggml_mul(ctx0, cur_ffn, cur_gate);
+               cb(ffn_shexp_out, "ffn_shexp_out", il);
+
+               moe_out = ggml_add(ctx0, moe_out, ffn_shexp_out);
+               cb(moe_out, "ffn_out", il);
+
+               cur = moe_out;
+           }

            cur = ggml_add(ctx0, cur, ffn_inp);
            cb(cur, "l_out", il);
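
Note: the rewritten graph code above computes the shared-expert gate as silu(x)/x, which equals sigmoid(x) because silu(x) = x * sigmoid(x); the gated parallel-SiLU FFN output is then added to the routed moe_out. Below is a minimal NumPy sketch of that math, with hypothetical matrices W_inp, W_up, W_gate, W_down standing in for the ffn_*_shexp tensors; it is an illustration under those assumptions, not the ggml implementation.

    import numpy as np

    def silu(x):
        return x / (1.0 + np.exp(-x))  # x * sigmoid(x)

    def shared_expert_branch(cur, W_inp, W_up, W_gate, W_down):
        z = cur @ W_inp                # per-token gate logit (ffn_gate_inp_shexp projection)
        gate = silu(z) / z             # silu(z)/z == sigmoid(z), same trick as ggml_div(ggml_silu(z), z)
        # parallel SiLU FFN (LLM_FFN_SILU + LLM_FFN_PAR): down(silu(x @ gate_proj) * (x @ up_proj))
        ffn = (silu(cur @ W_gate) * (cur @ W_up)) @ W_down
        return ffn * gate              # ffn_shexp_out; the caller adds this to moe_out

    # Tiny shape check with made-up sizes (n_embd = 4, n_ff = 8, 3 tokens):
    rng = np.random.default_rng(0)
    cur = rng.standard_normal((3, 4))
    out = shared_expert_branch(cur,
                               rng.standard_normal((4, 1)),   # W_inp
                               rng.standard_normal((4, 8)),   # W_up
                               rng.standard_normal((4, 8)),   # W_gate
                               rng.standard_normal((8, 4)))   # W_down
    print(out.shape)  # (3, 4)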