minor
This commit is contained in:
parent
9c9fe60f53
commit
2479900a1c
2 changed files with 7 additions and 16 deletions
2
ggml.c
2
ggml.c
|
@ -10989,7 +10989,7 @@ static void ggml_compute_forward_mul_mat_id(
|
|||
|
||||
// row groups
|
||||
const int id = ggml_get_op_params_i32(dst, 0);
|
||||
const int n_as = src0->ne[2]; //ggml_get_op_params_i32(dst, 1);
|
||||
const int n_as = src0->ne[2];
|
||||
|
||||
char * wdata_src1_end = (src1->type == vec_dot_type) ?
|
||||
(char *) params->wdata :
|
||||
|
|
21
llama.cpp
21
llama.cpp
|
@ -1868,10 +1868,6 @@ struct llama_layer {
|
|||
struct ggml_tensor * ffn_down_exps;//[LLAMA_MAX_EXPERTS];
|
||||
struct ggml_tensor * ffn_up_exps ;//[LLAMA_MAX_EXPERTS];
|
||||
|
||||
struct ggml_tensor * ffn_gate_exp[LLAMA_MAX_EXPERTS];
|
||||
struct ggml_tensor * ffn_down_exp[LLAMA_MAX_EXPERTS];
|
||||
struct ggml_tensor * ffn_up_exp [LLAMA_MAX_EXPERTS];
|
||||
|
||||
// ff bias
|
||||
struct ggml_tensor * ffn_down_b; // b2
|
||||
struct ggml_tensor * ffn_up_b; // b3
|
||||
|
@ -4477,21 +4473,16 @@ static bool llm_load_tensors(
|
|||
|
||||
// MoE branch
|
||||
for (uint32_t x = 0; x < hparams.n_expert; ++x) {
|
||||
// hack
|
||||
// individual tensors as views
|
||||
layer.ffn_gate_exp[x] = ggml_view_2d(ctx_split, layer.ffn_gate_exps, n_embd, n_ff, layer.ffn_gate_exps->nb[1], layer.ffn_gate_exps->nb[2]*x);
|
||||
layer.ffn_down_exp[x] = ggml_view_2d(ctx_split, layer.ffn_down_exps, n_ff, n_embd, layer.ffn_down_exps->nb[1], layer.ffn_down_exps->nb[2]*x);
|
||||
layer.ffn_up_exp[x] = ggml_view_2d(ctx_split, layer.ffn_up_exps, n_embd, n_ff, layer.ffn_up_exps->nb[1], layer.ffn_up_exps->nb[2]*x);
|
||||
ggml_tensor * ffn_gate_exp = ggml_view_2d(ctx_split, layer.ffn_gate_exps, n_embd, n_ff, layer.ffn_gate_exps->nb[1], layer.ffn_gate_exps->nb[2]*x);
|
||||
ggml_tensor * ffn_down_exp = ggml_view_2d(ctx_split, layer.ffn_down_exps, n_ff, n_embd, layer.ffn_down_exps->nb[1], layer.ffn_down_exps->nb[2]*x);
|
||||
ggml_tensor * ffn_up_exp = ggml_view_2d(ctx_split, layer.ffn_up_exps, n_embd, n_ff, layer.ffn_up_exps->nb[1], layer.ffn_up_exps->nb[2]*x);
|
||||
|
||||
ggml_set_name(layer.ffn_gate_exp[x], tn(LLM_TENSOR_FFN_GATE_EXP, "weight", i, x).c_str());
|
||||
ggml_set_name(layer.ffn_down_exp[x], tn(LLM_TENSOR_FFN_DOWN_EXP, "weight", i, x).c_str());
|
||||
ggml_set_name(layer.ffn_up_exp[x], tn(LLM_TENSOR_FFN_UP_EXP, "weight", i, x).c_str());
|
||||
ggml_set_name(ffn_gate_exp, tn(LLM_TENSOR_FFN_GATE_EXP, "weight", i, x).c_str());
|
||||
ggml_set_name(ffn_down_exp, tn(LLM_TENSOR_FFN_DOWN_EXP, "weight", i, x).c_str());
|
||||
ggml_set_name(ffn_up_exp, tn(LLM_TENSOR_FFN_UP_EXP, "weight", i, x).c_str());
|
||||
|
||||
ml.n_created += 3; // hack
|
||||
|
||||
//layer.ffn_gate_exp[x] = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_GATE_EXP, "weight", i, x), {n_embd, n_ff});
|
||||
//layer.ffn_down_exp[x] = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_DOWN_EXP, "weight", i, x), { n_ff, n_embd});
|
||||
//layer.ffn_up_exp[x] = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP_EXP, "weight", i, x), {n_embd, n_ff});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue