From 2479900a1ca1b1dff3b5d2b7ff2dcd1ef29b012d Mon Sep 17 00:00:00 2001
From: slaren
Date: Fri, 29 Mar 2024 20:41:27 +0100
Subject: [PATCH] minor

---
 ggml.c    |  2 +-
 llama.cpp | 21 ++++++---------------
 2 files changed, 7 insertions(+), 16 deletions(-)

diff --git a/ggml.c b/ggml.c
index e212d060b..eb2ea6af0 100644
--- a/ggml.c
+++ b/ggml.c
@@ -10989,7 +10989,7 @@ static void ggml_compute_forward_mul_mat_id(
 
     // row groups
     const int id   = ggml_get_op_params_i32(dst, 0);
-    const int n_as = src0->ne[2]; //ggml_get_op_params_i32(dst, 1);
+    const int n_as = src0->ne[2];
 
     char * wdata_src1_end = (src1->type == vec_dot_type) ?
             (char *) params->wdata :
diff --git a/llama.cpp b/llama.cpp
index 6abf9f354..ae6985754 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1868,10 +1868,6 @@ struct llama_layer {
     struct ggml_tensor * ffn_down_exps;//[LLAMA_MAX_EXPERTS];
     struct ggml_tensor * ffn_up_exps  ;//[LLAMA_MAX_EXPERTS];
 
-    struct ggml_tensor * ffn_gate_exp[LLAMA_MAX_EXPERTS];
-    struct ggml_tensor * ffn_down_exp[LLAMA_MAX_EXPERTS];
-    struct ggml_tensor * ffn_up_exp [LLAMA_MAX_EXPERTS];
-
    // ff bias
    struct ggml_tensor * ffn_down_b; // b2
    struct ggml_tensor * ffn_up_b;   // b3
@@ -4477,21 +4473,16 @@ static bool llm_load_tensors(
 
                             // MoE branch
                             for (uint32_t x = 0; x < hparams.n_expert; ++x) {
-                                // hack
                                 // individual tensors as views
-                                layer.ffn_gate_exp[x] = ggml_view_2d(ctx_split, layer.ffn_gate_exps, n_embd, n_ff, layer.ffn_gate_exps->nb[1], layer.ffn_gate_exps->nb[2]*x);
-                                layer.ffn_down_exp[x] = ggml_view_2d(ctx_split, layer.ffn_down_exps, n_ff, n_embd, layer.ffn_down_exps->nb[1], layer.ffn_down_exps->nb[2]*x);
-                                layer.ffn_up_exp[x]   = ggml_view_2d(ctx_split, layer.ffn_up_exps,   n_embd, n_ff, layer.ffn_up_exps->nb[1],   layer.ffn_up_exps->nb[2]*x);
+                                ggml_tensor * ffn_gate_exp = ggml_view_2d(ctx_split, layer.ffn_gate_exps, n_embd, n_ff, layer.ffn_gate_exps->nb[1], layer.ffn_gate_exps->nb[2]*x);
+                                ggml_tensor * ffn_down_exp = ggml_view_2d(ctx_split, layer.ffn_down_exps, n_ff, n_embd, layer.ffn_down_exps->nb[1], layer.ffn_down_exps->nb[2]*x);
+                                ggml_tensor * ffn_up_exp   = ggml_view_2d(ctx_split, layer.ffn_up_exps,   n_embd, n_ff, layer.ffn_up_exps->nb[1],   layer.ffn_up_exps->nb[2]*x);
 
-                                ggml_set_name(layer.ffn_gate_exp[x], tn(LLM_TENSOR_FFN_GATE_EXP, "weight", i, x).c_str());
-                                ggml_set_name(layer.ffn_down_exp[x], tn(LLM_TENSOR_FFN_DOWN_EXP, "weight", i, x).c_str());
-                                ggml_set_name(layer.ffn_up_exp[x],   tn(LLM_TENSOR_FFN_UP_EXP,   "weight", i, x).c_str());
+                                ggml_set_name(ffn_gate_exp, tn(LLM_TENSOR_FFN_GATE_EXP, "weight", i, x).c_str());
+                                ggml_set_name(ffn_down_exp, tn(LLM_TENSOR_FFN_DOWN_EXP, "weight", i, x).c_str());
+                                ggml_set_name(ffn_up_exp,   tn(LLM_TENSOR_FFN_UP_EXP,   "weight", i, x).c_str());
 
                                 ml.n_created += 3; // hack
-
-                                //layer.ffn_gate_exp[x] = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_GATE_EXP, "weight", i, x), {n_embd, n_ff});
-                                //layer.ffn_down_exp[x] = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_DOWN_EXP, "weight", i, x), {  n_ff, n_embd});
-                                //layer.ffn_up_exp[x]   = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP_EXP,   "weight", i, x), {n_embd, n_ff});
                             }
                         }
                     }
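
The patch drops the fixed-size per-expert tensor arrays from llama_layer and
instead materializes each expert as a temporary 2D view into the stacked 3D
*_exps tensor at load time, only to name it and bump the created-tensor count.
For context, a minimal standalone sketch of the same ggml_view_2d slicing
pattern, assuming it is compiled against the ggml.h from this tree; the tensor
sizes and the "ffn_gate.%d.weight" name format here are illustrative stand-ins,
not the values or names produced by llama.cpp's hparams and tn() helper:

#include "ggml.h"

#include <stdio.h>

int main(void) {
    // illustrative sizes, not real hyperparameters
    const int64_t n_embd = 8, n_ff = 16, n_expert = 4;

    struct ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    // one stacked 3D tensor holding all experts, like ffn_gate_exps
    struct ggml_tensor * gate_exps = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, n_embd, n_ff, n_expert);

    for (int64_t x = 0; x < n_expert; ++x) {
        // 2D view of expert x: same data, offset by x slices along dim 2,
        // using the same ne/nb arguments as the llm_load_tensors hunk above
        struct ggml_tensor * gate_exp = ggml_view_2d(ctx, gate_exps,
                n_embd, n_ff, gate_exps->nb[1], gate_exps->nb[2]*x);

        // hypothetical name format; llama.cpp derives the real one via tn()
        char name[GGML_MAX_NAME];
        snprintf(name, sizeof(name), "ffn_gate.%d.weight", (int) x);
        ggml_set_name(gate_exp, name);

        printf("%s: view at byte offset %zu\n", gate_exp->name, (size_t)(gate_exps->nb[2]*x));
    }

    ggml_free(ctx);
    return 0;
}

Since ggml_view_2d only creates a view descriptor over the parent tensor's
data, the per-expert views cost no extra weight memory, which is what lets the
loader keep the expert names without keeping LLAMA_MAX_EXPERTS pointers per
projection in llama_layer.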