From 4256fe6a1ac8e3e132fbf2c73229e1ac8edc44a0 Mon Sep 17 00:00:00 2001
From: simonJJJ <821898965@qq.com>
Date: Tue, 16 Apr 2024 03:36:03 +0800
Subject: [PATCH] fix-review

---
 ggml-backend.c |  2 --
 ggml.h         |  2 +-
 llama.cpp      | 29 +++++++++++++++--------------
 3 files changed, 16 insertions(+), 17 deletions(-)

diff --git a/ggml-backend.c b/ggml-backend.c
index 5ece131cf..402d86ef3 100644
--- a/ggml-backend.c
+++ b/ggml-backend.c
@@ -1006,7 +1006,6 @@ static bool ggml_is_view_op(enum ggml_op op) {
 
 // scheduler
 
-
 #ifndef GGML_SCHED_MAX_BACKENDS
 #define GGML_SCHED_MAX_BACKENDS 16
 #endif
@@ -1023,7 +1022,6 @@ static bool ggml_is_view_op(enum ggml_op op) {
 #define GGML_SCHED_MAX_COPIES 4
 #endif
 
-
 struct ggml_backend_sched_split {
     int backend_id;
     int i_start;
diff --git a/ggml.h b/ggml.h
index 42a7d814a..e9ed8eeee 100644
--- a/ggml.h
+++ b/ggml.h
@@ -228,7 +228,7 @@
 #define GGML_MAX_DIMS           4
 #define GGML_MAX_PARAMS         2048
 #define GGML_MAX_CONTEXTS       64
-#define GGML_MAX_SRC            62
+#define GGML_MAX_SRC            10
 #ifndef GGML_MAX_NAME
 #define GGML_MAX_NAME           64
 #endif
diff --git a/llama.cpp b/llama.cpp
index 39f94b4f2..14048d726 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -746,19 +746,19 @@ static const std::map<llm_arch, std::map<llm_tensor, std::string>> LLM_TENSOR_NA
     {
         LLM_ARCH_QWEN2MOE,
         {
-            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
-            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
-            { LLM_TENSOR_OUTPUT,          "output" },
-            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
-            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
-            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
-            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
-            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
-            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
-            { LLM_TENSOR_FFN_GATE_INP,    "blk.%d.ffn_gate_inp" },
-            { LLM_TENSOR_FFN_GATE_EXPS,   "blk.%d.ffn_gate_exps" },
-            { LLM_TENSOR_FFN_DOWN_EXPS,   "blk.%d.ffn_down_exps" },
-            { LLM_TENSOR_FFN_UP_EXPS,     "blk.%d.ffn_up_exps" },
+            { LLM_TENSOR_TOKEN_EMBD,              "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,             "output_norm" },
+            { LLM_TENSOR_OUTPUT,                  "output" },
+            { LLM_TENSOR_ATTN_NORM,               "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,                  "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,                  "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,                  "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,                "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM,                "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE_INP,            "blk.%d.ffn_gate_inp" },
+            { LLM_TENSOR_FFN_GATE_EXPS,           "blk.%d.ffn_gate_exps" },
+            { LLM_TENSOR_FFN_DOWN_EXPS,           "blk.%d.ffn_down_exps" },
+            { LLM_TENSOR_FFN_UP_EXPS,             "blk.%d.ffn_up_exps" },
             { LLM_TENSOR_FFN_GATE_INP_SHARED_EXP, "blk.%d.ffn_gate_inp_shared_exp" },
             { LLM_TENSOR_FFN_GATE_SHARED_EXP,     "blk.%d.ffn_gate_shared_exp" },
             { LLM_TENSOR_FFN_DOWN_SHARED_EXP,     "blk.%d.ffn_down_shared_exp" },
@@ -1751,6 +1751,7 @@ enum e_model {
     MODEL_MEDIUM,
     MODEL_LARGE,
     MODEL_XL,
+    MODEL_A2_7B,
     MODEL_8x7B,
     MODEL_8x22B,
     MODEL_16x12B,
@@ -3916,7 +3917,7 @@ static void llm_load_hparams(
             {
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
                 switch (hparams.n_layer) {
-                    case 24: model.type = e_model::MODEL_1B; break;
+                    case 24: model.type = e_model::MODEL_A2_7B; break;
                     default: model.type = e_model::MODEL_UNKNOWN;
                 }
             } break;