Add optional MLP bias for Granite models
Add optional MLP bias for ARCH_LLAMA to support Granite models. Partially addresses ggerganov/llama.cpp/issues/7116. More changes are still needed to properly support Granite.
This commit is contained in:
parent
d041d2ceaa
commit
120f7bf527
1 changed file with 11 additions and 5 deletions
16
llama.cpp
16
llama.cpp
|
@ -1953,8 +1953,9 @@ struct llama_layer {
|
|||
struct ggml_tensor * ffn_up_shexp;
|
||||
|
||||
// ff bias
|
||||
struct ggml_tensor * ffn_down_b; // b2
|
||||
struct ggml_tensor * ffn_up_b; // b3
|
||||
struct ggml_tensor * ffn_gate_b = nullptr;
|
||||
struct ggml_tensor * ffn_down_b = nullptr; // b2
|
||||
struct ggml_tensor * ffn_up_b = nullptr; // b3
|
||||
struct ggml_tensor * ffn_act;
|
||||
|
||||
// mamba proj
|
||||
|
@ -5103,6 +5104,11 @@ static bool llm_load_tensors(
|
|||
layer.ffn_gate = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff});
|
||||
layer.ffn_down = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_DOWN, "weight", i), { n_ff, n_embd});
|
||||
layer.ffn_up = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff});
|
||||
|
||||
// optional MLP bias
|
||||
layer.ffn_gate_b = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_GATE, "bias", i), {n_ff}, llama_model_loader::TENSOR_NOT_REQUIRED);
|
||||
layer.ffn_down_b = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_DOWN, "bias", i), {n_embd}, llama_model_loader::TENSOR_NOT_REQUIRED);
|
||||
layer.ffn_up_b = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP, "bias", i), {n_ff}, llama_model_loader::TENSOR_NOT_REQUIRED);
|
||||
} else {
|
||||
layer.ffn_gate_inp = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_GATE_INP, "weight", i), {n_embd, n_expert});
|
||||
|
||||
|
@ -7305,9 +7311,9 @@ struct llm_build_context {
|
|||
cb(cur, "ffn_norm", il);
|
||||
|
||||
cur = llm_build_ffn(ctx0, cur,
|
||||
model.layers[il].ffn_up, NULL,
|
||||
model.layers[il].ffn_gate, NULL,
|
||||
model.layers[il].ffn_down, NULL,
|
||||
model.layers[il].ffn_up, model.layers[il].ffn_up_b,
|
||||
model.layers[il].ffn_gate, model.layers[il].ffn_gate_b,
|
||||
model.layers[il].ffn_down, model.layers[il].ffn_down_b,
|
||||
NULL,
|
||||
LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
|
||||
cb(cur, "ffn_out", il);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue