Merge branch 'master' into xsn/fix_lora
This commit is contained in:
commit
b704448afb
1 changed files with 45 additions and 50 deletions
|
@ -7952,8 +7952,8 @@ static struct ggml_tensor * llm_build_norm(
|
|||
}
|
||||
|
||||
static struct ggml_tensor * llm_build_ffn(
|
||||
struct llama_context & lctx,
|
||||
struct ggml_context * ctx,
|
||||
struct llama_context & lctx,
|
||||
struct ggml_tensor * cur,
|
||||
struct ggml_tensor * up,
|
||||
struct ggml_tensor * up_b,
|
||||
|
@ -8859,7 +8859,7 @@ struct llm_build_context {
|
|||
LLM_NORM_RMS, cb, il);
|
||||
cb(cur, "ffn_norm", il);
|
||||
|
||||
cur = llm_build_ffn(lctx, ctx0, cur,
|
||||
cur = llm_build_ffn(ctx0, lctx, cur,
|
||||
model.layers[il].ffn_up, model.layers[il].ffn_up_b, NULL,
|
||||
model.layers[il].ffn_gate, model.layers[il].ffn_gate_b, NULL,
|
||||
model.layers[il].ffn_down, model.layers[il].ffn_down_b, NULL,
|
||||
|
@ -8993,7 +8993,7 @@ struct llm_build_context {
|
|||
LLM_NORM_RMS, cb, il);
|
||||
cb(cur, "ffn_norm", il);
|
||||
|
||||
cur = llm_build_ffn(lctx, ctx0, cur,
|
||||
cur = llm_build_ffn(ctx0, lctx, cur,
|
||||
model.layers[il].ffn_up, NULL, NULL,
|
||||
model.layers[il].ffn_gate, NULL, NULL,
|
||||
model.layers[il].ffn_down, NULL, NULL,
|
||||
|
@ -9098,7 +9098,7 @@ struct llm_build_context {
|
|||
LLM_NORM_RMS, cb, il);
|
||||
cb(cur, "ffn_norm", il);
|
||||
|
||||
cur = llm_build_ffn(lctx, ctx0, cur,
|
||||
cur = llm_build_ffn(ctx0, lctx, cur,
|
||||
model.layers[il].ffn_up, NULL, NULL,
|
||||
model.layers[il].ffn_gate, NULL, NULL,
|
||||
model.layers[il].ffn_down, NULL, NULL,
|
||||
|
@ -9214,7 +9214,7 @@ struct llm_build_context {
|
|||
|
||||
// feed forward
|
||||
{
|
||||
cur = llm_build_ffn(lctx, ctx0, attn_norm, // !! use the attn norm, not the result
|
||||
cur = llm_build_ffn(ctx0, lctx, attn_norm, // !! use the attn norm, not the result
|
||||
model.layers[il].ffn_up, NULL, NULL,
|
||||
NULL, NULL, NULL,
|
||||
model.layers[il].ffn_down, NULL, NULL,
|
||||
|
@ -9605,7 +9605,7 @@ struct llm_build_context {
|
|||
LLM_NORM, cb, il);
|
||||
cb(cur, "ffn_norm", il);
|
||||
|
||||
cur = llm_build_ffn(lctx, ctx0, cur,
|
||||
cur = llm_build_ffn(ctx0, lctx, cur,
|
||||
model.layers[il].ffn_up, model.layers[il].ffn_up_b, NULL,
|
||||
NULL, NULL, NULL,
|
||||
model.layers[il].ffn_down, model.layers[il].ffn_down_b, NULL,
|
||||
|
@ -9697,7 +9697,7 @@ struct llm_build_context {
|
|||
LLM_NORM_RMS, cb, il);
|
||||
cb(cur, "ffn_norm", il);
|
||||
|
||||
cur = llm_build_ffn(lctx, ctx0, cur,
|
||||
cur = llm_build_ffn(ctx0, lctx, cur,
|
||||
model.layers[il].ffn_up, NULL, NULL,
|
||||
model.layers[il].ffn_gate, NULL, NULL,
|
||||
model.layers[il].ffn_down, NULL, NULL,
|
||||
|
@ -9882,21 +9882,21 @@ struct llm_build_context {
|
|||
|
||||
// feed-forward network
|
||||
if (model.arch == LLM_ARCH_BERT) {
|
||||
cur = llm_build_ffn(lctx, ctx0, cur,
|
||||
cur = llm_build_ffn(ctx0, lctx, cur,
|
||||
model.layers[il].ffn_up, model.layers[il].ffn_up_b, NULL,
|
||||
NULL, NULL, NULL,
|
||||
model.layers[il].ffn_down, model.layers[il].ffn_down_b, NULL,
|
||||
NULL,
|
||||
LLM_FFN_GELU, LLM_FFN_SEQ, cb, il);
|
||||
} else if (model.arch == LLM_ARCH_JINA_BERT_V2) {
|
||||
cur = llm_build_ffn(lctx, ctx0, cur,
|
||||
cur = llm_build_ffn(ctx0, lctx, cur,
|
||||
model.layers[il].ffn_up, NULL, NULL,
|
||||
model.layers[il].ffn_gate, NULL, NULL,
|
||||
model.layers[il].ffn_down, model.layers[il].ffn_down_b, NULL,
|
||||
NULL,
|
||||
LLM_FFN_GELU, LLM_FFN_PAR, cb, il);
|
||||
} else {
|
||||
cur = llm_build_ffn(lctx, ctx0, cur,
|
||||
cur = llm_build_ffn(ctx0, lctx, cur,
|
||||
model.layers[il].ffn_up, NULL, NULL,
|
||||
model.layers[il].ffn_gate, NULL, NULL,
|
||||
model.layers[il].ffn_down, NULL, NULL,
|
||||
|
@ -9994,7 +9994,7 @@ struct llm_build_context {
|
|||
LLM_NORM, cb, il);
|
||||
cb(cur, "ffn_norm", il);
|
||||
|
||||
cur = llm_build_ffn(lctx, ctx0, cur,
|
||||
cur = llm_build_ffn(ctx0, lctx, cur,
|
||||
model.layers[il].ffn_up, model.layers[il].ffn_up_b, NULL,
|
||||
NULL, NULL, NULL,
|
||||
model.layers[il].ffn_down, model.layers[il].ffn_down_b, NULL,
|
||||
|
@ -10132,7 +10132,7 @@ struct llm_build_context {
|
|||
model.layers[il].ffn_norm_b,
|
||||
LLM_NORM, cb, il);
|
||||
cb(cur, "ffn_norm", il);
|
||||
cur = llm_build_ffn(lctx, ctx0, cur,
|
||||
cur = llm_build_ffn(ctx0, lctx, cur,
|
||||
model.layers[il].ffn_up, model.layers[il].ffn_up_b, NULL,
|
||||
NULL, NULL, NULL,
|
||||
model.layers[il].ffn_down, model.layers[il].ffn_down_b, NULL,
|
||||
|
@ -10281,7 +10281,7 @@ struct llm_build_context {
|
|||
// parallel residual
|
||||
cur = inpSA;
|
||||
}
|
||||
cur = llm_build_ffn(lctx, ctx0, cur,
|
||||
cur = llm_build_ffn(ctx0, lctx, cur,
|
||||
model.layers[il].ffn_up, NULL, NULL,
|
||||
model.layers[il].ffn_gate, NULL, NULL,
|
||||
model.layers[il].ffn_down, NULL, NULL,
|
||||
|
@ -10394,7 +10394,7 @@ struct llm_build_context {
|
|||
LLM_NORM_RMS, cb, il);
|
||||
cb(cur, "ffn_norm", il);
|
||||
|
||||
cur = llm_build_ffn(lctx, ctx0, cur,
|
||||
cur = llm_build_ffn(ctx0, lctx, cur,
|
||||
model.layers[il].ffn_up, NULL, NULL,
|
||||
model.layers[il].ffn_gate, NULL, NULL,
|
||||
model.layers[il].ffn_down, NULL, NULL,
|
||||
|
@ -10507,7 +10507,7 @@ struct llm_build_context {
|
|||
LLM_NORM_RMS, cb, il);
|
||||
cb(cur, "ffn_norm", il);
|
||||
|
||||
cur = llm_build_ffn(lctx, ctx0, cur,
|
||||
cur = llm_build_ffn(ctx0, lctx, cur,
|
||||
model.layers[il].ffn_up, NULL, NULL,
|
||||
model.layers[il].ffn_gate, NULL, NULL,
|
||||
model.layers[il].ffn_down, NULL, NULL,
|
||||
|
@ -10644,7 +10644,7 @@ struct llm_build_context {
|
|||
ggml_tensor * cur_gate = ggml_div(ctx0, ggml_silu(ctx0, cur_gate_inp), cur_gate_inp);
|
||||
cb(cur_gate, "ffn_shexp_gate", il);
|
||||
|
||||
ggml_tensor * cur_ffn = llm_build_ffn(lctx, ctx0, cur,
|
||||
ggml_tensor * cur_ffn = llm_build_ffn(ctx0, lctx, cur,
|
||||
model.layers[il].ffn_up_shexp, NULL, NULL,
|
||||
model.layers[il].ffn_gate_shexp, NULL, NULL,
|
||||
model.layers[il].ffn_down_shexp, NULL, NULL,
|
||||
|
@ -10773,7 +10773,7 @@ struct llm_build_context {
|
|||
|
||||
// FF
|
||||
{
|
||||
ffn_output = llm_build_ffn(lctx, ctx0, attn_norm_output,
|
||||
ffn_output = llm_build_ffn(ctx0, lctx, attn_norm_output,
|
||||
model.layers[il].ffn_up, model.layers[il].ffn_up_b, NULL,
|
||||
NULL, NULL, NULL,
|
||||
model.layers[il].ffn_down, model.layers[il].ffn_down_b, NULL,
|
||||
|
@ -10902,7 +10902,7 @@ struct llm_build_context {
|
|||
// special-case: the up and gate tensors are merged into a single tensor
|
||||
// TOOD: support into llm_build_ffn
|
||||
{
|
||||
cur = llm_build_ffn(lctx, ctx0, cur,
|
||||
cur = llm_build_ffn(ctx0, lctx, cur,
|
||||
model.layers[il].ffn_up, NULL, NULL,
|
||||
NULL, NULL, NULL,
|
||||
model.layers[il].ffn_down, NULL, NULL,
|
||||
|
@ -11004,7 +11004,7 @@ struct llm_build_context {
|
|||
|
||||
// feed-forward network
|
||||
{
|
||||
cur = llm_build_ffn(lctx, ctx0, cur,
|
||||
cur = llm_build_ffn(ctx0, lctx, cur,
|
||||
model.layers[il].ffn_up, NULL, NULL,
|
||||
model.layers[il].ffn_gate, NULL, NULL,
|
||||
model.layers[il].ffn_down, NULL, NULL,
|
||||
|
@ -11112,7 +11112,7 @@ struct llm_build_context {
|
|||
LLM_NORM, cb, il);
|
||||
cb(cur, "ffn_norm", il);
|
||||
|
||||
cur = llm_build_ffn(lctx, ctx0, cur,
|
||||
cur = llm_build_ffn(ctx0, lctx, cur,
|
||||
model.layers[il].ffn_up, model.layers[il].ffn_up_b, NULL,
|
||||
NULL, NULL, NULL,
|
||||
model.layers[il].ffn_down, model.layers[il].ffn_down_b, NULL,
|
||||
|
@ -11223,7 +11223,7 @@ struct llm_build_context {
|
|||
LLM_NORM, cb, il);
|
||||
cb(cur, "ffn_norm", il);
|
||||
|
||||
cur = llm_build_ffn(lctx, ctx0, cur,
|
||||
cur = llm_build_ffn(ctx0, lctx, cur,
|
||||
model.layers[il].ffn_up, model.layers[il].ffn_up_b, NULL,
|
||||
NULL, NULL, NULL,
|
||||
model.layers[il].ffn_down, model.layers[il].ffn_down_b, NULL,
|
||||
|
@ -11340,7 +11340,7 @@ struct llm_build_context {
|
|||
LLM_NORM, cb, il);
|
||||
cb(cur, "ffn_norm", il);
|
||||
|
||||
cur = llm_build_ffn(lctx, ctx0, cur,
|
||||
cur = llm_build_ffn(ctx0, lctx, cur,
|
||||
model.layers[il].ffn_up, NULL, NULL,
|
||||
model.layers[il].ffn_gate, NULL, NULL,
|
||||
model.layers[il].ffn_down, NULL, NULL,
|
||||
|
@ -11458,7 +11458,7 @@ struct llm_build_context {
|
|||
LLM_NORM_RMS, cb, il);
|
||||
cb(cur, "ffn_norm", il);
|
||||
|
||||
cur = llm_build_ffn(lctx, ctx0, cur,
|
||||
cur = llm_build_ffn(ctx0, lctx, cur,
|
||||
model.layers[il].ffn_up, NULL, NULL,
|
||||
model.layers[il].ffn_gate, NULL, NULL,
|
||||
model.layers[il].ffn_down, NULL, NULL,
|
||||
|
@ -11595,7 +11595,7 @@ struct llm_build_context {
|
|||
LLM_NORM_RMS, cb, il);
|
||||
cb(cur, "ffn_norm", il);
|
||||
|
||||
cur = llm_build_ffn(lctx, ctx0, cur,
|
||||
cur = llm_build_ffn(ctx0, lctx, cur,
|
||||
model.layers[il].ffn_up, NULL, NULL,
|
||||
model.layers[il].ffn_gate, NULL, NULL,
|
||||
model.layers[il].ffn_down, NULL, NULL,
|
||||
|
@ -11712,7 +11712,7 @@ struct llm_build_context {
|
|||
|
||||
// feed-forward network
|
||||
{
|
||||
cur = llm_build_ffn(lctx, ctx0, cur,
|
||||
cur = llm_build_ffn(ctx0, lctx, cur,
|
||||
model.layers[il].ffn_up, NULL, NULL,
|
||||
model.layers[il].ffn_gate, NULL, NULL,
|
||||
model.layers[il].ffn_down, NULL, NULL,
|
||||
|
@ -11835,7 +11835,7 @@ struct llm_build_context {
|
|||
|
||||
// feed-forward network
|
||||
{
|
||||
cur = llm_build_ffn(lctx, ctx0, cur,
|
||||
cur = llm_build_ffn(ctx0, lctx, cur,
|
||||
model.layers[il].ffn_up, NULL, NULL,
|
||||
model.layers[il].ffn_gate, NULL, NULL,
|
||||
model.layers[il].ffn_down, NULL, NULL,
|
||||
|
@ -11967,7 +11967,7 @@ struct llm_build_context {
|
|||
LLM_NORM, cb, il);
|
||||
cb(cur, "ffn_norm", il);
|
||||
|
||||
cur = llm_build_ffn(lctx, ctx0, cur,
|
||||
cur = llm_build_ffn(ctx0, lctx, cur,
|
||||
model.layers[il].ffn_up, model.layers[il].ffn_up_b, NULL,
|
||||
NULL, NULL, NULL,
|
||||
model.layers[il].ffn_down, model.layers[il].ffn_down_b, NULL,
|
||||
|
@ -12255,7 +12255,7 @@ struct llm_build_context {
|
|||
|
||||
// feed-forward network
|
||||
{
|
||||
cur = llm_build_ffn(lctx, ctx0, ffn_inp,
|
||||
cur = llm_build_ffn(ctx0, lctx, ffn_inp,
|
||||
model.layers[il].ffn_up, NULL, NULL,
|
||||
model.layers[il].ffn_gate, NULL, NULL,
|
||||
model.layers[il].ffn_down, NULL, NULL,
|
||||
|
@ -12392,7 +12392,7 @@ struct llm_build_context {
|
|||
LLM_NORM, cb, il);
|
||||
cb(cur, "ffn_norm", il);
|
||||
|
||||
cur = llm_build_ffn(lctx, ctx0, cur,
|
||||
cur = llm_build_ffn(ctx0, lctx, cur,
|
||||
model.layers[il].ffn_up, NULL, NULL,
|
||||
model.layers[il].ffn_gate, NULL, NULL,
|
||||
model.layers[il].ffn_down, NULL, NULL,
|
||||
|
@ -12519,7 +12519,7 @@ struct llm_build_context {
|
|||
LLM_NORM_RMS, cb, il);
|
||||
cb(cur, "ffn_norm", il);
|
||||
|
||||
cur = llm_build_ffn(lctx, ctx0, cur,
|
||||
cur = llm_build_ffn(ctx0, lctx, cur,
|
||||
model.layers[il].ffn_up, NULL, NULL,
|
||||
model.layers[il].ffn_gate, NULL, NULL,
|
||||
model.layers[il].ffn_down, NULL, NULL,
|
||||
|
@ -12631,7 +12631,7 @@ struct llm_build_context {
|
|||
LLM_NORM, cb, il);
|
||||
cb(cur, "ffn_norm", il);
|
||||
|
||||
cur = llm_build_ffn(lctx, ctx0, cur,
|
||||
cur = llm_build_ffn(ctx0, lctx, cur,
|
||||
model.layers[il].ffn_up, model.layers[il].ffn_up_b, NULL,
|
||||
NULL, NULL, NULL,
|
||||
model.layers[il].ffn_down, model.layers[il].ffn_down_b, NULL,
|
||||
|
@ -12662,7 +12662,7 @@ struct llm_build_context {
|
|||
LLM_NORM, cb, il);
|
||||
cb(cur, "ffn_norm", il);
|
||||
|
||||
cur = llm_build_ffn(lctx, ctx0, cur,
|
||||
cur = llm_build_ffn(ctx0, lctx, cur,
|
||||
model.layers[il].ffn_up, model.layers[il].ffn_up_b, NULL,
|
||||
NULL, NULL, NULL,
|
||||
model.layers[il].ffn_down, model.layers[il].ffn_down_b, NULL,
|
||||
|
@ -12771,7 +12771,7 @@ struct llm_build_context {
|
|||
LLM_NORM_RMS, cb, il);
|
||||
cb(cur, "ffn_norm", il);
|
||||
|
||||
cur = llm_build_ffn(lctx, ctx0, cur,
|
||||
cur = llm_build_ffn(ctx0, lctx, cur,
|
||||
model.layers[il].ffn_up, NULL, NULL,
|
||||
model.layers[il].ffn_gate, NULL, NULL,
|
||||
model.layers[il].ffn_down, NULL, NULL,
|
||||
|
@ -12987,13 +12987,13 @@ struct llm_build_context {
|
|||
struct ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA);
|
||||
cb(ffn_inp, "ffn_inp", il);
|
||||
|
||||
if ((uint32_t) il < hparams.n_layer_dense_lead) {
|
||||
cur = llm_build_norm(ctx0, ffn_inp, hparams,
|
||||
model.layers[il].ffn_norm, NULL,
|
||||
LLM_NORM_RMS, cb, il);
|
||||
cb(cur, "ffn_norm", il);
|
||||
|
||||
cur = llm_build_ffn(lctx, ctx0, cur,
|
||||
if ((uint32_t) il < hparams.n_layer_dense_lead) {
|
||||
cur = llm_build_ffn(ctx0, lctx, cur,
|
||||
model.layers[il].ffn_up, NULL, NULL,
|
||||
model.layers[il].ffn_gate, NULL, NULL,
|
||||
model.layers[il].ffn_down, NULL, NULL,
|
||||
|
@ -13002,11 +13002,6 @@ struct llm_build_context {
|
|||
cb(cur, "ffn_out", il);
|
||||
} else {
|
||||
// MoE branch
|
||||
cur = llm_build_norm(ctx0, ffn_inp, hparams,
|
||||
model.layers[il].ffn_norm, NULL,
|
||||
LLM_NORM_RMS, cb, il);
|
||||
cb(cur, "ffn_norm", il);
|
||||
|
||||
ggml_tensor * moe_out =
|
||||
llm_build_moe_ffn(ctx0, lctx, cur,
|
||||
model.layers[il].ffn_gate_inp,
|
||||
|
@ -13021,7 +13016,7 @@ struct llm_build_context {
|
|||
|
||||
// FFN shared expert
|
||||
{
|
||||
ggml_tensor * ffn_shexp = llm_build_ffn(lctx, ctx0, cur,
|
||||
ggml_tensor * ffn_shexp = llm_build_ffn(ctx0, lctx, cur,
|
||||
model.layers[il].ffn_up_shexp, NULL, NULL,
|
||||
model.layers[il].ffn_gate_shexp, NULL, NULL,
|
||||
model.layers[il].ffn_down_shexp, NULL, NULL,
|
||||
|
@ -13159,7 +13154,7 @@ struct llm_build_context {
|
|||
LLM_NORM_RMS, cb, il);
|
||||
cb(cur, "ffn_norm", il);
|
||||
|
||||
cur = llm_build_ffn(lctx, ctx0, cur,
|
||||
cur = llm_build_ffn(ctx0, lctx, cur,
|
||||
model.layers[il].ffn_up, NULL, model.layers[il].ffn_up_scale,
|
||||
model.layers[il].ffn_gate, NULL, model.layers[il].ffn_gate_scale,
|
||||
NULL, NULL, NULL,
|
||||
|
@ -13293,7 +13288,7 @@ struct llm_build_context {
|
|||
cb(cur, "ffn_norm", il);
|
||||
|
||||
// T5 uses relu, flan-T5 uses gelu-gated
|
||||
cur = llm_build_ffn(lctx, ctx0, cur,
|
||||
cur = llm_build_ffn(ctx0, lctx, cur,
|
||||
model.layers[il].ffn_up_enc, NULL, NULL,
|
||||
model.layers[il].ffn_gate_enc, NULL, NULL,
|
||||
model.layers[il].ffn_down_enc, NULL, NULL,
|
||||
|
@ -13473,7 +13468,7 @@ struct llm_build_context {
|
|||
cb(cur, "ffn_norm", il);
|
||||
|
||||
// T5 uses relu, flan-T5 uses gelu-gated
|
||||
cur = llm_build_ffn(lctx, ctx0, cur,
|
||||
cur = llm_build_ffn(ctx0, lctx, cur,
|
||||
model.layers[il].ffn_up, NULL, NULL,
|
||||
model.layers[il].ffn_gate, NULL, NULL,
|
||||
model.layers[il].ffn_down, NULL, NULL,
|
||||
|
@ -13579,7 +13574,7 @@ struct llm_build_context {
|
|||
LLM_NORM, cb, il);
|
||||
cb(cur, "ffn_norm", il);
|
||||
|
||||
cur = llm_build_ffn(lctx, ctx0, cur,
|
||||
cur = llm_build_ffn(ctx0, lctx, cur,
|
||||
model.layers[il].ffn_up, model.layers[il].ffn_up_b, NULL,
|
||||
model.layers[il].ffn_gate, model.layers[il].ffn_gate_b, NULL,
|
||||
model.layers[il].ffn_down, model.layers[il].ffn_down_b, NULL,
|
||||
|
@ -13693,7 +13688,7 @@ struct llm_build_context {
|
|||
LLM_NORM_RMS, cb, il);
|
||||
cb(cur, "ffn_norm", il);
|
||||
|
||||
cur = llm_build_ffn(lctx, ctx0, cur,
|
||||
cur = llm_build_ffn(ctx0, lctx, cur,
|
||||
model.layers[il].ffn_up, NULL, NULL,
|
||||
NULL, NULL, NULL,
|
||||
model.layers[il].ffn_down, NULL, NULL,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue