llama: dbrx: no attention output layer
This commit is contained in:
parent
76f266beef
commit
9c7dedb0f3
1 changed file with 47 additions and 46 deletions
|
@@ -7165,7 +7165,7 @@ struct llm_build_context {
|
||||||
cb(Vcur, "Vcur", il);
|
cb(Vcur, "Vcur", il);
|
||||||
|
|
||||||
cur = llm_build_kv(ctx0, model, hparams, kv_self, gf,
|
cur = llm_build_kv(ctx0, model, hparams, kv_self, gf,
|
||||||
model.layers[il].wo, model.layers[il].bo,
|
NULL, NULL,
|
||||||
Kcur, Vcur, Qcur, KQ_mask, nullptr, n_ctx, n_tokens, kv_head, n_kv, 1.0f, cb, il);
|
Kcur, Vcur, Qcur, KQ_mask, nullptr, n_ctx, n_tokens, kv_head, n_kv, 1.0f, cb, il);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -7182,6 +7182,7 @@ struct llm_build_context {
|
||||||
|
|
||||||
// feed-forward network
|
// feed-forward network
|
||||||
// MoE branch
|
// MoE branch
|
||||||
|
{
|
||||||
// FIXME REVIEW: I do not see this op in https://huggingface.co/databricks/dbrx-instruct/blob/464e701f50aef4c1b59c81fb5667819a5d08e108/modeling_dbrx.py#L727
|
// FIXME REVIEW: I do not see this op in https://huggingface.co/databricks/dbrx-instruct/blob/464e701f50aef4c1b59c81fb5667819a5d08e108/modeling_dbrx.py#L727
|
||||||
cur = llm_build_norm(ctx0, ffn_inp, hparams,
|
cur = llm_build_norm(ctx0, ffn_inp, hparams,
|
||||||
NULL, NULL,
|
NULL, NULL,
|
||||||
|
@@ -7212,7 +7213,6 @@ struct llm_build_context {
|
||||||
|
|
||||||
// compute expert outputs
|
// compute expert outputs
|
||||||
ggml_tensor * moe_out = nullptr;
|
ggml_tensor * moe_out = nullptr;
|
||||||
|
|
||||||
for (int i = 0; i < n_expert_used; ++i) {
|
for (int i = 0; i < n_expert_used; ++i) {
|
||||||
ggml_tensor * cur_expert;
|
ggml_tensor * cur_expert;
|
||||||
|
|
||||||
|
@@ -7233,7 +7233,7 @@ struct llm_build_context {
|
||||||
cb(cur_expert, "ffn_moe_down", il);
|
cb(cur_expert, "ffn_moe_down", il);
|
||||||
|
|
||||||
cur_expert = ggml_mul(ctx0, cur_expert,
|
cur_expert = ggml_mul(ctx0, cur_expert,
|
||||||
ggml_view_2d(ctx0, weights, 1, n_tokens, weights->nb[1], i*weights->nb[0]));
|
ggml_view_2d(ctx0, weights, 1, n_tokens, weights->nb[1], i * weights->nb[0]));
|
||||||
cb(cur_expert, "ffn_moe_weighted", il);
|
cb(cur_expert, "ffn_moe_weighted", il);
|
||||||
|
|
||||||
if (i == 0) {
|
if (i == 0) {
|
||||||
|
@@ -7244,6 +7244,7 @@ struct llm_build_context {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
cur = moe_out;
|
cur = moe_out;
|
||||||
|
}
|
||||||
|
|
||||||
// DbrxNormAttentionNorm
|
// DbrxNormAttentionNorm
|
||||||
{
|
{
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue