llama.cpp: Include the changes from #6122 to exclude the unused outputs of the last layers.
This commit is contained in:
parent
3c0b830808
commit
e4a16f2493
1 changed files with 7 additions and 0 deletions
|
@ -6525,6 +6525,13 @@ struct llm_build_context {
|
||||||
cb(cur, "kqv_out", il);
|
cb(cur, "kqv_out", il);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (il == n_layer - 1) {
|
||||||
|
// skip computing output for unused tokens
|
||||||
|
struct ggml_tensor * inp_out_ids = build_inp_out_ids();
|
||||||
|
cur = ggml_get_rows(ctx0, cur, inp_out_ids);
|
||||||
|
inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids);
|
||||||
|
}
|
||||||
|
|
||||||
struct ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA);
|
struct ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA);
|
||||||
cb(ffn_inp, "ffn_inp", il);
|
cb(ffn_inp, "ffn_inp", il);
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue