adapt to new lora implementation
This commit is contained in:
parent
fa568f6a82
commit
f40cd2073a
1 changed file with 4 additions and 4 deletions
|
@ -13848,13 +13848,13 @@ struct llm_build_context {
|
||||||
// self-attention
|
// self-attention
|
||||||
{
|
{
|
||||||
// compute Q and K and RoPE them
|
// compute Q and K and RoPE them
|
||||||
struct ggml_tensor * Qcur = ggml_mul_mat(ctx0, model.layers[il].wq, cur);
|
struct ggml_tensor * Qcur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wq, cur);
|
||||||
cb(Qcur, "Qcur", il);
|
cb(Qcur, "Qcur", il);
|
||||||
|
|
||||||
struct ggml_tensor * Kcur = ggml_mul_mat(ctx0, model.layers[il].wk, cur);
|
struct ggml_tensor * Kcur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wk, cur);
|
||||||
cb(Kcur, "Kcur", il);
|
cb(Kcur, "Kcur", il);
|
||||||
|
|
||||||
struct ggml_tensor * Vcur = ggml_mul_mat(ctx0, model.layers[il].wv, cur);
|
struct ggml_tensor * Vcur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wv, cur);
|
||||||
cb(Vcur, "Vcur", il);
|
cb(Vcur, "Vcur", il);
|
||||||
|
|
||||||
if (model.layers[il].attn_q_norm) {
|
if (model.layers[il].attn_q_norm) {
|
||||||
|
@ -13962,7 +13962,7 @@ struct llm_build_context {
|
||||||
cb(cur, "result_norm", -1);
|
cb(cur, "result_norm", -1);
|
||||||
|
|
||||||
// lm_head
|
// lm_head
|
||||||
cur = ggml_mul_mat(ctx0, model.output, cur);
|
cur = llm_build_lora_mm(lctx, ctx0, model.output, cur);
|
||||||
cb(cur, "result_output_with_img_logits", -1);
|
cb(cur, "result_output_with_img_logits", -1);
|
||||||
|
|
||||||
// TODO: this suppresses the output of image tokens, which is required to enable text-only outputs.
|
// TODO: this suppresses the output of image tokens, which is required to enable text-only outputs.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue