From ae86b5e3d9fb9f80fd5acb728c2010dbca34c6d7 Mon Sep 17 00:00:00 2001 From: Yoshi Suhara Date: Sat, 10 Aug 2024 22:17:55 -0700 Subject: [PATCH] Replace ggml_mul_mat() with llm_build_lora_mm() --- src/llama.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/llama.cpp b/src/llama.cpp index 1303eda41..06e0d6473 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -13861,14 +13861,14 @@ struct llm_build_context { cb(Qcur, "Qcur", il); } - struct ggml_tensor * Kcur = ggml_mul_mat(ctx0, model.layers[il].wk, cur); + struct ggml_tensor * Kcur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wk, cur); cb(Kcur, "Kcur", il); if (model.layers[il].bk) { Kcur = ggml_add(ctx0, Kcur, model.layers[il].bk); cb(Kcur, "Kcur", il); } - struct ggml_tensor * Vcur = ggml_mul_mat(ctx0, model.layers[il].wv, cur); + struct ggml_tensor * Vcur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wv, cur); cb(Vcur, "Vcur", il); if (model.layers[il].bv) { Vcur = ggml_add(ctx0, Vcur, model.layers[il].bv); @@ -13937,7 +13937,7 @@ struct llm_build_context { cb(cur, "result_norm", -1); // lm_head - cur = ggml_mul_mat(ctx0, model.output, cur); + cur = llm_build_lora_mm(lctx, ctx0, model.output, cur); cb(cur, "result_output", -1); ggml_build_forward_expand(gf, cur);