commit e481eb5559
parent 6597a72c1d
Lorenzo Toniazzi 2024-07-08 08:41:03 +01:00


@@ -2545,7 +2545,7 @@ struct llama_context {
     llama_cparams cparams;
 
     std::map<std::string, lora_weights> lora_weights_map; // only one LoRA adapter at the moment
-    lora_data llora_data;
+    lora_data llama_lora_data;
     float lora_scale = 1.0f;
 
     std::vector<ggml_backend_t> backends;
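
Note: this hunk shows only the renamed llama_context member. For orientation, the map's value type could look like the sketch below; the loraA/loraB field names are an assumption, not something this diff confirms.

    // Sketch only -- one entry per adapted base weight, looked up from
    // lora_weights_map by the base tensor's name (assumed).
    struct lora_weights {
        ggml_tensor * loraA; // assumed: low-rank factor A
        ggml_tensor * loraB; // assumed: low-rank factor B
    };
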
@@ -7699,21 +7699,21 @@ struct llm_build_context {
         // self-attention
         {
             // compute Q and K and RoPE them
-            struct ggml_tensor * Qcur = lora_mul_mat(lctx, ctx0, model.layers[il].wq, cur);
+            struct ggml_tensor * Qcur = ggml_mul_mat_lora(lctx, ctx0, model.layers[il].wq, cur);
             cb(Qcur, "Qcur", il);
             if (model.layers[il].bq) {
                 Qcur = ggml_add(ctx0, Qcur, model.layers[il].bq);
                 cb(Qcur, "Qcur", il);
             }
 
-            struct ggml_tensor * Kcur = lora_mul_mat(lctx, ctx0, model.layers[il].wk, cur);
+            struct ggml_tensor * Kcur = ggml_mul_mat_lora(lctx, ctx0, model.layers[il].wk, cur);
             cb(Kcur, "Kcur", il);
             if (model.layers[il].bk) {
                 Kcur = ggml_add(ctx0, Kcur, model.layers[il].bk);
                 cb(Kcur, "Kcur", il);
             }
 
-            struct ggml_tensor * Vcur = lora_mul_mat(lctx, ctx0, model.layers[il].wv, cur);
+            struct ggml_tensor * Vcur = ggml_mul_mat_lora(lctx, ctx0, model.layers[il].wv, cur);
             cb(Vcur, "Vcur", il);
             if (model.layers[il].bv) {
                 Vcur = ggml_add(ctx0, Vcur, model.layers[il].bv);
@@ -9722,7 +9722,7 @@ struct llm_build_context {
         return gf;
     }
 
-    static ggml_tensor * lora_mul_mat(
+    static ggml_tensor * ggml_mul_mat_lora(
             llama_context & lctx,
             ggml_context * ctx0,
             ggml_tensor * weight,
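
Note: the hunk above captures only the rename of this builder helper; its body falls outside the diff context. A minimal sketch of what a LoRA-aware matmul of this shape could do, assuming the lora_weights entries carry loraA/loraB tensors (hypothetical field names), the map is keyed by the base weight's tensor name, and the trailing parameter is the activation `cur` seen at the call sites above:

    // A minimal sketch, not the commit's actual implementation (only the
    // signature rename is visible in this hunk).
    static ggml_tensor * ggml_mul_mat_lora(
            llama_context & lctx,
            ggml_context  * ctx0,
            ggml_tensor   * weight,
            ggml_tensor   * cur) {
        // No adapter entry for this weight: fall back to a plain matmul.
        auto it = lctx.lora_weights_map.find(ggml_get_name(weight));
        if (it == lctx.lora_weights_map.end()) {
            return ggml_mul_mat(ctx0, weight, cur);
        }
        // W*x + lora_scale * B*(A*x): apply the low-rank update to the
        // activations instead of merging it into the base weight.
        ggml_tensor * base = ggml_mul_mat(ctx0, weight, cur);
        ggml_tensor * ax   = ggml_mul_mat(ctx0, it->second.loraA, cur);
        ggml_tensor * bax  = ggml_mul_mat(ctx0, it->second.loraB, ax);
        return ggml_add(ctx0, base, ggml_scale(ctx0, bax, lctx.lora_scale));
    }
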
@@ -16372,17 +16372,17 @@ struct llama_context * llama_new_context_with_model(
         // load all loras (only 1 supported here)
         std::vector<struct lora_data *> loras;
         for (size_t i = 0; i < lora_params->lora.size(); ++i) {
-            struct lora_data * llora_data = load_lora(&lora_params->lora[i]);
-            if (llora_data != NULL) {
-                loras.push_back(llora_data);
+            struct lora_data * llama_lora_data = load_lora(&lora_params->lora[i]);
+            if (llama_lora_data != NULL) {
+                loras.push_back(llama_lora_data);
             }
         }
         if (loras.size() == 0) {
             fprintf(stderr, "warning: no lora adapters will be applied.\n");
         }
 
         // Assign data and get mapping (index 0 as only 1 lora is supoprted now)
-        ctx->llora_data = *loras[0];
-        ctx->lora_weights_map = get_lora_weights_map((ctx->llora_data).ctx);
+        ctx->llama_lora_data = *loras[0];
+        ctx->lora_weights_map = get_lora_weights_map((ctx->llama_lora_data).ctx);
     }
     /// LORA load end
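
Note: get_lora_weights_map itself is outside this diff. A hedged sketch of how such a mapping could be built from the loaded adapter's ggml context, assuming the adapter tensors follow the ".loraA"/".loraB" name-suffix convention of llama.cpp's legacy LoRA format:

    // Sketch only -- groups the adapter's A/B tensors by the base weight name
    // they target. The suffix convention and lora_weights fields are assumptions.
    static std::map<std::string, lora_weights> get_lora_weights_map(ggml_context * ctx) {
        std::map<std::string, lora_weights> result;
        for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
            const std::string name = ggml_get_name(t);
            const std::string sufA = ".loraA";
            const std::string sufB = ".loraB";
            if (name.size() > sufA.size() && name.compare(name.size() - sufA.size(), sufA.size(), sufA) == 0) {
                result[name.substr(0, name.size() - sufA.size())].loraA = t;
            } else if (name.size() > sufB.size() && name.compare(name.size() - sufB.size(), sufB.size(), sufB) == 0) {
                result[name.substr(0, name.size() - sufB.size())].loraB = t;
            }
        }
        return result;
    }
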