From 7ffbcbdfa3fc8551ee1c909b45576f09ef0cc2af Mon Sep 17 00:00:00 2001
From: jon-chuang
Date: Wed, 26 Apr 2023 23:29:26 +0800
Subject: [PATCH 1/4] fix

---
 llama.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index 25203c9e9..aaf6ab77e 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -2017,14 +2017,14 @@ int llama_apply_lora_from_file_internal(struct llama_context * ctx, const char *
             ggml_tensor * loraA = lora_tensors[base_name + ".loraA"];
             ggml_tensor * loraB = lora_tensors[base_name + ".loraB"];
 
-            if (base_t->ne[0] != loraA->ne[1] || base_t->ne[1] != loraB->ne[1]) {
-                fprintf(stderr, "%s: incompatible tensor dimensions (%" PRId64 " and %" PRId64 ");"
-                        " are you sure that this adapter is for this model?\n", __func__, base_t->ne[0], loraA->ne[1]);
+            if (base_t->ne[0] != loraB->ne[1] || base_t->ne[1] != loraA->ne[1]) {
+                fprintf(stderr, "%s: incompatible tensor dimensions (outdims: %" PRId64 ", %" PRId64 ", indims: );"
+                        " are you sure that this adapter is for this model?\n", __func__, base_t->ne[0], loraB->ne[1], base_t->ne[1], loraA->ne[1]);
                 return 1;
             }
 
             // w = w + BA*s
-            ggml_tensor * BA = ggml_mul_mat(lora_ctx, loraA, loraB);
+            ggml_tensor * BA = ggml_mul_mat(lora_ctx, loraB, loraA);
 
             if (scaling != 1.0f) {
                 ggml_tensor * scale_tensor = ggml_new_f32(lora_ctx, scaling);

From b80bc36ab01052317850006336b1356891b20512 Mon Sep 17 00:00:00 2001
From: jon-chuang
Date: Wed, 26 Apr 2023 23:33:24 +0800
Subject: [PATCH 2/4] minor

---
 llama.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index aaf6ab77e..8c1385ad6 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -2018,13 +2018,13 @@ int llama_apply_lora_from_file_internal(struct llama_context * ctx, const char *
             ggml_tensor * loraB = lora_tensors[base_name + ".loraB"];
 
             if (base_t->ne[0] != loraB->ne[1] || base_t->ne[1] != loraA->ne[1]) {
-                fprintf(stderr, "%s: incompatible tensor dimensions (outdims: %" PRId64 ", %" PRId64 ", indims: );"
-                        " are you sure that this adapter is for this model?\n", __func__, base_t->ne[0], loraB->ne[1], base_t->ne[1], loraA->ne[1]);
+                fprintf(stderr, "%s: incompatible tensor dimensions (outdims: %" PRId64 ", %" PRId64 ", indims: %" PRId64 ", %" PRId64 ");"
+                        " are you sure that this adapter is for this model?\n", __func__, base_t->ne[1], loraB->ne[1], base_t->ne[0], loraA->ne[1]);
                 return 1;
             }
 
             // w = w + BA*s
-            ggml_tensor * BA = ggml_mul_mat(lora_ctx, loraB, loraA);
+            ggml_tensor * BA = ggml_mul_mat(lora_ctx, loraA, loraB);
 
             if (scaling != 1.0f) {
                 ggml_tensor * scale_tensor = ggml_new_f32(lora_ctx, scaling);

From 9eda98d14be77de1b898350b49af7cc1717a5e35 Mon Sep 17 00:00:00 2001
From: jon-chuang
Date: Thu, 27 Apr 2023 00:41:12 +0800
Subject: [PATCH 3/4] fix

---
 llama.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llama.cpp b/llama.cpp
index 8c1385ad6..ef08e3afd 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -2017,7 +2017,8 @@ int llama_apply_lora_from_file_internal(struct llama_context * ctx, const char *
             ggml_tensor * loraA = lora_tensors[base_name + ".loraA"];
             ggml_tensor * loraB = lora_tensors[base_name + ".loraB"];
 
-            if (base_t->ne[0] != loraB->ne[1] || base_t->ne[1] != loraA->ne[1]) {
+            // base indim = loraA transposed indim, base outdim = loraB outdim
+            if (base_t->ne[0] != loraA->ne[1] || base_t->ne[1] != loraB->ne[1]) {
                 fprintf(stderr, "%s: incompatible tensor dimensions (outdims: %" PRId64 ", %" PRId64 ", indims: %" PRId64 ", %" PRId64 ");"
                         " are you sure that this adapter is for this model?\n", __func__, base_t->ne[1], loraB->ne[1], base_t->ne[0], loraA->ne[1]);
                 return 1;

From 6383bbfa5f234a5c6429082d15dea3c01608d5b4 Mon Sep 17 00:00:00 2001
From: jon-chuang
Date: Thu, 27 Apr 2023 00:42:41 +0800
Subject: [PATCH 4/4] fix

---
 llama.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama.cpp b/llama.cpp
index ef08e3afd..8532a7ba7 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -2019,7 +2019,7 @@ int llama_apply_lora_from_file_internal(struct llama_context * ctx, const char *
 
             // base indim = loraA transposed indim, base outdim = loraB outdim
             if (base_t->ne[0] != loraA->ne[1] || base_t->ne[1] != loraB->ne[1]) {
-                fprintf(stderr, "%s: incompatible tensor dimensions (outdims: %" PRId64 ", %" PRId64 ", indims: %" PRId64 ", %" PRId64 ");"
+                fprintf(stderr, "%s: incompatible tensor dimensions (outdim: %" PRId64 ", %" PRId64 ", indim: %" PRId64 ", %" PRId64 ");"
                         " are you sure that this adapter is for this model?\n", __func__, base_t->ne[1], loraB->ne[1], base_t->ne[0], loraA->ne[1]);
                 return 1;
             }
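
Note on the dimension check the series converges on: in ggml, ne[0] is the column count (fastest dimension) and ne[1] the row count, and loraA is assumed to be stored transposed (as the comment added in PATCH 3 states). The sketch below is not part of the patches; it only replays that shape algebra with illustrative sizes (Shape2D, mul_mat_shape, n_in, n_out, r are all hypothetical names introduced here) to show why the final check lets ggml_mul_mat(lora_ctx, loraA, loraB) produce a delta with the base weight's shape.

// Minimal sketch, not ggml code: replays the shape reasoning behind
// the check in PATCH 3/4. Sizes are illustrative, not from any model.
#include <array>
#include <cassert>
#include <cstdio>

// ggml-style convention: ne[0] = columns (fastest dim), ne[1] = rows.
struct Shape2D { std::array<long, 2> ne; };

// Shape rule of a ggml-style mul_mat: the shared dimension is ne[0] of
// both operands; the result has ne[0] = a.ne[1] and ne[1] = b.ne[1].
Shape2D mul_mat_shape(const Shape2D & a, const Shape2D & b) {
    assert(a.ne[0] == b.ne[0] && "inner (shared) dimension must match");
    return Shape2D{{a.ne[1], b.ne[1]}};
}

int main() {
    const long n_in = 4096, n_out = 4096, r = 8;   // illustrative sizes

    Shape2D base  {{n_in, n_out}};  // W: ne[0] = in dim, ne[1] = out dim
    Shape2D loraA {{r, n_in}};      // A stored transposed: r columns, n_in rows
    Shape2D loraB {{r, n_out}};     // B: r columns, n_out rows

    // Mirrors the check the patch series settles on:
    // base indim == loraA->ne[1], base outdim == loraB->ne[1].
    assert(base.ne[0] == loraA.ne[1] && base.ne[1] == loraB.ne[1]);

    // With those shapes, mul_mat(loraA, loraB) has exactly the base
    // weight's shape, so "w = w + BA*s" can be accumulated in place.
    Shape2D BA = mul_mat_shape(loraA, loraB);
    assert(BA.ne[0] == base.ne[0] && BA.ne[1] == base.ne[1]);

    printf("BA shape: ne[0]=%ld, ne[1]=%ld (matches base weight)\n", BA.ne[0], BA.ne[1]);
    return 0;
}

Under these assumptions the check rejects an adapter whose in/out dimensions do not match the base model before any multiplication is attempted, which is the point of reverting the operand order back to ggml_mul_mat(lora_ctx, loraA, loraB) in PATCH 2 and fixing only the comparison and the error message afterwards.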