From 7ffbcbdfa3fc8551ee1c909b45576f09ef0cc2af Mon Sep 17 00:00:00 2001
From: jon-chuang
Date: Wed, 26 Apr 2023 23:29:26 +0800
Subject: [PATCH 1/4] fix

---
 llama.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index 25203c9e9..aaf6ab77e 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -2017,14 +2017,14 @@ int llama_apply_lora_from_file_internal(struct llama_context * ctx, const char *
             ggml_tensor * loraA = lora_tensors[base_name + ".loraA"];
             ggml_tensor * loraB = lora_tensors[base_name + ".loraB"];
 
-            if (base_t->ne[0] != loraA->ne[1] || base_t->ne[1] != loraB->ne[1]) {
-                fprintf(stderr, "%s: incompatible tensor dimensions (%" PRId64 " and %" PRId64 ");"
-                        " are you sure that this adapter is for this model?\n", __func__, base_t->ne[0], loraA->ne[1]);
+            if (base_t->ne[0] != loraB->ne[1] || base_t->ne[1] != loraA->ne[1]) {
+                fprintf(stderr, "%s: incompatible tensor dimensions (outdims: %" PRId64 ", %" PRId64 ", indims: );"
+                        " are you sure that this adapter is for this model?\n", __func__, base_t->ne[0], loraB->ne[1], base_t->ne[1], loraA->ne[1]);
                 return 1;
             }
 
             // w = w + BA*s
-            ggml_tensor * BA = ggml_mul_mat(lora_ctx, loraA, loraB);
+            ggml_tensor * BA = ggml_mul_mat(lora_ctx, loraB, loraA);
 
             if (scaling != 1.0f) {
                 ggml_tensor * scale_tensor = ggml_new_f32(lora_ctx, scaling);

From b80bc36ab01052317850006336b1356891b20512 Mon Sep 17 00:00:00 2001
From: jon-chuang
Date: Wed, 26 Apr 2023 23:33:24 +0800
Subject: [PATCH 2/4] minor

---
 llama.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index aaf6ab77e..8c1385ad6 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -2018,13 +2018,13 @@ int llama_apply_lora_from_file_internal(struct llama_context * ctx, const char *
             ggml_tensor * loraB = lora_tensors[base_name + ".loraB"];
 
             if (base_t->ne[0] != loraB->ne[1] || base_t->ne[1] != loraA->ne[1]) {
-                fprintf(stderr, "%s: incompatible tensor dimensions (outdims: %" PRId64 ", %" PRId64 ", indims: );"
-                        " are you sure that this adapter is for this model?\n", __func__, base_t->ne[0], loraB->ne[1], base_t->ne[1], loraA->ne[1]);
+                fprintf(stderr, "%s: incompatible tensor dimensions (outdims: %" PRId64 ", %" PRId64 ", indims: %" PRId64 ", %" PRId64 ");"
+                        " are you sure that this adapter is for this model?\n", __func__, base_t->ne[1], loraB->ne[1], base_t->ne[0], loraA->ne[1]);
                 return 1;
             }
 
             // w = w + BA*s
-            ggml_tensor * BA = ggml_mul_mat(lora_ctx, loraB, loraA);
+            ggml_tensor * BA = ggml_mul_mat(lora_ctx, loraA, loraB);
 
             if (scaling != 1.0f) {
                 ggml_tensor * scale_tensor = ggml_new_f32(lora_ctx, scaling);

From 9eda98d14be77de1b898350b49af7cc1717a5e35 Mon Sep 17 00:00:00 2001
From: jon-chuang
Date: Thu, 27 Apr 2023 00:41:12 +0800
Subject: [PATCH 3/4] fix

---
 llama.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llama.cpp b/llama.cpp
index 8c1385ad6..ef08e3afd 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -2017,7 +2017,8 @@ int llama_apply_lora_from_file_internal(struct llama_context * ctx, const char *
             ggml_tensor * loraA = lora_tensors[base_name + ".loraA"];
             ggml_tensor * loraB = lora_tensors[base_name + ".loraB"];
 
-            if (base_t->ne[0] != loraB->ne[1] || base_t->ne[1] != loraA->ne[1]) {
+            // base indim = loraA transposed indim, base outdim = loraB outdim
+            if (base_t->ne[0] != loraA->ne[1] || base_t->ne[1] != loraB->ne[1]) {
                 fprintf(stderr, "%s: incompatible tensor dimensions (outdims: %" PRId64 ", %" PRId64 ", indims: %" PRId64 ", %" PRId64 ");"
                         " are you sure that this adapter is for this model?\n", __func__, base_t->ne[1], loraB->ne[1], base_t->ne[0], loraA->ne[1]);
                 return 1;

From 6383bbfa5f234a5c6429082d15dea3c01608d5b4 Mon Sep 17 00:00:00 2001
From: jon-chuang
Date: Thu, 27 Apr 2023 00:42:41 +0800
Subject: [PATCH 4/4] fix

---
 llama.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama.cpp b/llama.cpp
index ef08e3afd..8532a7ba7 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -2019,7 +2019,7 @@ int llama_apply_lora_from_file_internal(struct llama_context * ctx, const char *
 
             // base indim = loraA transposed indim, base outdim = loraB outdim
             if (base_t->ne[0] != loraA->ne[1] || base_t->ne[1] != loraB->ne[1]) {
-                fprintf(stderr, "%s: incompatible tensor dimensions (outdims: %" PRId64 ", %" PRId64 ", indims: %" PRId64 ", %" PRId64 ");"
+                fprintf(stderr, "%s: incompatible tensor dimensions (outdim: %" PRId64 ", %" PRId64 ", indim: %" PRId64 ", %" PRId64 ");"
                         " are you sure that this adapter is for this model?\n", __func__, base_t->ne[1], loraB->ne[1], base_t->ne[0], loraA->ne[1]);
                 return 1;
             }
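
Note on the dimension check the series converges on: in ggml, ne[0] is the column count (fastest dimension) and ne[1] the row count, and loraA is assumed to be stored transposed (as the comment added in PATCH 3 states). The sketch below is not part of the patches; it only replays that shape algebra with illustrative sizes (Shape2D, mul_mat_shape, n_in, n_out, r are all hypothetical names introduced here) to show why the final check lets ggml_mul_mat(lora_ctx, loraA, loraB) produce a delta with the base weight's shape.

// Minimal sketch, not ggml code: replays the shape reasoning behind
// the check in PATCH 3/4. Sizes are illustrative, not from any model.
#include <array>
#include <cassert>
#include <cstdio>

// ggml-style convention: ne[0] = columns (fastest dim), ne[1] = rows.
struct Shape2D { std::array<long, 2> ne; };

// Shape rule of a ggml-style mul_mat: the shared dimension is ne[0] of
// both operands; the result has ne[0] = a.ne[1] and ne[1] = b.ne[1].
Shape2D mul_mat_shape(const Shape2D & a, const Shape2D & b) {
    assert(a.ne[0] == b.ne[0] && "inner (shared) dimension must match");
    return Shape2D{{a.ne[1], b.ne[1]}};
}

int main() {
    const long n_in = 4096, n_out = 4096, r = 8;   // illustrative sizes

    Shape2D base  {{n_in, n_out}};  // W: ne[0] = in dim, ne[1] = out dim
    Shape2D loraA {{r, n_in}};      // A stored transposed: r columns, n_in rows
    Shape2D loraB {{r, n_out}};     // B: r columns, n_out rows

    // Mirrors the check the patch series settles on:
    // base indim == loraA->ne[1], base outdim == loraB->ne[1].
    assert(base.ne[0] == loraA.ne[1] && base.ne[1] == loraB.ne[1]);

    // With those shapes, mul_mat(loraA, loraB) has exactly the base
    // weight's shape, so "w = w + BA*s" can be accumulated in place.
    Shape2D BA = mul_mat_shape(loraA, loraB);
    assert(BA.ne[0] == base.ne[0] && BA.ne[1] == base.ne[1]);

    printf("BA shape: ne[0]=%ld, ne[1]=%ld (matches base weight)\n", BA.ne[0], BA.ne[1]);
    return 0;
}

Under these assumptions the check rejects an adapter whose in/out dimensions do not match the base model before any multiplication is attempted, which is the point of reverting the operand order back to ggml_mul_mat(lora_ctx, loraA, loraB) in PATCH 2 and fixing only the comparison and the error message afterwards.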