From be2ac38a2825635e73d89e6740a8000a4944a67f Mon Sep 17 00:00:00 2001
From: Galunid <karolek1231456@gmail.com>
Date: Sun, 12 Nov 2023 04:30:17 +0100
Subject: [PATCH] Make qrot, krot contiguous

---
 llama.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index abb9d534c..aac1e2611 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -4712,20 +4712,20 @@ struct llm_build_context {
                 cb(Vcur, "Vcur", il);
 
                 // RoPE the first n_rot of q/k, pass the other half, and concat.
-                struct ggml_tensor * qrot = ggml_view_3d(
+                struct ggml_tensor * qrot = ggml_cont(ctx0, ggml_view_3d(
                         ctx0, tmpq, hparams.n_rot, n_head, n_tokens,
                         ggml_element_size(tmpq) * n_embd_head,
                         ggml_element_size(tmpq) * n_embd_head * n_head,
                         0
-                        );
+                        ));
                 cb(qrot, "qrot", il);
 
-                struct ggml_tensor * krot = ggml_view_3d(
+                struct ggml_tensor * krot = ggml_cont(ctx0, ggml_view_3d(
                         ctx0, tmpk, hparams.n_rot, n_head, n_tokens,
                         ggml_element_size(tmpk) * n_embd_head,
                         ggml_element_size(tmpk) * n_embd_head * n_head_kv,
                         0
-                        );
+                        ));
                 cb(krot, "krot", il);
 
                 // get the second half of tmpq, e.g tmpq[n_rot:, :, :]