From 202e28a76a4803933c7fd8a3e04ba77514af0bb8 Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Sat, 30 Sep 2023 17:12:09 +0800
Subject: [PATCH] do not offload rope for old cublas (+1 squashed commits)

Squashed commits:

[ca72a66f] fix allocr (+1 squashed commits)

Squashed commits:

[22a0e30e] updated lite
---
 klite.embd             | 30 +++++++++++++++++++++++++++++-
 otherarch/llama_v3.cpp | 13 +++++++++++--
 2 files changed, 40 insertions(+), 3 deletions(-)

diff --git a/klite.embd b/klite.embd
index d823ccfca..c74c0c977 100644
--- a/klite.embd
+++ b/klite.embd
@@ -4066,6 +4066,25 @@ Current version: 72
 		render_gametext();
 	}
 
+	//Rebuilds current_wi from an Agnai export: one world info item per value in obj.entries. chatopponent and myname are not used here.
+	function load_agnai_wi(obj,chatopponent,myname)
+	{
+		console.log("Append Agnai WI");
+		current_wi = [];
+		for (let key in obj.entries) {
+			let itm = obj.entries[key] || {};
+			let karr = Array.isArray(itm.keywords) ? itm.keywords : []; //imported JSON may lack keywords; avoid throwing on join
+			current_wi.push({
+				"key": karr.join(","),
+				"keysecondary": "",
+				"content": itm.entry,
+				"comment": "",
+				"folder": null,
+				"selective": false,
+				"constant": false
+			});
+		}
+	}
 	function load_tavern_wi(obj,chatopponent,myname)
 	{
 		console.log("Append Tavern WI");
@@ -4084,7 +4103,7 @@ Current version: 72
 			}
 			let nwi = {
 				"key": karr.join(","),
-				"keysecondary": (ksarr.length > 0 ? ksarr.join(",") : ""),
+				"keysecondary": ((ksarr && ksarr.length) > 0 ? ksarr.join(",") : ""),
 				"content": itm.content,
 				"comment": itm.comment,
 				"folder": null,
@@ -4119,11 +4138,16 @@ Current version: 72
 			examplemsg = "\n"+examplemsg;
 		}
 		let combinedmem = memory + scenario + examplemsg;
+		let agnaidatafieldsempty = scenario + examplemsg + (obj.personality?obj.personality:"") + greeting;
 		//check if it's a world info only card, if so, do not restart game
 		if(combinedmem.trim()=="" && greeting=="" && obj.entries)
 		{
 			load_tavern_wi(obj,chatopponent,myname);
 		}
+		else if(agnaidatafieldsempty.trim()=="" && obj.entries && obj.kind=="memory")
+		{
+			load_agnai_wi(obj,chatopponent,myname);
+		}
 		else
 		{
 			restart_new_game();
@@ -4138,6 +4162,10 @@ Current version: 72
 			{
 				load_tavern_wi(obj.character_book,chatopponent,myname);
 			}
+			else if(obj.entries && obj.entries.length>0)
+			{
+				load_agnai_wi(obj,chatopponent,myname);
+			}
 		}
 		render_gametext();
 	}
diff --git a/otherarch/llama_v3.cpp b/otherarch/llama_v3.cpp
index 48375a94d..26c1b2683 100644
--- a/otherarch/llama_v3.cpp
+++ b/otherarch/llama_v3.cpp
@@ -1592,10 +1592,11 @@ static struct ggml_cgraph * llama_v3_build_graph(
             offload_func_kq(tmpq);
             ggml_set_name(tmpq, "tmpq");
 
-
             struct ggml_tensor * KQ_pos = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, n_tokens);
-            offload_func_kq(KQ_pos);
             ggml_set_name(KQ_pos, "KQ_pos");
+
+#ifdef LLAMA_V3_USE_ALLOCATOR
+            offload_func_kq(KQ_pos); //don't offload rope for cublas, its broken now since ring buffer was added
             ggml_allocr_alloc(lctx.alloc, KQ_pos);
             if (!ggml_allocr_is_measure(lctx.alloc)) {
                int * data = (int *) KQ_pos->data;
@@ -1603,6 +1604,14 @@ static struct ggml_cgraph * llama_v3_build_graph(
                     data[i] = n_past + i;
                 }
             }
+#else
+            {
+                int * data = (int *) KQ_pos->data;
+                for (int i = 0; i < N; ++i) {
+                    data[i] = n_past + i;
+                }
+            }
+#endif
 
             struct ggml_tensor * Kcur = ggml_rope_custom_inplace(ctx0, ggml_reshape_3d(ctx0, tmpk, n_embd_head, n_head_kv, N), KQ_pos, n_embd_head, 0, 0, freq_base, freq_scale);
             offload_func_kq(Kcur);