From 202e28a76a4803933c7fd8a3e04ba77514af0bb8 Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Sat, 30 Sep 2023 17:12:09 +0800 Subject: [PATCH] do not offload rope for old cublas (+1 squashed commits) Squashed commits: [ca72a66f] fix allocr (+1 squashed commits) Squashed commits: [22a0e30e] updated lite --- klite.embd | 30 +++++++++++++++++++++++++++++- otherarch/llama_v3.cpp | 13 +++++++++++-- 2 files changed, 40 insertions(+), 3 deletions(-) diff --git a/klite.embd b/klite.embd index d823ccfca..c74c0c977 100644 --- a/klite.embd +++ b/klite.embd @@ -4066,6 +4066,25 @@ Current version: 72 render_gametext(); } + function load_agnai_wi(obj,chatopponent,myname) + { + console.log("Append Agnai WI"); + current_wi = []; + for (let key in obj.entries) { + var itm = obj.entries[key]; + var karr = itm.keywords; + let nwi = { + "key": karr.join(","), + "keysecondary": "", + "content": itm.entry, + "comment": "", + "folder": null, + "selective": false, + "constant": false + }; + current_wi.push(nwi); + } + } function load_tavern_wi(obj,chatopponent,myname) { console.log("Append Tavern WI"); @@ -4084,7 +4103,7 @@ Current version: 72 } let nwi = { "key": karr.join(","), - "keysecondary": (ksarr.length > 0 ? ksarr.join(",") : ""), + "keysecondary": ((ksarr && ksarr.length) > 0 ? ksarr.join(",") : ""), "content": itm.content, "comment": itm.comment, "folder": null, @@ -4119,11 +4138,16 @@ Current version: 72 examplemsg = "\n"+examplemsg; } let combinedmem = memory + scenario + examplemsg; + let agnaidatafieldsempty = scenario + examplemsg + (obj.personality?obj.personality:"") + greeting; //check if it's a world info only card, if so, do not restart game if(combinedmem.trim()=="" && greeting=="" && obj.entries) { load_tavern_wi(obj,chatopponent,myname); } + else if(agnaidatafieldsempty.trim()=="" && obj.entries && obj.kind=="memory") + { + load_agnai_wi(obj,chatopponent,myname); + } else { restart_new_game(); @@ -4138,6 +4162,10 @@ Current version: 72 { load_tavern_wi(obj.character_book,chatopponent,myname); } + else if(obj.entries && obj.entries.length>0) + { + load_agnai_wi(obj,chatopponent,myname); + } } render_gametext(); } diff --git a/otherarch/llama_v3.cpp b/otherarch/llama_v3.cpp index 48375a94d..26c1b2683 100644 --- a/otherarch/llama_v3.cpp +++ b/otherarch/llama_v3.cpp @@ -1592,10 +1592,11 @@ static struct ggml_cgraph * llama_v3_build_graph( offload_func_kq(tmpq); ggml_set_name(tmpq, "tmpq"); - struct ggml_tensor * KQ_pos = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, n_tokens); - offload_func_kq(KQ_pos); ggml_set_name(KQ_pos, "KQ_pos"); + +#ifdef LLAMA_V3_USE_ALLOCATOR + offload_func_kq(KQ_pos); //don't offload rope for cublas, its broken now since ring buffer was added ggml_allocr_alloc(lctx.alloc, KQ_pos); if (!ggml_allocr_is_measure(lctx.alloc)) { int * data = (int *) KQ_pos->data; @@ -1603,6 +1604,14 @@ static struct ggml_cgraph * llama_v3_build_graph( data[i] = n_past + i; } } +#else + { + int * data = (int *) KQ_pos->data; + for (int i = 0; i < N; ++i) { + data[i] = n_past + i; + } + } +#endif struct ggml_tensor * Kcur = ggml_rope_custom_inplace(ctx0, ggml_reshape_3d(ctx0, tmpk, n_embd_head, n_head_kv, N), KQ_pos, n_embd_head, 0, 0, freq_base, freq_scale); offload_func_kq(Kcur);