From cd60b88bf7ad7785fb6ac9864e360cf10e42faad Mon Sep 17 00:00:00 2001
From: Daniel Bevenius <daniel.bevenius@gmail.com>
Date: Wed, 9 Oct 2024 16:40:35 +0200
Subject: [PATCH 1/4] ggml-alloc : remove buffer_id from leaf_alloc (ggml/987)

This commit removes the buffer_id field from the leaf_alloc struct.

The motivation for is that this field is only written to and never
read/used as far as I can tell. Each tensor_alloc has a buffer_id field
and this is what caused me to look into this more closely, to
understand what the buffer_id in leaf_alloc was used for.
---
 ggml/src/ggml-alloc.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/ggml/src/ggml-alloc.c b/ggml/src/ggml-alloc.c
index 28548fbbb..041de9e3e 100644
--- a/ggml/src/ggml-alloc.c
+++ b/ggml/src/ggml-alloc.c
@@ -348,7 +348,6 @@ struct tensor_alloc {
 };
 
 struct leaf_alloc {
-    int buffer_id;
     struct tensor_alloc leaf;
 };
 
@@ -740,7 +739,6 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
     for (int i = 0; i < graph->n_leafs; i++) {
         struct ggml_tensor * leaf = graph->leafs[i];
         struct hash_node * hn = ggml_gallocr_hash_get(galloc, leaf);
-        galloc->leaf_allocs[i].buffer_id = hn->buffer_id;
         if (leaf->view_src || leaf->data) {
             galloc->leaf_allocs[i].leaf.buffer_id = -1;
             galloc->leaf_allocs[i].leaf.offset = SIZE_MAX;

From 0e41b300ed28f7fe185d938b2e3d56a0bf7411ed Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Wed, 16 Oct 2024 11:28:14 +0300
Subject: [PATCH 2/4] sync : ggml

---
 scripts/sync-ggml.last | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/sync-ggml.last b/scripts/sync-ggml.last
index 3cca9cc2f..6d31b21b9 100644
--- a/scripts/sync-ggml.last
+++ b/scripts/sync-ggml.last
@@ -1 +1 @@
-564f42082f858f9674b2a2e06e9e779d9ed2c754
+2327bda7a55ac6b72614ac5ebd5c5a5e02553b9b

From 1f66b699c48cb5ab3265ed72c48e8549b1674291 Mon Sep 17 00:00:00 2001
From: Alexey Parfenov <zxed@alkatrazstudio.net>
Date: Wed, 16 Oct 2024 08:35:53 +0000
Subject: [PATCH 3/4] server : fix the disappearance of the end of the text
 (#9867)

* server: fix the disappearance of the end of the text when streaming with stop strings

* simplify "send text" checks
---
 examples/server/server.cpp | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index d53cca84c..b5e63384c 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1090,22 +1090,21 @@ struct server_context {
             size_t pos = std::min(slot.n_sent_text, slot.generated_text.size());
 
             const std::string str_test = slot.generated_text.substr(pos);
-            bool is_stop_full = false;
+            bool send_text = true;
 
             size_t stop_pos = slot.find_stopping_strings(str_test, token_str.size(), STOP_TYPE_FULL);
             if (stop_pos != std::string::npos) {
-                is_stop_full = true;
                 slot.generated_text.erase(
                     slot.generated_text.begin() + pos + stop_pos,
                     slot.generated_text.end());
                 pos = std::min(slot.n_sent_text, slot.generated_text.size());
-            } else {
-                is_stop_full = false;
+            } else if (slot.has_next_token) {
                 stop_pos = slot.find_stopping_strings(str_test, token_str.size(), STOP_TYPE_PARTIAL);
+                send_text = stop_pos == std::string::npos;
             }
 
             // check if there is any token to predict
-            if (stop_pos == std::string::npos || (!slot.has_next_token && !is_stop_full && stop_pos > 0)) {
+            if (send_text) {
                 // no send the stop word in the response
                 result.text_to_send = slot.generated_text.substr(pos, std::string::npos);
                 slot.n_sent_text += result.text_to_send.size();

From 10433e8b457c4cfd759cbb41fc55fc398db4a5da Mon Sep 17 00:00:00 2001
From: Molly Sophia <mollysophia379@gmail.com>
Date: Wed, 16 Oct 2024 18:10:21 +0800
Subject: [PATCH 4/4] llama : add tensor name for "result_norm" (#9907)

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>
---
 src/llama.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/llama.cpp b/src/llama.cpp
index 8d44c73c8..c51b49c56 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -16095,9 +16095,11 @@ struct llm_build_context {
         cur = ggml_get_rows(ctx0, cur, inp_out_ids);
 
         cur = llm_build_norm(ctx0, cur, hparams, model.output_norm, model.output_norm_b, LLM_NORM, cb, -1);
-        cur = llm_build_lora_mm(lctx, ctx0, model.output, cur);
+        cb(cur, "result_norm", -1);
 
+        cur = llm_build_lora_mm(lctx, ctx0, model.output, cur);
         cb(cur, "result_output", -1);
+
         ggml_build_forward_expand(gf, cur);
 
         return gf;