From 966c069b3fe49148b805d5ef4d2bf3fb6043c263 Mon Sep 17 00:00:00 2001
From: slaren
Date: Thu, 27 Jul 2023 19:03:31 +0200
Subject: [PATCH] llama.cpp : fix embeddings input

---
 llama.cpp | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/llama.cpp b/llama.cpp
index 3ae2a895e..02582c483 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1443,6 +1443,7 @@ static struct ggml_cgraph * llama_build_graph(
 
     if (tokens) {
         struct ggml_tensor * inp_tokens = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
+
 #ifdef LLAMA_USE_ALLOCATOR
         ggml_allocator_alloc_tensor(lctx.alloc, inp_tokens);
         if (!ggml_allocator_is_measure(lctx.alloc)) {
@@ -1460,7 +1461,15 @@ static struct ggml_cgraph * llama_build_graph(
 #endif
 
         inpL = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N);
+
+#ifdef LLAMA_USE_ALLOCATOR
+        ggml_allocator_alloc_tensor(lctx.alloc, inpL);
+        if (!ggml_allocator_is_measure(lctx.alloc)) {
+            memcpy(inpL->data, embd, N * n_embd * ggml_element_size(inpL));
+        }
+#else
         memcpy(inpL->data, embd, N * n_embd * ggml_element_size(inpL));
+#endif
     }
 
     const int i_gpu_start = n_layer - n_gpu_layers;
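
Note on what the patch fixes: with LLAMA_USE_ALLOCATOR, tensors are created without backing memory and the graph is built twice. In the first, "measure" pass the allocator only tallies how much memory the graph needs, so tensor data pointers are not valid. The embeddings branch still copied the caller's data into inpL->data unconditionally and never requested an allocation, so the fix allocates inpL through the allocator and skips the memcpy during measurement, mirroring what the tokens branch already did. Below is a minimal, self-contained C sketch of that two-pass pattern; it is not llama.cpp code, and the toy_alloc type and toy_alloc_tensor function are hypothetical stand-ins for the branch's allocator API.

/* Sketch of the "measure then allocate" pattern the patch relies on.
 * Pass 1 counts bytes and hands out no memory; pass 2 bump-allocates
 * from a buffer of the measured size. Copying input data is only safe
 * in pass 2, which is exactly what the added is-measure guard checks. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct toy_alloc {
    int    measure;   /* 1 = measure pass: count bytes, return no pointer */
    size_t needed;    /* total bytes requested so far                     */
    char  *base;      /* backing buffer (real pass only)                  */
    size_t offset;    /* bump-allocator cursor                            */
};

static void *toy_alloc_tensor(struct toy_alloc *a, size_t size) {
    a->needed += size;
    if (a->measure) {
        return NULL;              /* no valid data pointer during measurement */
    }
    void *p = a->base + a->offset;
    a->offset += size;
    return p;
}

int main(void) {
    const size_t n_embd = 8, N = 4;
    float embd[8 * 4] = {0};      /* stand-in for user-provided embeddings */

    /* pass 1: measure; writing through the returned pointer here would crash */
    struct toy_alloc measure = { .measure = 1 };
    toy_alloc_tensor(&measure, N * n_embd * sizeof(float));

    /* pass 2: allocate a buffer of the measured size, then copy for real */
    struct toy_alloc real = { .base = malloc(measure.needed) };
    if (!real.base) {
        return 1;
    }
    float *inpL = toy_alloc_tensor(&real, N * n_embd * sizeof(float));
    if (!real.measure) {          /* the guard the patch adds */
        memcpy(inpL, embd, N * n_embd * sizeof(float));
    }

    printf("measured %zu bytes, copied %zu bytes\n",
           measure.needed, N * n_embd * sizeof(float));
    free(real.base);
    return 0;
}

The same structure explains why the guard, and not just the allocation call, is needed: the build function runs during both passes, and only the allocator knows which pass it is in.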