From 966c069b3fe49148b805d5ef4d2bf3fb6043c263 Mon Sep 17 00:00:00 2001
From: slaren
Date: Thu, 27 Jul 2023 19:03:31 +0200
Subject: [PATCH] llama.cpp : fix embeddings input

---
 llama.cpp | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/llama.cpp b/llama.cpp
index 3ae2a895e..02582c483 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1443,6 +1443,7 @@ static struct ggml_cgraph * llama_build_graph(
 
     if (tokens) {
         struct ggml_tensor * inp_tokens = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
+
 #ifdef LLAMA_USE_ALLOCATOR
         ggml_allocator_alloc_tensor(lctx.alloc, inp_tokens);
         if (!ggml_allocator_is_measure(lctx.alloc)) {
@@ -1460,7 +1461,15 @@ static struct ggml_cgraph * llama_build_graph(
 #endif
 
         inpL = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N);
+
+#ifdef LLAMA_USE_ALLOCATOR
+        ggml_allocator_alloc_tensor(lctx.alloc, inpL);
+        if (!ggml_allocator_is_measure(lctx.alloc)) {
+            memcpy(inpL->data, embd, N * n_embd * ggml_element_size(inpL));
+        }
+#else
         memcpy(inpL->data, embd, N * n_embd * ggml_element_size(inpL));
+#endif
     }
 
     const int i_gpu_start = n_layer - n_gpu_layers;
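
Note on what the patch fixes: with LLAMA_USE_ALLOCATOR, tensors are created without backing memory and the graph is built twice. In the first, "measure" pass the allocator only tallies how much memory the graph needs, so tensor data pointers are not valid. The embeddings branch still copied the caller's data into inpL->data unconditionally and never requested an allocation, so the fix allocates inpL through the allocator and skips the memcpy during measurement, mirroring what the tokens branch already did. Below is a minimal, self-contained C sketch of that two-pass pattern; it is not llama.cpp code, and the toy_alloc type and toy_alloc_tensor function are hypothetical stand-ins for the branch's allocator API.

/* Sketch of the "measure then allocate" pattern the patch relies on.
 * Pass 1 counts bytes and hands out no memory; pass 2 bump-allocates
 * from a buffer of the measured size. Copying input data is only safe
 * in pass 2, which is exactly what the added is-measure guard checks. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct toy_alloc {
    int    measure;   /* 1 = measure pass: count bytes, return no pointer */
    size_t needed;    /* total bytes requested so far                     */
    char  *base;      /* backing buffer (real pass only)                  */
    size_t offset;    /* bump-allocator cursor                            */
};

static void *toy_alloc_tensor(struct toy_alloc *a, size_t size) {
    a->needed += size;
    if (a->measure) {
        return NULL;              /* no valid data pointer during measurement */
    }
    void *p = a->base + a->offset;
    a->offset += size;
    return p;
}

int main(void) {
    const size_t n_embd = 8, N = 4;
    float embd[8 * 4] = {0};      /* stand-in for user-provided embeddings */

    /* pass 1: measure; writing through the returned pointer here would crash */
    struct toy_alloc measure = { .measure = 1 };
    toy_alloc_tensor(&measure, N * n_embd * sizeof(float));

    /* pass 2: allocate a buffer of the measured size, then copy for real */
    struct toy_alloc real = { .base = malloc(measure.needed) };
    if (!real.base) {
        return 1;
    }
    float *inpL = toy_alloc_tensor(&real, N * n_embd * sizeof(float));
    if (!real.measure) {          /* the guard the patch adds */
        memcpy(inpL, embd, N * n_embd * sizeof(float));
    }

    printf("measured %zu bytes, copied %zu bytes\n",
           measure.needed, N * n_embd * sizeof(float));
    free(real.base);
    return 0;
}

The same structure explains why the guard, and not just the allocation call, is needed: the build function runs during both passes, and only the allocator knows which pass it is in.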