llama.cpp : fix embeddings input

slaren 2023-07-27 19:03:31 +02:00
parent ba0ab56b63
commit 966c069b3f

llama.cpp

@@ -1443,6 +1443,7 @@ static struct ggml_cgraph * llama_build_graph(
 
     if (tokens) {
         struct ggml_tensor * inp_tokens = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
+
 #ifdef LLAMA_USE_ALLOCATOR
         ggml_allocator_alloc_tensor(lctx.alloc, inp_tokens);
         if (!ggml_allocator_is_measure(lctx.alloc)) {
@@ -1460,8 +1461,16 @@ static struct ggml_cgraph * llama_build_graph(
 #endif
 
         inpL = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N);
-        memcpy(inpL->data, embd, N * n_embd * ggml_element_size(inpL));
-    }
+
+#ifdef LLAMA_USE_ALLOCATOR
+        ggml_allocator_alloc_tensor(lctx.alloc, inpL);
+        if (!ggml_allocator_is_measure(lctx.alloc)) {
+            memcpy(inpL->data, embd, N * n_embd * ggml_element_size(inpL));
+        }
+#else
+        memcpy(inpL->data, embd, N * n_embd * ggml_element_size(inpL));
+#endif
+    }
 
     const int i_gpu_start = n_layer - n_gpu_layers;
     (void) i_gpu_start;
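
The change gives the embeddings input the same guard that the token input inp_tokens already had: when LLAMA_USE_ALLOCATOR is defined, the graph is first built in a measure pass, in which lctx.alloc only records how much memory each tensor will need and inpL->data is not backed by real storage. Before this commit the embeddings path ran the memcpy unconditionally, writing through an unallocated pointer during that pass. Below is a minimal sketch of the pattern, using the allocator calls exactly as they appear in this diff (the API that later landed upstream names them ggml_allocr_alloc / ggml_allocr_is_measure); the helper name make_embd_input and the struct ggml_allocator type name are assumptions for illustration:

    // Guard writes to an allocator-managed input tensor against the measure pass.
    // make_embd_input and struct ggml_allocator are hypothetical names; the
    // ggml_allocator_* calls are the ones used in this commit.
    static struct ggml_tensor * make_embd_input(
            struct ggml_context   * ctx0,
            struct ggml_allocator * alloc,   // lctx.alloc in llama_build_graph
            const float           * embd,
            int n_embd, int N) {
        struct ggml_tensor * inpL = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N);
    #ifdef LLAMA_USE_ALLOCATOR
        // Measure pass: only the size is recorded; inpL->data is not real memory yet.
        ggml_allocator_alloc_tensor(alloc, inpL);
        if (!ggml_allocator_is_measure(alloc)) {
            // Real pass: inpL is now backed by the allocator's buffer, safe to fill.
            memcpy(inpL->data, embd, N * n_embd * ggml_element_size(inpL));
        }
    #else
        // Without the allocator, ggml_new_tensor_2d allocates data eagerly.
        memcpy(inpL->data, embd, N * n_embd * ggml_element_size(inpL));
    #endif
        return inpL;
    }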