llama.cpp : fix embeddings input

slaren 2023-07-27 19:03:31 +02:00
parent ba0ab56b63
commit 966c069b3f

llama.cpp

@@ -1443,6 +1443,7 @@ static struct ggml_cgraph * llama_build_graph(
 
     if (tokens) {
         struct ggml_tensor * inp_tokens = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
+
 #ifdef LLAMA_USE_ALLOCATOR
         ggml_allocator_alloc_tensor(lctx.alloc, inp_tokens);
         if (!ggml_allocator_is_measure(lctx.alloc)) {
@@ -1460,8 +1461,16 @@ static struct ggml_cgraph * llama_build_graph(
 #endif
 
         inpL = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N);
-        memcpy(inpL->data, embd, N * n_embd * ggml_element_size(inpL));
-    }
+
+#ifdef LLAMA_USE_ALLOCATOR
+        ggml_allocator_alloc_tensor(lctx.alloc, inpL);
+        if (!ggml_allocator_is_measure(lctx.alloc)) {
+            memcpy(inpL->data, embd, N * n_embd * ggml_element_size(inpL));
+        }
+#else
+        memcpy(inpL->data, embd, N * n_embd * ggml_element_size(inpL));
+#endif
+    }
 
     const int i_gpu_start = n_layer - n_gpu_layers;
     (void) i_gpu_start;
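
The change gives the embeddings input the same guard that the token input inp_tokens already had: when LLAMA_USE_ALLOCATOR is defined, the graph is first built in a measure pass, in which lctx.alloc only records how much memory each tensor will need and inpL->data is not backed by real storage. Before this commit the embeddings path ran the memcpy unconditionally, writing through an unallocated pointer during that pass. Below is a minimal sketch of the pattern, using the allocator calls exactly as they appear in this diff (the API that later landed upstream names them ggml_allocr_alloc / ggml_allocr_is_measure); the helper name make_embd_input and the struct ggml_allocator type name are assumptions for illustration:

    // Guard writes to an allocator-managed input tensor against the measure pass.
    // make_embd_input and struct ggml_allocator are hypothetical names; the
    // ggml_allocator_* calls are the ones used in this commit.
    static struct ggml_tensor * make_embd_input(
            struct ggml_context   * ctx0,
            struct ggml_allocator * alloc,   // lctx.alloc in llama_build_graph
            const float           * embd,
            int n_embd, int N) {
        struct ggml_tensor * inpL = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N);
    #ifdef LLAMA_USE_ALLOCATOR
        // Measure pass: only the size is recorded; inpL->data is not real memory yet.
        ggml_allocator_alloc_tensor(alloc, inpL);
        if (!ggml_allocator_is_measure(alloc)) {
            // Real pass: inpL is now backed by the allocator's buffer, safe to fill.
            memcpy(inpL->data, embd, N * n_embd * ggml_element_size(inpL));
        }
    #else
        // Without the allocator, ggml_new_tensor_2d allocates data eagerly.
        memcpy(inpL->data, embd, N * n_embd * ggml_element_size(inpL));
    #endif
        return inpL;
    }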