From 9a4d128226305a5fae616ac968630f3949f9e79b Mon Sep 17 00:00:00 2001 From: James O'Leary <65884233+jpohhhh@users.noreply.github.com> Date: Wed, 14 Feb 2024 11:50:06 -0500 Subject: [PATCH] llava example fix for wide images On wide images (but seemingly not tall ones), there was a crash in memcpy. Investigation showed that the number of embeddings differed from the number of patches (i.e., a print statement showed num_images != image_embd_v.size() for an image with a wide aspect ratio). This is the most minimal change that resolves the crash; reviewers familiar with clip may identify a better fix based on grid_shape. --- examples/llava/llava.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/llava/llava.cpp b/examples/llava/llava.cpp index 22953417f..e540dea55 100644 --- a/examples/llava/llava.cpp +++ b/examples/llava/llava.cpp @@ -167,7 +167,7 @@ static bool clip_llava_handle_patches(clip_ctx * ctx_clip, std::vector struct ggml_tensor * image_features = ggml_new_tensor_3d(model.ctx, GGML_TYPE_F32, clip_n_mmproj_embd(ctx_clip), clip_n_patches(ctx_clip), num_images - 1); // example: 4096 x 576 x 4 // ggml_tensor_printf(image_features,"image_features",__LINE__,false,false); // fill it with the image embeddings, ignoring the base - for (size_t i = 1; i < num_images; i++) { + for (size_t i = 1; i < image_embd_v.size(); i++) { size_t offset = (i-1) * clip_embd_nbytes(ctx_clip); memcpy((uint8_t *)(image_features->data) + offset, image_embd_v[i], clip_embd_nbytes(ctx_clip)); }