From 9a4d128226305a5fae616ac968630f3949f9e79b Mon Sep 17 00:00:00 2001
From: James O'Leary <65884233+jpohhhh@users.noreply.github.com>
Date: Wed, 14 Feb 2024 11:50:06 -0500
Subject: [PATCH] llava example fix for wide images

On wide images (but seemingly not tall ones), there was a crash in memcpy.

Investigation showed that the number of embeddings differed from the number of patches
(i.e., a print statement showed num_images != image_embd_v.size() for a wide-aspect-ratio image).

This is the minimal change that resolves the crash;
reviewers familiar with CLIP may identify
a better fix based on grid_shape.
---
 examples/llava/llava.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/examples/llava/llava.cpp b/examples/llava/llava.cpp
index 22953417f..e540dea55 100644
--- a/examples/llava/llava.cpp
+++ b/examples/llava/llava.cpp
@@ -167,7 +167,7 @@ static bool clip_llava_handle_patches(clip_ctx * ctx_clip, std::vector<float *>
     struct ggml_tensor * image_features = ggml_new_tensor_3d(model.ctx, GGML_TYPE_F32, clip_n_mmproj_embd(ctx_clip), clip_n_patches(ctx_clip), num_images - 1); // example: 4096 x 576 x 4
     // ggml_tensor_printf(image_features,"image_features",__LINE__,false,false);
     // fill it with the image embeddings, ignoring the base
-    for (size_t i = 1; i < num_images; i++) {
+    for (size_t i = 1; i < image_embd_v.size(); i++) {
         size_t offset = (i-1) * clip_embd_nbytes(ctx_clip);
         memcpy((uint8_t *)(image_features->data) + offset, image_embd_v[i], clip_embd_nbytes(ctx_clip));
     }