expose llava_image_embed_make_with_clip_img

2024-02-19 07:30:36 -08:00 · 2024-02-19 07:30:36 -08:00 · 0ffba498a1
commit 0ffba498a1
parent fb16cc9e9f
3 changed files with 9 additions and 23 deletions
--- a/examples/llava/llava.cpp
+++ b/examples/llava/llava.cpp
@ -311,7 +311,7 @@ bool llava_validate_embed_size(const llama_context * ctx_llama, const clip_ctx *
    return true;
 }

-static bool llava_image_embed_make_with_clip_img(clip_ctx * ctx_clip, int n_threads, const clip_image_u8 * img, float ** image_embd_out, int * n_img_pos_out) {
+bool llava_image_embed_make_with_clip_img(clip_ctx * ctx_clip, int n_threads, const clip_image_u8 * img, float ** image_embd_out, int * n_img_pos_out) {
    float * image_embd = (float *)malloc(clip_embd_nbytes(ctx_clip)*6); // TODO: base on gridsize/llava model
    if (!image_embd) {
        fprintf(stderr, "Unable to allocate memory for image embeddings\n");
--- a/examples/llava/llava.h
+++ b/examples/llava/llava.h
@ -31,6 +31,8 @@ struct llava_image_embed {
 /** sanity check for clip <-> llava embed size match */
 LLAVA_API bool llava_validate_embed_size(const llama_context * ctx_llama, const clip_ctx * ctx_clip);

+LLAVA_API bool llava_image_embed_make_with_clip_img(clip_ctx * ctx_clip, int n_threads, const clip_image_u8 * img, float ** image_embd_out, int * n_img_pos_out);
+
 /** build an image embed from image file bytes */
 LLAVA_API struct llava_image_embed * llava_image_embed_make_with_bytes(struct clip_ctx * ctx_clip, int n_threads, const unsigned char * image_bytes, int image_bytes_length);
 /** build an image embed from a path to an image filename */
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@ -43,23 +43,6 @@ struct server_params
    int32_t write_timeout = 600;
 };

-// RGB uint8 image
-struct clip_image_u8 {
-    int nx;
-    int ny;
-
-    std::vector<uint8_t> buf;
-};
-
-// RGB float32 image (NHWC)
-// Memory layout: RGBRGBRGB...
-struct clip_image_f32 {
-    int nx;
-    int ny;
-
-    std::vector<float> buf;
-};
-
 bool server_verbose = false;

 static size_t common_part(const std::vector<llama_token> &a, const std::vector<llama_token> &b)
@ -720,7 +703,11 @@ struct llama_server_context
                    slot_image img_sl;
                    img_sl.id = img.count("id") != 0 ? img["id"].get<int>() : slot->images.size();
                    img_sl.img_data = clip_image_u8_init();
-                    img_sl.img_data->buf = image_buffer;
+                    if (!clip_image_load_from_bytes(image_buffer.data(), image_buffer.size(), img_sl.img_data))
+                    {
+                        LOG_TEE("slot %i - failed to load image [id: %i]\n", slot->id, img_sl.id);
+                        return false;
+                    }
                    LOG_TEE("slot %i - loaded image\n", slot->id);
                    img_sl.request_encode_image = true;
                    slot->images.push_back(img_sl);
@ -998,14 +985,11 @@ struct llama_server_context
                continue;
            }

-            llava_image_embed * embed = llava_image_embed_make_with_bytes(clp_ctx, params.n_threads, img.img_data->buf.data(), img.img_data->buf.size());
-            if (!embed) {
+            if (!llava_image_embed_make_with_clip_img(clp_ctx, params.n_threads, img.img_data, &img.image_embedding, &img.image_tokens)) {
                LOG_TEE("Error processing the given image");
                return false;
            }

-            img.image_embedding = embed->embed;
-            img.image_tokens = embed->n_image_pos;

            img.request_encode_image = false;
        }