Export llava_image_embed_make_with_clip_img and use it from the server example.

  * llava.cpp / llava.h: drop `static` from llava_image_embed_make_with_clip_img
    and declare it with LLAVA_API so other translation units can call it.
  * server.cpp: remove the file-local clip_image_u8 / clip_image_f32 struct
    definitions; load the request's image bytes into the clip image with
    clip_image_load_from_bytes() when the slot is set up (failing the request
    if loading fails); build the embedding from that clip image instead of
    calling llava_image_embed_make_with_bytes() on img_data->buf.

Review notes (not changed here, so that context lines keep matching the target file):
  * LOG_TEE("Error processing the given image") has no trailing "\n", unlike the
    other LOG_TEE calls in this patch.
  * The new early `return false` after clip_image_load_from_bytes() fails does not
    release img_sl.img_data obtained from clip_image_u8_init() -- confirm whether
    it must be freed on that path.

diff --git a/examples/llava/llava.cpp b/examples/llava/llava.cpp
index 4cb65a07b..1a1cf7c78 100644
--- a/examples/llava/llava.cpp
+++ b/examples/llava/llava.cpp
@@ -311,7 +311,7 @@ bool llava_validate_embed_size(const llama_context * ctx_llama, const clip_ctx *
     return true;
 }
 
-static bool llava_image_embed_make_with_clip_img(clip_ctx * ctx_clip, int n_threads, const clip_image_u8 * img, float ** image_embd_out, int * n_img_pos_out) {
+bool llava_image_embed_make_with_clip_img(clip_ctx * ctx_clip, int n_threads, const clip_image_u8 * img, float ** image_embd_out, int * n_img_pos_out) {
     float * image_embd = (float *)malloc(clip_embd_nbytes(ctx_clip)*6); // TODO: base on gridsize/llava model
     if (!image_embd) {
         fprintf(stderr, "Unable to allocate memory for image embeddings\n");
diff --git a/examples/llava/llava.h b/examples/llava/llava.h
index 9e9466a5d..2d40f3f1d 100644
--- a/examples/llava/llava.h
+++ b/examples/llava/llava.h
@@ -31,6 +31,8 @@ struct llava_image_embed {
 /** sanity check for clip <-> llava embed size match */
 LLAVA_API bool llava_validate_embed_size(const llama_context * ctx_llama, const clip_ctx * ctx_clip);
 
+LLAVA_API bool llava_image_embed_make_with_clip_img(clip_ctx * ctx_clip, int n_threads, const clip_image_u8 * img, float ** image_embd_out, int * n_img_pos_out);
+
 /** build an image embed from image file bytes */
 LLAVA_API struct llava_image_embed * llava_image_embed_make_with_bytes(struct clip_ctx * ctx_clip, int n_threads, const unsigned char * image_bytes, int image_bytes_length);
 /** build an image embed from a path to an image filename */
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 8bbb28f85..c4cd325f5 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -43,23 +43,6 @@ struct server_params
     int32_t write_timeout = 600;
 };
 
-// RGB uint8 image
-struct clip_image_u8 {
-    int nx;
-    int ny;
-
-    std::vector<uint8_t> buf;
-};
-
-// RGB float32 image (NHWC)
-// Memory layout: RGBRGBRGB...
-struct clip_image_f32 {
-    int nx;
-    int ny;
-
-    std::vector<float> buf;
-};
-
 bool server_verbose = false;
 
 static size_t common_part(const std::vector<llama_token> &a, const std::vector<llama_token> &b)
@@ -720,7 +703,11 @@ struct llama_server_context
                 slot_image img_sl;
                 img_sl.id = img.count("id") != 0 ? img["id"].get<int>() : slot->images.size();
                 img_sl.img_data = clip_image_u8_init();
-                img_sl.img_data->buf = image_buffer;
+                if (!clip_image_load_from_bytes(image_buffer.data(), image_buffer.size(), img_sl.img_data))
+                {
+                    LOG_TEE("slot %i - failed to load image [id: %i]\n", slot->id, img_sl.id);
+                    return false;
+                }
                 LOG_TEE("slot %i - loaded image\n", slot->id);
                 img_sl.request_encode_image = true;
                 slot->images.push_back(img_sl);
@@ -998,14 +985,11 @@ struct llama_server_context
                 continue;
             }
 
-            llava_image_embed * embed = llava_image_embed_make_with_bytes(clp_ctx, params.n_threads, img.img_data->buf.data(), img.img_data->buf.size());
-            if (!embed) {
+            if (!llava_image_embed_make_with_clip_img(clp_ctx, params.n_threads, img.img_data, &img.image_embedding, &img.image_tokens)) {
                 LOG_TEE("Error processing the given image");
                 return false;
             }
 
-            img.image_embedding = embed->embed;
-            img.image_tokens = embed->n_image_pos;
 
             img.request_encode_image = false;
         }