diff --git a/Makefile b/Makefile index 0a2070b53..083a639c5 100644 --- a/Makefile +++ b/Makefile @@ -691,7 +691,7 @@ save-load-state: examples/save-load-state/save-load-state.cpp ggml.o llama.o $(C $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) -server: examples/server/server.cpp examples/server/oai.hpp examples/server/utils.hpp examples/server/httplib.h examples/server/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp examples/llava/clip.cpp examples/llava/clip.h common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS) +server: examples/server/server.cpp examples/server/oai.hpp examples/server/utils.hpp examples/server/httplib.h examples/server/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp examples/llava/clip.cpp examples/llava/clip.h examples/llava/llava.h examples/llava/llava.cpp common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS) $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) $(CXX) $(CXXFLAGS) -c examples/llava/clip.cpp -o $(call GET_OBJ_FILE, examples/llava/clip.cpp) -Wno-cast-qual $(CXX) $(CXXFLAGS) -Iexamples/server $(filter-out %.h %.hpp $< examples/llava/clip.cpp,$^) $(call GET_OBJ_FILE, $<) $(call GET_OBJ_FILE, examples/llava/clip.cpp) -o $@ $(LDFLAGS) $(LWINSOCK2) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index a0b46970b..d002db9b9 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -5,6 +5,7 @@ #include "oai.hpp" #include "../llava/clip.h" +#include "../llava/llava.h" #include "stb_image.h" @@ -31,6 +32,14 @@ using json = nlohmann::json; +// TODO should be in clip.h? +struct clip_image_u8 { + int nx; + int ny; + + std::vector buf; +}; + struct server_params { std::string hostname = "127.0.0.1"; @@ -702,11 +711,12 @@ struct llama_server_context slot_image img_sl; img_sl.id = img.count("id") != 0 ? img["id"].get() : slot->images.size(); img_sl.img_data = clip_image_u8_init(); - if (!clip_image_load_from_bytes(image_buffer.data(), image_buffer.size(), img_sl.img_data)) - { - LOG_TEE("slot %i - failed to load image [id: %i]\n", slot->id, img_sl.id); - return false; - } + img_sl.img_data->buf = image_buffer; + // if (!clip_image_load_from_bytes(image_buffer.data(), image_buffer.size(), img_sl.img_data)) + // { + // LOG_TEE("slot %i - failed to load image [id: %i]\n", slot->id, img_sl.id); + // return false; + // } LOG_TEE("slot %i - loaded image\n", slot->id); img_sl.request_encode_image = true; slot->images.push_back(img_sl); @@ -983,43 +993,16 @@ struct llama_server_context { continue; } - clip_image_f32_batch img_res_v; - img_res_v.size = 0; - img_res_v.data = nullptr; - if (!clip_image_preprocess(clp_ctx, img.img_data, img_res_v)) - { - LOG_TEE("Error processing the given image"); - clip_free(clp_ctx); - clip_image_f32_batch_free(img_res_v); - return false; - } - if (img_res_v.size == 0) - { + + // TODO call encode_image_with_clip instead? + llava_image_embed * embed = llava_image_embed_make_with_bytes(clp_ctx, params.n_threads, img.img_data->buf.data(), img.img_data->buf.size()); + if (!embed) { LOG_TEE("Error processing the given image"); return false; } - // note: assumes only one image was returned by clip_image_preprocess - clip_image_f32 * img_res = img_res_v.data; - - img.image_tokens = clip_n_patches(clp_ctx); - img.image_embedding = (float *)malloc(clip_embd_nbytes(clp_ctx)); - if (!img.image_embedding) - { - LOG_TEE("Unable to allocate memory for image embeddings\n"); - clip_image_f32_batch_free(img_res_v); - clip_free(clp_ctx); - return false; - } - LOG_TEE("slot %i - encoding image [id: %i]\n", slot.id, img.id); - if (!clip_image_encode(clp_ctx, params.n_threads, img_res, img.image_embedding)) - { - LOG_TEE("Unable to encode image\n"); - clip_image_f32_batch_free(img_res_v); - return false; - } - - clip_image_f32_batch_free(img_res_v); + img.image_embedding = embed->embed; + img.image_tokens = embed->n_image_pos; img.request_encode_image = false; }