server : remove clip structs

This commit is contained in:
Georgi Gerganov 2024-02-13 20:51:20 +02:00
parent 9d166b0850
commit c92431a0a4
No known key found for this signature in database
GPG key ID: 449E073F9DC10735
2 changed files with 11 additions and 22 deletions

View file

@ -1483,7 +1483,7 @@ static void clip_image_convert_f32_to_u8(const clip_image_f32& src, clip_image_u
// returns the normalized float tensor for llava-1.5, for spatial_unpad with anyres processing for llava-1.6 it returns the normalized image patch tensors as a vector // returns the normalized float tensor for llava-1.5, for spatial_unpad with anyres processing for llava-1.6 it returns the normalized image patch tensors as a vector
// res_imgs memory is being allocated here, previous allocations will be freed if found // res_imgs memory is being allocated here, previous allocations will be freed if found
bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, clip_image_f32_batch & res_imgs ) { bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, clip_image_f32_batch & res_imgs) {
bool pad_to_square = true; bool pad_to_square = true;
if (!ctx->has_vision_encoder) { if (!ctx->has_vision_encoder) {
printf("This gguf file seems to have no vision encoder\n"); printf("This gguf file seems to have no vision encoder\n");
@ -1648,9 +1648,11 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, cli
// clip_image_u8_free(temp2); // clip_image_u8_free(temp2);
// } // }
// res_imgs.push_back(res); // res_imgs.push_back(res);
res_imgs.size = 1; res_imgs.size = 1;
res_imgs.data = new clip_image_f32[res_imgs.size]; res_imgs.data = new clip_image_f32[res_imgs.size];
res_imgs.data[0] = std::move(*res); res_imgs.data[0] = std::move(*res);
return true; return true;
} }

View file

@ -31,23 +31,6 @@
using json = nlohmann::json; using json = nlohmann::json;
// RGB uint8 image
// Plain aggregate that owns its pixel storage through a std::vector.
// NOTE(review): the channel layout is not stated for this struct; presumably
// interleaved RGBRGB... like clip_image_f32 - confirm against the code that fills it.
struct clip_image_u8 {
int nx; // first image dimension - presumably the width in pixels (TODO confirm)
int ny; // second image dimension - presumably the height in pixels (TODO confirm)
std::vector<uint8_t> buf; // pixel bytes; presumably 3 * nx * ny entries (TODO confirm)
};
// RGB float32 image (NHWC)
// Memory layout: RGBRGBRGB...
// Plain aggregate that owns its pixel storage through a std::vector; the
// float counterpart of clip_image_u8.
struct clip_image_f32 {
int nx; // first image dimension - presumably the width in pixels (TODO confirm)
int ny; // second image dimension - presumably the height in pixels (TODO confirm)
std::vector<float> buf; // interleaved channel values; presumably 3 * nx * ny floats (TODO confirm)
};
struct server_params struct server_params
{ {
std::string hostname = "127.0.0.1"; std::string hostname = "127.0.0.1";
@ -992,10 +975,13 @@ struct llama_server_context
{ {
LOG_TEE("Error processing the given image"); LOG_TEE("Error processing the given image");
clip_free(clp_ctx); clip_free(clp_ctx);
delete[] img_res_v.data; clip_image_f32_free(img_res_v.data);
return false; return false;
} }
clip_image_f32 * img_res = &img_res_v.data[0];
// note: assumes only one image was returned by clip_image_preprocess
clip_image_f32 * img_res = img_res_v.data;
img.image_tokens = clip_n_patches(clp_ctx); img.image_tokens = clip_n_patches(clp_ctx);
img.image_embedding = (float *)malloc(clip_embd_nbytes(clp_ctx)); img.image_embedding = (float *)malloc(clip_embd_nbytes(clp_ctx));
if (!img.image_embedding) if (!img.image_embedding)
@ -1010,8 +996,9 @@ struct llama_server_context
LOG_TEE("Unable to encode image\n"); LOG_TEE("Unable to encode image\n");
return false; return false;
} }
// clip_image_f32_free(img_res);
delete[] img_res_v.data; clip_image_f32_free(img_res_v.data);
img.request_encode_image = false; img.request_encode_image = false;
} }