diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
index 1cdb2be74..73438e3f5 100644
--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
@@ -1483,7 +1483,7 @@ static void clip_image_convert_f32_to_u8(const clip_image_f32& src, clip_image_u
 
 // returns the normalized float tensor for llava-1.5, for spatial_unpad with anyres processing for llava-1.6 it returns the normalized image patch tensors as a vector
 // res_imgs memory is being allocated here, previous allocations will be freed if found
-bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, clip_image_f32_batch & res_imgs ) {
+bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, clip_image_f32_batch & res_imgs) {
     bool pad_to_square = true;
     if (!ctx->has_vision_encoder) {
         printf("This gguf file seems to have no vision encoder\n");
@@ -1648,9 +1648,11 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, cli
     //     clip_image_u8_free(temp2);
     // }
     // res_imgs.push_back(res);
+
     res_imgs.size = 1;
     res_imgs.data = new clip_image_f32[res_imgs.size];
     res_imgs.data[0] = std::move(*res);
+
     return true;
 }
 
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index a9f71725d..6e3434030 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -31,23 +31,6 @@
 
 using json = nlohmann::json;
 
-// RGB uint8 image
-struct clip_image_u8 {
-    int nx;
-    int ny;
-
-    std::vector<uint8_t> buf;
-};
-
-// RGB float32 image (NHWC)
-// Memory layout: RGBRGBRGB...
-struct clip_image_f32 {
-    int nx;
-    int ny;
-
-    std::vector<float> buf;
-};
-
 struct server_params
 {
     std::string hostname = "127.0.0.1";
@@ -992,10 +975,13 @@ struct llama_server_context
             {
                 LOG_TEE("Error processing the given image");
                 clip_free(clp_ctx);
-                delete[] img_res_v.data;
+                clip_image_f32_free(img_res_v.data);
                 return false;
             }
-            clip_image_f32 * img_res = &img_res_v.data[0];
+
+            // note: assumes clip_image_preprocess returned exactly one image; its data is allocated with new[] - TODO confirm clip_image_f32_free releases it correctly
+            clip_image_f32 * img_res = img_res_v.data;
+
             img.image_tokens = clip_n_patches(clp_ctx);
             img.image_embedding = (float *)malloc(clip_embd_nbytes(clp_ctx));
             if (!img.image_embedding)
@@ -1010,8 +996,9 @@ struct llama_server_context
                 LOG_TEE("Unable to encode image\n");
                 return false;
             }
-            // clip_image_f32_free(img_res);
-            delete[] img_res_v.data;
+
+            clip_image_f32_free(img_res_v.data);
+
             img.request_encode_image = false;
         }