llava : fix memory management bug (#5491)

* Fix memory management in llava and server code

  Fixes this error:

      llama_new_context_with_model: graph splits (measure): 3
      Available slots:
       -> Slot 0 - max context: 6000
      {"timestamp":1707926446,"level":"INFO","function":"main","line":2623,"message":"model loaded"}
      all slots are idle and system prompt is empty, clear the KV cache
      slot 0 - loaded image
      slot 0 is processing [task id: 0]
      slot 0 : kv cache rm - [0, end)
      slot 0 - encoding image [id: 1]
      munmap_chunk(): invalid pointer
      Aborted

* Make it cleaner by checking size in batch free wrapper
2024-02-15 09:01:57 +01:00 · 2024-02-15 09:01:57 +01:00 · 0d4177126b
commit 0d4177126b
parent 7930a8a6e8
3 changed files with 28 additions and 9 deletions
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -975,7 +975,12 @@ struct llama_server_context
            {
                LOG_TEE("Error processing the given image");
                clip_free(clp_ctx);
-                clip_image_f32_free(img_res_v.data);
+                clip_image_f32_batch_free(img_res_v);
+                return false;
+            }
+            if (img_res_v.size == 0)
+            {
+                LOG_TEE("Error processing the given image");
                return false;
            }

@@ -987,6 +992,7 @@ struct llama_server_context
            if (!img.image_embedding)
            {
                LOG_TEE("Unable to allocate memory for image embeddings\n");
+                clip_image_f32_batch_free(img_res_v);
                clip_free(clp_ctx);
                return false;
            }
@@ -994,10 +1000,11 @@ struct llama_server_context
            if (!clip_image_encode(clp_ctx, params.n_threads, img_res, img.image_embedding))
            {
                LOG_TEE("Unable to encode image\n");
+                clip_image_f32_batch_free(img_res_v);
                return false;
            }

-            clip_image_f32_free(img_res_v.data);
+            clip_image_f32_batch_free(img_res_v);

            img.request_encode_image = false;
        }