diff --git a/examples/llava/llava-cli.cpp b/examples/llava/llava-cli.cpp
index 6ac70ba69..04fe6bef0 100644
--- a/examples/llava/llava-cli.cpp
+++ b/examples/llava/llava-cli.cpp
@@ -167,11 +167,29 @@ static void process_prompt(struct llava_context * ctx_llava, struct llava_image_
         }
 
         printf("system_prompt: %s\n", system_prompt.c_str());
+        if (params->verbose_prompt) {
+            auto tmp = ::llama_tokenize(ctx_llava->ctx_llama, system_prompt, true, true);
+            for (int i = 0; i < (int) tmp.size(); i++) {
+                printf("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
+            }
+        }
         printf("user_prompt: %s\n", user_prompt.c_str());
+        if (params->verbose_prompt) {
+            auto tmp = ::llama_tokenize(ctx_llava->ctx_llama, user_prompt, true, true);
+            for (int i = 0; i < (int) tmp.size(); i++) {
+                printf("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
+            }
+        }
     } else {
         // llava-1.5 native mode
         system_prompt = "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\nUSER:";
         user_prompt = prompt + "\nASSISTANT:";
+        if (params->verbose_prompt) {
+            auto tmp = ::llama_tokenize(ctx_llava->ctx_llama, user_prompt, true, true);
+            for (int i = 0; i < (int) tmp.size(); i++) {
+                printf("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
+            }
+        }
     }
 
     eval_string(ctx_llava->ctx_llama, system_prompt.c_str(), params->n_batch, &n_past, add_bos);
@@ -183,13 +201,17 @@ static void process_prompt(struct llava_context * ctx_llava, struct llava_image_
     fprintf(stderr, "\n");
 
     struct llama_sampling_context * ctx_sampling = llama_sampling_init(params->sparams);
-
+    std::string response = "";
     for (int i = 0; i < max_tgt_len; i++) {
         const char * tmp = sample(ctx_sampling, ctx_llava->ctx_llama, &n_past);
+        response += tmp;
         if (strcmp(tmp, "</s>") == 0) break;
         if (strstr(tmp, "###")) break; // Yi-VL behavior
-
         printf("%s", tmp);
+        if (strstr(response.c_str(), "<|im_end|>")) break; // Yi-34B llava-1.6 - for some reason those decode not as the correct token (tokenizer works)
+        if (strstr(response.c_str(), "<|im_start|>")) break; // Yi-34B llava-1.6
+        if (strstr(response.c_str(), "USER:")) break; // mistral llava-1.6
+
         fflush(stdout);
     }
 
diff --git a/examples/llava/llava.cpp b/examples/llava/llava.cpp
index 4ba89eb97..ff99a688e 100644
--- a/examples/llava/llava.cpp
+++ b/examples/llava/llava.cpp
@@ -116,7 +116,7 @@ static bool handle_patches(clip_ctx * ctx_clip, std::vector<float *> & image_emb
     memcpy(image_embd_out, image_embd_v[0], clip_embd_nbytes(ctx_clip)); // main image as global context
     // append without newline tokens (default behavior in llava_arch when not using unpad ):
     memcpy(image_embd_out + clip_n_patches(ctx_clip) * clip_n_mmproj_embd(ctx_clip), (float*)result->data, clip_embd_nbytes(ctx_clip) * (image_embd_v.size()-1)); // grid patches
-    *n_img_pos_out = result->ne[1]+clip_n_patches(ctx_clip);
+    *n_img_pos_out = static_cast<int>(result->ne[1]+clip_n_patches(ctx_clip));
 
     // Debug: Test single segments
     // Current findings: sending base image, sending a segment embedding all works similar to python
@@ -179,12 +179,12 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli
             bool encoded = clip_image_encode(ctx_clip, n_threads, img_res_v[i], image_embd_v[i]); // image data is in 3x336x336 format and will be converted to 336x336x3 inside
             clip_image_f32_free(img_res_v[i]);
             if (!encoded) {
-                fprintf(stderr, "Unable to encode image - spatial_unpad - subimage %d of %d\n", i+1, img_res_v.size());
+                fprintf(stderr, "Unable to encode image - spatial_unpad - subimage %d of %d\n", i+1, (int)img_res_v.size());
                 return false;
             }
         }
         const int64_t t_img_enc_batch_us = ggml_time_us();
-        printf("%s: %d segments encoded in %8.2f ms\n", __func__, img_res_v.size(), (t_img_enc_batch_us - t_img_enc_start_us) / 1000.0);
+        printf("%s: %d segments encoded in %8.2f ms\n", __func__, (int)img_res_v.size(), (t_img_enc_batch_us - t_img_enc_start_us) / 1000.0);
 
 
         std::vector<std::pair<int, int>> grid_pinpoints;