Update llava-cli.cpp

2024-01-23 15:28:16 +01:00 · 2024-01-23 15:28:16 +01:00 · 0dbd295e39
commit 0dbd295e39
parent 51462f1f23
1 changed file with 25 additions and 35 deletions
--- a/examples/llava/llava-cli.cpp
+++ b/examples/llava/llava-cli.cpp
@@ -148,45 +148,35 @@ static void process_prompt(struct llava_context * ctx_llava, struct llava_image_
    const int max_tgt_len = params->n_predict < 0 ? 256 : params->n_predict;
    const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx_llava->ctx_llama));

-    #if 0
-    // llava chat format is "<system_prompt>\nUSER:<image_embeddings>\n<textual_prompt>\nASSISTANT:"
-    eval_string(ctx_llava->ctx_llama, "A chat between a curious human and an artificial intelligence assistant.  The assistant gives helpful, detailed, and polite answers to the human's questions.\nUSER:", params->n_batch, &n_past, add_bos);
-    llava_eval_image_embed(ctx_llava->ctx_llama, image_embed, params->n_batch, &n_past);
-    eval_string(ctx_llava->ctx_llama, (prompt + "\nASSISTANT:").c_str(), params->n_batch, &n_past, false);
-    #else
-        std::string system_prompt, user_prompt;
-        size_t image_pos = prompt.find("<image>");
-        if (image_pos != std::string::npos) {
-            // new templating mode: Provide the full prompt including system message and use <image> as a placeholder for the image
+    std::string system_prompt, user_prompt;
+    size_t image_pos = prompt.find("<image>");
+    if (image_pos != std::string::npos) {
+        // new templating mode: Provide the full prompt including system message and use <image> as a placeholder for the image

-            system_prompt = prompt.substr(0, image_pos);
-            user_prompt = prompt.substr(image_pos + std::string("<image>").length());
-            // We replace \n with actual newlines in user_prompt, just in case -e was not used in templating string
-            size_t pos = 0;
-            while ((pos = user_prompt.find("\\n", pos)) != std::string::npos) {
-                user_prompt.replace(pos, 2, "\n");
-                pos += 1; // Advance past the replaced newline
-            }
-            while ((pos = system_prompt.find("\\n", pos)) != std::string::npos) {
-                system_prompt.replace(pos, 2, "\n");
-                pos += 1; // Advance past the replaced newline
-            }
-
-            printf("system_prompt: %s\n", system_prompt.c_str());
-            printf("user_prompt: %s\n", user_prompt.c_str());
-        } else {
-            // llava-1.5 native mode
-            system_prompt = "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\nUSER:";
-            user_prompt = prompt + "\nASSISTANT:";
+        system_prompt = prompt.substr(0, image_pos);
+        user_prompt = prompt.substr(image_pos + std::string("<image>").length());
+        // We replace \n with actual newlines in user_prompt, just in case -e was not used in templating string
+        size_t pos = 0;
+        while ((pos = user_prompt.find("\\n", pos)) != std::string::npos) {
+            user_prompt.replace(pos, 2, "\n");
+            pos += 1; // Advance past the replaced newline
+        }
+        while ((pos = system_prompt.find("\\n", pos)) != std::string::npos) {
+            system_prompt.replace(pos, 2, "\n");
+            pos += 1; // Advance past the replaced newline
        }

+        printf("system_prompt: %s\n", system_prompt.c_str());
+        printf("user_prompt: %s\n", user_prompt.c_str());
+    } else {
+        // llava-1.5 native mode
+        system_prompt = "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\nUSER:";
+        user_prompt = prompt + "\nASSISTANT:";
+    }

-
-        eval_string(ctx_llava->ctx_llama, system_prompt.c_str(), params->n_batch, &n_past, add_bos);
-        llava_eval_image_embed(ctx_llava->ctx_llama, image_embed, params->n_batch, &n_past);
-        eval_string(ctx_llava->ctx_llama, user_prompt.c_str(), params->n_batch, &n_past, false);
-
-    #endif
+    eval_string(ctx_llava->ctx_llama, system_prompt.c_str(), params->n_batch, &n_past, add_bos);
+    llava_eval_image_embed(ctx_llava->ctx_llama, image_embed, params->n_batch, &n_past);
+    eval_string(ctx_llava->ctx_llama, user_prompt.c_str(), params->n_batch, &n_past, false);

    // generate the response