From d8f782acc318fa488e4a97dd5ed9ef254c0bff46 Mon Sep 17 00:00:00 2001
From: themanyone
Date: Sun, 21 Jul 2024 16:35:31 -0800
Subject: [PATCH] format batch image output according to --template

Addendum: --template uses [] instead of <>.
---
 common/common.cpp            |  7 +++++++
 common/common.h              |  1 +
 examples/llava/llava-cli.cpp | 21 +++++++++++++++++++--
 3 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/common/common.cpp b/common/common.cpp
index dbb724fbb..e64934a73 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -728,6 +728,11 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
         params.image.emplace_back(argv[i]);
         return true;
     }
+    if (arg == "--template") {
+        CHECK_ARG
+        params.templ = argv[i];
+        return true;
+    }
     if (arg == "-i" || arg == "--interactive") {
         params.interactive = true;
         return true;
@@ -1545,6 +1550,8 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
     options.push_back({ "multi-modality" });
     options.push_back({ "*",           "       --mmproj FILE",     "path to a multimodal projector file for LLaVA. see examples/llava/README.md" });
     options.push_back({ "*",           "       --image FILE",      "path to an image file. use with multimodal models. Specify multiple times for batching" });
+    options.push_back({ "*",           "       --template STRING", "output template: [image] is replaced with the image path, [description] with the generated text" });
+
 
     options.push_back({ "backend" });
     options.push_back({ "*",           "       --rpc SERVERS",     "comma separated list of RPC servers" });
diff --git a/common/common.h b/common/common.h
index 184a53dc0..adbc1c8d5 100644
--- a/common/common.h
+++ b/common/common.h
@@ -185,6 +185,7 @@ struct gpt_params {
     // multimodal models (see examples/llava)
     std::string mmproj = "";        // path to multimodal projector
     std::vector<std::string> image; // path to image file(s)
+    std::string templ = "";         // output template
 
     // embedding
     bool embedding = false; // get only sentence embedding
diff --git a/examples/llava/llava-cli.cpp b/examples/llava/llava-cli.cpp
index 8c7dd2ae3..570b2f116 100644
--- a/examples/llava/llava-cli.cpp
+++ b/examples/llava/llava-cli.cpp
@@ -323,10 +323,27 @@ int main(int argc, char ** argv) {
             std::cerr << "error: failed to load image " << image << ". Terminating\n\n";
             return 1;
         }
-
+        size_t pos = 0;
+        std::string str = params.templ;
+        // print the template up to [description], substituting [image] with the image path
+        if (!params.templ.empty()) {
+            while ((pos = str.find("[image]")) != std::string::npos)
+                str.replace(pos, 7, image);
+            pos = str.find("[description]");
+            if (pos != std::string::npos)
+                std::cout << str.substr(0, pos);
+            else
+                std::cout << params.templ;
+            fflush(stdout);
+        }
         // process the prompt
         process_prompt(ctx_llava, image_embed, &params, params.prompt);
-
+        // after generation, print the rest of the template ("[description]" is 13 chars)
+        if (!params.templ.empty()) {
+            if (pos != std::string::npos)
+                std::cout << str.substr(pos + 13);
+            fflush(stdout);
+        }
         llama_print_timings(ctx_llava->ctx_llama);
         llava_image_embed_free(image_embed);
         ctx_llava->model = NULL;
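
Reviewer note: below is a minimal standalone sketch (not part of the patch) of the
substitution behavior introduced here, assuming the template is split at the first
"[description]" marker and the model's text is streamed between the two halves.
The helper name replace_all and the sample strings are illustrative only.

    // sketch.cpp - illustrates the --template substitution (hypothetical helper)
    #include <iostream>
    #include <string>

    // Replace every occurrence of `tag` in `s` with `value`, resuming the scan
    // after each replacement so a `value` containing `tag` cannot loop forever.
    static std::string replace_all(std::string s, const std::string & tag, const std::string & value) {
        size_t pos = 0;
        while ((pos = s.find(tag, pos)) != std::string::npos) {
            s.replace(pos, tag.size(), value);
            pos += value.size();
        }
        return s;
    }

    int main() {
        const std::string templ = "<img src=\"[image]\" alt=\"[description]\">";
        const std::string image = "cat.jpg";

        std::string str = replace_all(templ, "[image]", image);
        size_t pos = str.find("[description]");
        if (pos != std::string::npos) {
            std::cout << str.substr(0, pos);           // prefix, printed before generation
            std::cout << "a cat sitting on a sofa";    // stand-in for the generated text
            std::cout << str.substr(pos + 13) << "\n"; // suffix ("[description]" is 13 chars)
        } else {
            std::cout << str << "\n";                  // no marker: emit template as-is
        }
        return 0;
    }

With --template '<img src="[image]" alt="[description]">' and --image cat.jpg, the
expected stdout is: <img src="cat.jpg" alt="...generated description...">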