This commit is contained in:
Henry Kroll III 2024-09-02 16:59:45 +02:00 committed by GitHub
commit 8007d0665f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 27 additions and 2 deletions

View file

@@ -1075,6 +1075,11 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
params.image.emplace_back(argv[i]); params.image.emplace_back(argv[i]);
return true; return true;
} }
if (arg == "--template") {
CHECK_ARG
params.templ = argv[i];
return true;
}
if (arg == "-i" || arg == "--interactive") { if (arg == "-i" || arg == "--interactive") {
params.interactive = true; params.interactive = true;
return true; return true;
@@ -1927,6 +1932,8 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
options.push_back({ "multi-modality" }); options.push_back({ "multi-modality" });
options.push_back({ "*", " --mmproj FILE", "path to a multimodal projector file for LLaVA. see examples/llava/README.md" }); options.push_back({ "*", " --mmproj FILE", "path to a multimodal projector file for LLaVA. see examples/llava/README.md" });
options.push_back({ "*", " --image FILE", "path to an image file. use with multimodal models. Specify multiple times for batching" }); options.push_back({ "*", " --image FILE", "path to an image file. use with multimodal models. Specify multiple times for batching" });
options.push_back({ "*", " --template STRING", "output template replaces [image] and [description] with generated output" });
options.push_back({ "backend" }); options.push_back({ "backend" });
options.push_back({ "*", " --rpc SERVERS", "comma separated list of RPC servers" }); options.push_back({ "*", " --rpc SERVERS", "comma separated list of RPC servers" });

View file

@@ -203,6 +203,7 @@ struct gpt_params {
// multimodal models (see examples/llava) // multimodal models (see examples/llava)
std::string mmproj = ""; // path to multimodal projector std::string mmproj = ""; // path to multimodal projector
std::vector<std::string> image; // path to image file(s) std::vector<std::string> image; // path to image file(s)
std::string templ = ""; // output template
// embedding // embedding
bool embedding = false; // get only sentence embedding bool embedding = false; // get only sentence embedding

View file

@@ -323,10 +323,27 @@ int main(int argc, char ** argv) {
std::cerr << "error: failed to load image " << image << ". Terminating\n\n"; std::cerr << "error: failed to load image " << image << ". Terminating\n\n";
return 1; return 1;
} }
size_t pos = 0;
std::string str = params.templ;
// format output according to template
if (!params.templ.empty()){
while((pos = str.find("[image]")) != std::string::npos)
str = str.replace(pos, 7, image);
pos = str.find("[description]");
if (pos != std::string::npos)
std::cout << str.substr(0, pos);
else
std::cout << params.templ;
fflush(stdout);
}
// process the prompt // process the prompt
process_prompt(ctx_llava, image_embed, &params, params.prompt); process_prompt(ctx_llava, image_embed, &params, params.prompt);
// terminate output according to template
if (!params.templ.empty()){
if (pos != std::string::npos)
std::cout << str.substr(pos + 13);
fflush(stdout);
}
llama_print_timings(ctx_llava->ctx_llama); llama_print_timings(ctx_llava->ctx_llama);
llava_image_embed_free(image_embed); llava_image_embed_free(image_embed);
ctx_llava->model = NULL; ctx_llava->model = NULL;