Update llava-cli.cpp to support comma-delimited image lists
Add in the ability to specify a comma-delimited list of images at the command line for batch-processing of multiple images without needing to reload the model file.
This commit is contained in:
parent
b06c16ef9f
commit
41d7c5eaca
1 changed file with 44 additions and 23 deletions
|
@ -208,26 +208,28 @@ static void process_prompt(struct llava_context * ctx_llava, struct llava_image_
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Initialize the llama backend and load the base language model once.
//
// After this commit, llava_init no longer builds a full llava_context; it
// only loads the (expensive) llama model so that main() can reuse the same
// model across a comma-delimited batch of images, creating a fresh
// llava_context per image via llava_init_context().
//
// @param params  parsed command-line parameters; params->model is the model
//                file path and params->numa the NUMA strategy
// @return the loaded model, or NULL on failure (an error is printed to stderr)
static struct llama_model * llava_init(gpt_params * params) {
    llama_backend_init();
    llama_numa_init(params->numa);

    llama_model_params model_params = llama_model_params_from_gpt_params(*params);

    llama_model * model = llama_load_model_from_file(params->model.c_str(), model_params);
    if (model == NULL) {
        fprintf(stderr , "%s: error: unable to load model\n" , __func__);
        return NULL;
    }
    return model;
}
|
||||||
|
|
||||||
|
static struct llava_context * llava_init_context(gpt_params * params, llama_model * model) {
|
||||||
|
const char * clip_path = params->mmproj.c_str();
|
||||||
|
|
||||||
|
auto prompt = params->prompt;
|
||||||
|
if (prompt.empty()) {
|
||||||
|
prompt = "describe the image in detail.";
|
||||||
|
}
|
||||||
|
|
||||||
|
auto ctx_clip = clip_model_load(clip_path, /*verbosity=*/ 1);
|
||||||
|
|
||||||
llama_context_params ctx_params = llama_context_params_from_gpt_params(*params);
|
llama_context_params ctx_params = llama_context_params_from_gpt_params(*params);
|
||||||
ctx_params.n_ctx = params->n_ctx < 2048 ? 2048 : params->n_ctx; // we need a longer context size to process image embeddings
|
ctx_params.n_ctx = params->n_ctx < 2048 ? 2048 : params->n_ctx; // we need a longer context size to process image embeddings
|
||||||
|
@ -273,23 +275,42 @@ int main(int argc, char ** argv) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto ctx_llava = llava_init(¶ms);
|
auto model = llava_init(¶ms);
|
||||||
if (ctx_llava == NULL) {
|
if (model == NULL) {
|
||||||
fprintf(stderr, "%s: error: failed to init llava\n", __func__);
|
fprintf(stderr, "%s: error: failed to init llava model\n", __func__);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto image_embed = load_image(ctx_llava, ¶ms);
|
std::stringstream ss(params.image);
|
||||||
if (!image_embed) {
|
std::vector<std::string> imagestack;
|
||||||
return 1;
|
|
||||||
|
while( ss.good() )
|
||||||
|
{
|
||||||
|
std::string substr;
|
||||||
|
getline( ss, substr, ',' );
|
||||||
|
imagestack.push_back( substr );
|
||||||
}
|
}
|
||||||
|
|
||||||
// process the prompt
|
for (auto & image : imagestack) {
|
||||||
process_prompt(ctx_llava, image_embed, ¶ms, params.prompt);
|
|
||||||
|
|
||||||
llama_print_timings(ctx_llava->ctx_llama);
|
auto ctx_llava = llava_init_context(¶ms, model);
|
||||||
|
params.image=image;
|
||||||
|
|
||||||
llava_image_embed_free(image_embed);
|
auto image_embed = load_image(ctx_llava, ¶ms);
|
||||||
llava_free(ctx_llava);
|
if (!image_embed) {
|
||||||
|
std::cerr << "error: failed to load image " << params.image << ". Terminating\n\n";
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// process the prompt
|
||||||
|
process_prompt(ctx_llava, image_embed, ¶ms, params.prompt);
|
||||||
|
|
||||||
|
llama_print_timings(ctx_llava->ctx_llama);
|
||||||
|
|
||||||
|
llava_image_embed_free(image_embed);
|
||||||
|
ctx_llava->model = NULL;
|
||||||
|
llava_free(ctx_llava);
|
||||||
|
}
|
||||||
|
llama_free_model(model);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue