Added verbose_prompt support to the CLI
Added stop words for llava-1.6 to the CLI
This commit is contained in:
parent
60c5f46ba7
commit
0dd6c9da2a
2 changed files with 27 additions and 5 deletions
|
@ -167,11 +167,29 @@ static void process_prompt(struct llava_context * ctx_llava, struct llava_image_
|
||||||
}
|
}
|
||||||
|
|
||||||
printf("system_prompt: %s\n", system_prompt.c_str());
|
printf("system_prompt: %s\n", system_prompt.c_str());
|
||||||
|
if (params->verbose_prompt) {
|
||||||
|
auto tmp = ::llama_tokenize(ctx_llava->ctx_llama, system_prompt, true, true);
|
||||||
|
for (int i = 0; i < (int) tmp.size(); i++) {
|
||||||
|
printf("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
|
||||||
|
}
|
||||||
|
}
|
||||||
printf("user_prompt: %s\n", user_prompt.c_str());
|
printf("user_prompt: %s\n", user_prompt.c_str());
|
||||||
|
if (params->verbose_prompt) {
|
||||||
|
auto tmp = ::llama_tokenize(ctx_llava->ctx_llama, user_prompt, true, true);
|
||||||
|
for (int i = 0; i < (int) tmp.size(); i++) {
|
||||||
|
printf("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
|
||||||
|
}
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// llava-1.5 native mode
|
// llava-1.5 native mode
|
||||||
system_prompt = "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\nUSER:";
|
system_prompt = "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\nUSER:";
|
||||||
user_prompt = prompt + "\nASSISTANT:";
|
user_prompt = prompt + "\nASSISTANT:";
|
||||||
|
if (params->verbose_prompt) {
|
||||||
|
auto tmp = ::llama_tokenize(ctx_llava->ctx_llama, user_prompt, true, true);
|
||||||
|
for (int i = 0; i < (int) tmp.size(); i++) {
|
||||||
|
printf("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
eval_string(ctx_llava->ctx_llama, system_prompt.c_str(), params->n_batch, &n_past, add_bos);
|
eval_string(ctx_llava->ctx_llama, system_prompt.c_str(), params->n_batch, &n_past, add_bos);
|
||||||
|
@ -183,13 +201,17 @@ static void process_prompt(struct llava_context * ctx_llava, struct llava_image_
|
||||||
fprintf(stderr, "\n");
|
fprintf(stderr, "\n");
|
||||||
|
|
||||||
struct llama_sampling_context * ctx_sampling = llama_sampling_init(params->sparams);
|
struct llama_sampling_context * ctx_sampling = llama_sampling_init(params->sparams);
|
||||||
|
std::string response = "";
|
||||||
for (int i = 0; i < max_tgt_len; i++) {
|
for (int i = 0; i < max_tgt_len; i++) {
|
||||||
const char * tmp = sample(ctx_sampling, ctx_llava->ctx_llama, &n_past);
|
const char * tmp = sample(ctx_sampling, ctx_llava->ctx_llama, &n_past);
|
||||||
|
response += tmp;
|
||||||
if (strcmp(tmp, "</s>") == 0) break;
|
if (strcmp(tmp, "</s>") == 0) break;
|
||||||
if (strstr(tmp, "###")) break; // Yi-VL behavior
|
if (strstr(tmp, "###")) break; // Yi-VL behavior
|
||||||
|
|
||||||
printf("%s", tmp);
|
printf("%s", tmp);
|
||||||
|
if (strstr(response.c_str(), "<|im_end|>")) break; // Yi-34B llava-1.6 - for some reason those decode not as the correct token (tokenizer works)
|
||||||
|
if (strstr(response.c_str(), "<|im_start|>")) break; // Yi-34B llava-1.6
|
||||||
|
if (strstr(response.c_str(), "USER:")) break; // mistral llava-1.6
|
||||||
|
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -116,7 +116,7 @@ static bool handle_patches(clip_ctx * ctx_clip, std::vector<float *> & image_emb
|
||||||
memcpy(image_embd_out, image_embd_v[0], clip_embd_nbytes(ctx_clip)); // main image as global context
|
memcpy(image_embd_out, image_embd_v[0], clip_embd_nbytes(ctx_clip)); // main image as global context
|
||||||
// append without newline tokens (default behavior in llava_arch when not using unpad ):
|
// append without newline tokens (default behavior in llava_arch when not using unpad ):
|
||||||
memcpy(image_embd_out + clip_n_patches(ctx_clip) * clip_n_mmproj_embd(ctx_clip), (float*)result->data, clip_embd_nbytes(ctx_clip) * (image_embd_v.size()-1)); // grid patches
|
memcpy(image_embd_out + clip_n_patches(ctx_clip) * clip_n_mmproj_embd(ctx_clip), (float*)result->data, clip_embd_nbytes(ctx_clip) * (image_embd_v.size()-1)); // grid patches
|
||||||
*n_img_pos_out = result->ne[1]+clip_n_patches(ctx_clip);
|
*n_img_pos_out = static_cast<int>(result->ne[1]+clip_n_patches(ctx_clip));
|
||||||
|
|
||||||
// Debug: Test single segments
|
// Debug: Test single segments
|
||||||
// Current findings: sending base image, sending a segment embedding all works similar to python
|
// Current findings: sending base image, sending a segment embedding all works similar to python
|
||||||
|
@ -179,12 +179,12 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli
|
||||||
bool encoded = clip_image_encode(ctx_clip, n_threads, img_res_v[i], image_embd_v[i]); // image data is in 3x336x336 format and will be converted to 336x336x3 inside
|
bool encoded = clip_image_encode(ctx_clip, n_threads, img_res_v[i], image_embd_v[i]); // image data is in 3x336x336 format and will be converted to 336x336x3 inside
|
||||||
clip_image_f32_free(img_res_v[i]);
|
clip_image_f32_free(img_res_v[i]);
|
||||||
if (!encoded) {
|
if (!encoded) {
|
||||||
fprintf(stderr, "Unable to encode image - spatial_unpad - subimage %d of %d\n", i+1, img_res_v.size());
|
fprintf(stderr, "Unable to encode image - spatial_unpad - subimage %d of %d\n", i+1, (int)img_res_v.size());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
const int64_t t_img_enc_batch_us = ggml_time_us();
|
const int64_t t_img_enc_batch_us = ggml_time_us();
|
||||||
printf("%s: %d segments encoded in %8.2f ms\n", __func__, img_res_v.size(), (t_img_enc_batch_us - t_img_enc_start_us) / 1000.0);
|
printf("%s: %d segments encoded in %8.2f ms\n", __func__, (int)img_res_v.size(), (t_img_enc_batch_us - t_img_enc_start_us) / 1000.0);
|
||||||
|
|
||||||
|
|
||||||
std::vector<std::pair<int, int>> grid_pinpoints;
|
std::vector<std::pair<int, int>> grid_pinpoints;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue