fix memory error & clean up the print statements
parent aa23425236
commit 953bef9374

3 changed files with 36 additions and 70 deletions
@@ -1504,6 +1504,9 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
    // kv
    const int n_kv = gguf_get_n_kv(ctx);

    // std::cout << "do I have n_kv here at clip.cpp "<< __LINE__ << "? : " << gguf_get_n_kv(ctx) <<std::endl;

    LOG_TEE("%s: loaded meta data with %d key-value pairs and %d tensors from %s\n",
        __func__, n_kv, n_tensors, fname);
    {
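Note: the n_kv read above is the value whose corruption caused the crash this commit fixes; gguf_free iterates the header's key-value table, so it must only ever run on a live context. A minimal sketch of reading GGUF metadata counts with the public gguf API (the error-handling shape is an assumption, not code from this commit):

    // Sketch: open a GGUF file and report its metadata counts.
    // Uses only the public gguf API; not code from this commit.
    #include "ggml.h" // the gguf API lived here at the time (newer trees: gguf.h)
    #include <cstdio>

    static bool dump_gguf_counts(const char * fname) {
        struct gguf_init_params params = { /*no_alloc =*/ true, /*ctx =*/ nullptr };
        struct gguf_context * ctx = gguf_init_from_file(fname, params);
        if (!ctx) {
            fprintf(stderr, "failed to open %s\n", fname);
            return false;
        }
        const int n_kv      = gguf_get_n_kv(ctx);      // key-value metadata pairs
        const int n_tensors = gguf_get_n_tensors(ctx); // weight tensors
        printf("%s: %d kv pairs, %d tensors\n", fname, n_kv, n_tensors);
        gguf_free(ctx); // must run exactly once per context
        return true;
    }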
@@ -1981,7 +1984,6 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
        size_t compute_memory_buffer_size = ggml_gallocr_get_buffer_size(new_clip->compute_alloc, 0);
        LOG_TEE("%s: compute allocated memory: %.2f MB\n", __func__, compute_memory_buffer_size /1024.0/1024.0);
    }

    return new_clip;
}
@@ -2424,7 +2426,6 @@ int clip_uhd_num_image_embeds_col(struct clip_ctx * ctx_clip) {

// returns the normalized float tensor for llava-1.5, for spatial_unpad with anyres processing for llava-1.6 it returns the normalized image patch tensors as a vector
// res_imgs memory is being allocated here, previous allocations will be freed if found
bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, clip_image_f32_batch * res_imgs) {
    if(clip_is_minicpmv(ctx)){
        int max_slice_nums = 9;
        std::vector<std::vector<clip_image_u8 *>> imgs = uhd_slice_image(img, max_slice_nums);
@@ -2497,7 +2498,7 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, clip_image_f32_batch * res_imgs) {
            possible_resolutions.push_back({params.image_grid_pinpoints[i], params.image_grid_pinpoints[i+1]});
        }
        std::pair<int, int> best_resolution = select_best_resolution({img->nx, img->ny}, possible_resolutions);
        printf("best_resolution: %d %d\n", best_resolution.first, best_resolution.second);
        // printf("best_resolution: %d %d\n", best_resolution.first, best_resolution.second);
        // clip_image_save_to_bmp(*img, "input.bmp");
        resize_and_pad_image(*img, *temp, best_resolution); // we do not pad with mean-bg color anymore in llava-1.6
        // clip_image_save_to_bmp(*temp, "resized.bmp");
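For context, select_best_resolution implements the llava-1.6 anyres rule: among the model's grid pinpoints, prefer the candidate that preserves the most downscaled image area, breaking ties by least padding waste. A minimal sketch of that selection (assumed from the upstream llava-1.6 logic; this diff only shows the call site):

    // Sketch of anyres resolution selection; upstream behavior is assumed.
    #include <algorithm>
    #include <climits>
    #include <utility>
    #include <vector>

    static std::pair<int, int> select_best_resolution_sketch(
            const std::pair<int, int> & original_size,
            const std::vector<std::pair<int, int>> & possible_resolutions) {
        const int orig_w = original_size.first;
        const int orig_h = original_size.second;
        std::pair<int, int> best = {0, 0};
        int max_effective = 0;
        int min_wasted    = INT_MAX;
        for (const auto & res : possible_resolutions) {
            // scale so the image fits inside the candidate canvas
            const float scale = std::min((float) res.first  / orig_w,
                                         (float) res.second / orig_h);
            const int down_w = (int) (orig_w * scale);
            const int down_h = (int) (orig_h * scale);
            // effective = real image pixels kept; wasted = padding pixels
            const int effective = std::min(down_w * down_h, orig_w * orig_h);
            const int wasted    = res.first * res.second - effective;
            if (effective > max_effective ||
                (effective == max_effective && wasted < min_wasted)) {
                max_effective = effective;
                min_wasted    = wasted;
                best          = res;
            }
        }
        return best;
    }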
@@ -2624,7 +2625,6 @@ ggml_tensor * clip_get_newline_tensor(const struct clip_ctx * ctx) {

void clip_free(clip_ctx * ctx) {
    ggml_free(ctx->ctx_data);
    gguf_free(ctx->ctx_gguf);

    ggml_backend_buffer_free(ctx->params_buffer);
    ggml_backend_free(ctx->backend);
    ggml_gallocr_free(ctx->compute_alloc);
@@ -2807,10 +2807,8 @@ bool clip_image_encode_tokenizer(struct clip_ctx * ctx, int batch_size, ggml_ten
    ggml_gallocr_alloc_graph(ctx->compute_alloc, gf);
    ggml_backend_graph_compute(ctx->backend, gf);
    struct ggml_tensor * llm_inputs = gf->nodes[gf->n_nodes - 1];
    print_my_tensor(llm_inputs, "llm_inputs", 1);
    // exit(0);
    ggml_backend_tensor_get(llm_inputs, image_embd, 0, ggml_nbytes(llm_inputs));
    clip_free(ctx);
    // clip_free(ctx); // debug: llava_ctx was freed here, now free all the 'ctx' memory outside.
    return true;
}
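The hunk above is the standard ggml inference sequence: allocate tensor memory for the graph, execute it on the backend, treat the last node as the output, and copy it back to host memory. A minimal sketch of that pattern, stripped of the xgenmm specifics (graph construction elided; sizing out_buf is the caller's job):

    // Sketch of the run-and-read-back pattern used above.
    #include "ggml.h"
    #include "ggml-alloc.h"
    #include "ggml-backend.h"

    static bool run_graph_and_read(ggml_backend_t backend, ggml_gallocr_t alloc,
                                   struct ggml_cgraph * gf, float * out_buf) {
        if (!ggml_gallocr_alloc_graph(alloc, gf)) { // reserve memory for all nodes
            return false;
        }
        ggml_backend_graph_compute(backend, gf);    // run on the chosen backend
        // by convention the last node of the graph holds the result
        struct ggml_tensor * out = gf->nodes[gf->n_nodes - 1];
        ggml_backend_tensor_get(out, out_buf, 0, ggml_nbytes(out)); // device -> host
        return true;
    }

Note the actual fix in this hunk: the unconditional clip_free(ctx) is replaced by a comment, moving the free to the caller so the context is not released twice.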
@@ -44,12 +44,12 @@ static bool eval_string(struct llama_context *ctx_llama, const char *str, int n_
    std::string str2 = str;
    std::vector<llama_token> embd_inp = ::llama_tokenize(ctx_llama, str2, add_bos, true);
    printf("!!prompt to eval!!: %s", str);
    printf("----------------------\n");
    // for (auto token : embd_inp){
    //     printf("%6d, ", token);
    // }
    printf("\n");
    // printf("!!prompt to eval!!: %s", str);
    // printf("----------------------\n");
    // // for (auto token : embd_inp){
    // //     printf("%6d, ", token);
    // // }
    // printf("\n");
    eval_tokens(ctx_llama, embd_inp, n_batch, n_past);
    return true;
}
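eval_tokens, called at the end, pushes the token ids through llama_decode in n_batch-sized chunks. A minimal sketch of that loop (assumed from the upstream llava-cli helper of this era; the four-argument llama_batch_get_one signature shown is the one in use at the time):

    // Sketch of eval_tokens-style batched decoding; upstream shape assumed.
    #include <algorithm>
    #include <vector>

    static bool eval_tokens_sketch(struct llama_context * ctx_llama,
                                   std::vector<llama_token> tokens,
                                   int n_batch, int * n_past) {
        const int N = (int) tokens.size();
        for (int i = 0; i < N; i += n_batch) {
            const int n_eval = std::min(n_batch, N - i); // last chunk may be short
            if (llama_decode(ctx_llama,
                             llama_batch_get_one(&tokens[i], n_eval, *n_past, 0))) {
                return false; // decode failed
            }
            *n_past += n_eval; // advance the KV-cache position
        }
        return true;
    }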
@@ -217,23 +217,23 @@ static void process_prompt(struct llava_context *ctx_llava, struct llava_image_e
        user_prompt = prompt.substr(image_pos + std::string("<image>").length());
        LOG_TEE("system_prompt: %s\n", system_prompt.c_str());
        // phi3-tokenizer https://github.com/ggerganov/llama.cpp/issues/7938
        if (params->verbose_prompt)
        {
            auto tmp = ::llama_tokenize(ctx_llava->ctx_llama, system_prompt, true, true);
            for (int i = 0; i < (int)tmp.size(); i++)
            {
                LOG_TEE("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
            }
        }
        // if (params->verbose_prompt)
        // {
        //     auto tmp = ::llama_tokenize(ctx_llava->ctx_llama, system_prompt, true, true);
        //     for (int i = 0; i < (int)tmp.size(); i++)
        //     {
        //         LOG_TEE("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
        //     }
        // }
        LOG_TEE("user_prompt: %s\n", user_prompt.c_str());
        if (params->verbose_prompt)
        {
            auto tmp = ::llama_tokenize(ctx_llava->ctx_llama, user_prompt, true, true);
            for (int i = 0; i < (int)tmp.size(); i++)
            {
                LOG_TEE("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
            }
        }
        // if (params->verbose_prompt)
        // {
        //     auto tmp = ::llama_tokenize(ctx_llava->ctx_llama, user_prompt, true, true);
        //     for (int i = 0; i < (int)tmp.size(); i++)
        //     {
        //         LOG_TEE("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
        //     }
        // }
    }
    else
    {
@@ -280,11 +280,11 @@ static void process_prompt(struct llava_context *ctx_llava, struct llava_image_e
        response += tmp;
        // printf("%s", tmp);
        if (strcmp(tmp, "<|end|>") == 0){
            printf("\n STOP GENERATING because I saw <|end|>\n");
            // printf("\n STOP GENERATING because I saw <|end|>\n");
            break;
        }
        if (strcmp(tmp, "</s>") == 0) {
            printf("\n STOP GENERATING because I saw </s>\n");
            // printf("\n STOP GENERATING because I saw </s>\n");
            break;
        }
        if (strstr(tmp, "###")) break; // Yi-VL behavior
@@ -327,7 +327,6 @@ static struct llava_context *llava_init_context(gpt_params *params, llama_model
    }

    auto ctx_clip = clip_model_load(clip_path, /*verbosity=*/1);

    llama_context_params ctx_params = llama_context_params_from_gpt_params(*params);
    ctx_params.n_ctx =
        params->n_ctx < 2048 ? 2048 : params->n_ctx; // we need a longer context size to process image embeddings
@@ -349,11 +348,7 @@ static struct llava_context *llava_init_context(gpt_params *params, llama_model
}

static void llava_free(struct llava_context * ctx_llava) {
    if (ctx_llava->ctx_clip) {
        printf(
            "YD:::Segmentation fault here; Because header.n_kv is empty\n clip_free->gguf_free(ctx->ctx_gguf)-> for "
            "(uint64_t i = 0; i < ctx->header.n_kv; ++i)\n");
        exit(1);
    if (ctx_llava->ctx_clip) {
        clip_free(ctx_llava->ctx_clip);
        ctx_llava->ctx_clip = NULL;
    }
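This is the heart of the memory fix. The deleted debug printf records the symptom: gguf_free walks for (uint64_t i = 0; i < ctx->header.n_kv; ++i), and when the clip_ctx was already freed inside clip_image_encode_tokenizer, header.n_kv is garbage and the loop reads freed memory. The replacement is the usual guard-and-null teardown; a minimal sketch of why nulling the pointer matters (hypothetical resource type, not the literal code):

    // Sketch: free-then-null makes teardown safe to reach twice.
    // Hypothetical type; the real code guards ctx_llava->ctx_clip.
    #include <cstdlib>

    struct resource { unsigned n_kv; /* ... */ };

    static void teardown(resource ** r) {
        if (*r) {           // a second call sees NULL and does nothing
            free(*r);
            *r = nullptr;   // without this, a later teardown would read
        }                   // (*r)->n_kv from freed memory, as in the bug
    }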
@@ -528,7 +523,6 @@ int main(int argc, char ** argv) {
    if (prompt_contains_image(params.prompt))
    {
        auto ctx_llava = llava_init_context(&params, model);

        auto image_embed = load_image(ctx_llava, &params, "");

        // process the prompt
@@ -543,11 +537,11 @@ int main(int argc, char ** argv) {
    {
        for (auto &image : params.image)
        {
            printf("image: %s\n", image.c_str());
            // printf("image: %s\n", image.c_str());
            auto ctx_llava = llava_init_context(&params, model);

            auto image_embed = load_image(ctx_llava, &params, image);
            printf("n_image_pos: %d\n", image_embed->n_image_pos);
            // printf("n_image_pos: %d\n", image_embed->n_image_pos);
            if (!image_embed)
            {
                std::cerr << "error: failed to load image " << image << ". Terminating\n\n";
@@ -575,6 +569,6 @@ int main(int argc, char ** argv) {
    //     ctx_llava->model = NULL;
    //     llava_free(ctx_llava);
    // }
    printf("Remember to remove print_tensor function in xgenmm.cpp and clip.cpp\n");
    // printf("Remember to remove print_tensor function in xgenmm.cpp and clip.cpp\n");
    return 0;
}
@@ -534,7 +534,7 @@ static bool clip_xgenmm_handle_vit_patches(clip_ctx *ctx_clip , const clip_image
        scale_factor = (float)current_height / (float)original_height;
        int new_width = int(original_width * scale_factor);
        int padding = (current_width - new_width) / 2;
        printf("new_width: %d, padding: %d\n", new_width, padding);
        // printf("new_width: %d, padding: %d\n", new_width, padding);
        for (int i = 0; i < current_height; i++){
            for (int j = 0; j < current_width; j++){
                if (j < padding || j >= current_width - padding)
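The padding arithmetic above follows from the aspect-preserving resize: the image was scaled by height, so new_width = original_width * current_height / original_height, and (current_width - new_width) / 2 columns on each side are padding. A small worked example with invented numbers:

    // Worked example (invented numbers): original image 600x800,
    // padded canvas 336x336, scaled by height.
    float scale_factor = 336.0f / 800.0f;          // 0.42
    int   new_width    = int(600 * scale_factor);  // 252 real image columns
    int   padding      = (336 - 252) / 2;          // 42 pad columns per side
    // columns j < 42 or j >= 336 - 42 are padding and get masked out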
@@ -561,33 +561,6 @@ static bool clip_xgenmm_handle_vit_patches(clip_ctx *ctx_clip , const clip_image
    ggml_graph_compute_with_ctx(mask.ctx, gf, 1);
    attention_mask = gf->nodes[gf->n_nodes - 1];
    // memcpy(image_embd_v_m_mask_out, (float *)attention_mask->data, ggml_nbytes(attention_mask));

    // {
    //     printf((" ========================= DEBUG =========================\n"));
    //     printf("Load pre-computed image embeddings and attention_mask\n");
    //     std::string filename = "/export/home/ggml/examples/projectors/receipt_5patches_vision_features.gguf";
    //     tensor_from_gguf tensor;
    //     bool is_successful = load_tensor_from_file(filename.c_str(), tensor);
    //     if (!is_successful)
    //     {
    //         fprintf(stderr, "%s: load_tensor_from_file() failed\n", __func__);
    //         return 1;
    //     }
    //     result = tensor.data;
    //     // print_tensor(result, "result", 1);
    //     filename = "/export/home/ggml/examples/projectors/receipt_5patches_vision_attn_masks.gguf";
    //     is_successful = load_tensor_from_file(filename.c_str(), tensor);
    //     if (!is_successful)
    //     {
    //         fprintf(stderr, "%s: load_tensor_from_file() failed\n", __func__);
    //         return 1;
    //     }
    //     attention_mask = tensor.data;
    //     // print_tensor(attention_mask, "attention_mask", 1);
    //     num_patches_width = 2;
    //     num_patches_height = 2;
    // }

    // compute attention masks outside of the graph
    struct ggml_tensor * attn_bias_input;
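Computing the attention mask outside the graph means materializing a plain additive-bias tensor before graph build: kept positions contribute 0 to the pre-softmax logits, padded positions a large negative value so softmax zeroes them. A minimal sketch of that conversion (layout assumed; not the literal xgenmm code):

    // Sketch: turn a 0/1 keep-mask into an additive attention bias.
    #include <cmath>
    #include <vector>

    static std::vector<float> mask_to_attn_bias(const std::vector<float> & keep) {
        std::vector<float> bias(keep.size());
        for (size_t i = 0; i < keep.size(); ++i) {
            // 1.0 -> attend (bias 0); 0.0 -> padding (-inf before softmax)
            bias[i] = keep[i] > 0.5f ? 0.0f : -INFINITY;
        }
        return bias; // uploaded into attn_bias_input, e.g. via ggml_backend_tensor_set
    }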
@@ -639,7 +612,7 @@ static bool clip_xgenmm_handle_vit_patches(clip_ctx *ctx_clip , const clip_image
    int batch_size = num_patches_width * num_patches_height + 1;
    // print_tensor(attn_bias_input, "attn_bias_input", 1);
    // print_tensor(result, "result", 1);
    printf("batch_size: %d\n", batch_size);
    // printf("batch_size: %d\n", batch_size);
    const bool encoded = clip_image_encode_tokenizer(
        ctx_clip, batch_size, result, attn_bias_input, image_embd);
    if (!encoded){
@@ -982,6 +955,7 @@ bool llava_image_embed_make_with_clip_img(clip_ctx *ctx_clip, int n_threads, con
        free(image_embd);
        return false;
    }

    *image_embd_out = image_embd;
    *n_img_pos_out = n_img_pos;
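This tail shows the ownership contract that pairs with the double-free fix: on failure the function frees its own allocation and returns false; on success the buffer is handed to the caller through the out parameters, and only the caller frees it, exactly once. A minimal sketch of the contract (hypothetical names; the real function is llava_image_embed_make_with_clip_img):

    // Sketch of the out-parameter ownership contract.
    #include <cstdlib>

    static bool make_embedding(float ** embd_out, int * n_pos_out) {
        const int n = 1024;                            // hypothetical size
        float * embd = (float *) malloc(n * sizeof(float));
        if (!embd) return false;
        const bool ok = true;                          // stands in for the encode step
        if (!ok) {
            free(embd);    // failure: we still own the buffer, so we free it
            return false;  // out parameters are left untouched
        }
        *embd_out  = embd; // success: ownership transfers to the caller,
        *n_pos_out = n;    // who must free(*embd_out) exactly once
        return true;
    }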