From 953bef9374041888416720ef4b0538734c792ee4 Mon Sep 17 00:00:00 2001 From: root Date: Mon, 7 Oct 2024 21:36:10 +0000 Subject: [PATCH] fix memory error & clean the print statements --- examples/xgenmm/clip.cpp | 12 +++---- examples/xgenmm/xgenmm-cli.cpp | 62 +++++++++++++++------------------- examples/xgenmm/xgenmm.cpp | 32 ++---------------- 3 files changed, 36 insertions(+), 70 deletions(-) diff --git a/examples/xgenmm/clip.cpp b/examples/xgenmm/clip.cpp index 558f906f2..96e868351 100644 --- a/examples/xgenmm/clip.cpp +++ b/examples/xgenmm/clip.cpp @@ -1504,6 +1504,9 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) { // kv const int n_kv = gguf_get_n_kv(ctx); + + // std::cout << "do I have n_kv here at clip.cpp "<< __LINE__ << "? : " << gguf_get_n_kv(ctx) <compute_alloc, 0); LOG_TEE("%s: compute allocated memory: %.2f MB\n", __func__, compute_memory_buffer_size /1024.0/1024.0); } - return new_clip; } @@ -2424,7 +2426,6 @@ int clip_uhd_num_image_embeds_col(struct clip_ctx * ctx_clip) { // returns the normalized float tensor for llava-1.5, for spatial_unpad with anyres processing for llava-1.6 it returns the normalized image patch tensors as a vector // res_imgs memory is being allocated here, previous allocations will be freed if found bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, clip_image_f32_batch * res_imgs) { - if(clip_is_minicpmv(ctx)){ int max_slice_nums = 9; std::vector> imgs = uhd_slice_image(img, max_slice_nums); @@ -2497,7 +2498,7 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, cli possible_resolutions.push_back({params.image_grid_pinpoints[i], params.image_grid_pinpoints[i+1]}); } std::pair best_resolution = select_best_resolution({img->nx, img->ny}, possible_resolutions); - printf("best_resolution: %d %d\n", best_resolution.first, best_resolution.second); + // printf("best_resolution: %d %d\n", best_resolution.first, best_resolution.second); // 
clip_image_save_to_bmp(*img, "input.bmp"); resize_and_pad_image(*img, *temp, best_resolution); // we do not pad with mean-bg color anymore in llava-1.6 // clip_image_save_to_bmp(*temp, "resized.bmp"); @@ -2624,7 +2625,6 @@ ggml_tensor * clip_get_newline_tensor(const struct clip_ctx * ctx) { void clip_free(clip_ctx * ctx) { ggml_free(ctx->ctx_data); gguf_free(ctx->ctx_gguf); - ggml_backend_buffer_free(ctx->params_buffer); ggml_backend_free(ctx->backend); ggml_gallocr_free(ctx->compute_alloc); @@ -2807,10 +2807,8 @@ bool clip_image_encode_tokenizer(struct clip_ctx * ctx, int batch_size, ggml_ten ggml_gallocr_alloc_graph(ctx->compute_alloc, gf); ggml_backend_graph_compute(ctx->backend, gf); struct ggml_tensor * llm_inputs = gf->nodes[gf->n_nodes - 1]; - print_my_tensor(llm_inputs, "llm_inputs", 1); - // exit(0); ggml_backend_tensor_get(llm_inputs, image_embd, 0, ggml_nbytes(llm_inputs)); - clip_free(ctx); + // clip_free(ctx); // debug: llava_ctx was freed here, now free all the 'ctx' memory outside. 
return true; } diff --git a/examples/xgenmm/xgenmm-cli.cpp b/examples/xgenmm/xgenmm-cli.cpp index c2c0e94a3..4928038a8 100644 --- a/examples/xgenmm/xgenmm-cli.cpp +++ b/examples/xgenmm/xgenmm-cli.cpp @@ -44,12 +44,12 @@ static bool eval_string(struct llama_context *ctx_llama, const char *str, int n_ std::string str2 = str; std::vector embd_inp = ::llama_tokenize(ctx_llama, str2, add_bos, true); - printf("!!prompt to eval!!: %s", str); - printf("----------------------\n"); - // for (auto token : embd_inp){ - // printf("%6d, ", token); - // } - printf("\n"); + // printf("!!prompt to eval!!: %s", str); + // printf("----------------------\n"); + // // for (auto token : embd_inp){ + // // printf("%6d, ", token); + // // } + // printf("\n"); eval_tokens(ctx_llama, embd_inp, n_batch, n_past); return true; } @@ -217,23 +217,23 @@ static void process_prompt(struct llava_context *ctx_llava, struct llava_image_e user_prompt = prompt.substr(image_pos + std::string("").length()); LOG_TEE("system_prompt: %s\n", system_prompt.c_str()); // phi3-tokenizer https://github.com/ggerganov/llama.cpp/issues/7938 - if (params->verbose_prompt) - { - auto tmp = ::llama_tokenize(ctx_llava->ctx_llama, system_prompt, true, true); - for (int i = 0; i < (int)tmp.size(); i++) - { - LOG_TEE("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str()); - } - } + // if (params->verbose_prompt) + // { + // auto tmp = ::llama_tokenize(ctx_llava->ctx_llama, system_prompt, true, true); + // for (int i = 0; i < (int)tmp.size(); i++) + // { + // LOG_TEE("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str()); + // } + // } LOG_TEE("user_prompt: %s\n", user_prompt.c_str()); - if (params->verbose_prompt) - { - auto tmp = ::llama_tokenize(ctx_llava->ctx_llama, user_prompt, true, true); - for (int i = 0; i < (int)tmp.size(); i++) - { - LOG_TEE("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str()); - } - } + // if 
(params->verbose_prompt) + // { + // auto tmp = ::llama_tokenize(ctx_llava->ctx_llama, user_prompt, true, true); + // for (int i = 0; i < (int)tmp.size(); i++) + // { + // LOG_TEE("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str()); + // } + // } } else { @@ -280,11 +280,11 @@ static void process_prompt(struct llava_context *ctx_llava, struct llava_image_e response += tmp; // printf("%s", tmp); if (strcmp(tmp, "<|end|>") == 0){ - printf("\n STOP GENERATING because I saw <|end|>\n"); + // printf("\n STOP GENERATING because I saw <|end|>\n"); break; } if (strcmp(tmp, "") == 0) { - printf("\n STOP GENERATING because I saw \n"); + // printf("\n STOP GENERATING because I saw \n"); break; } if (strstr(tmp, "###")) break; // Yi-VL behavior @@ -327,7 +327,6 @@ static struct llava_context *llava_init_context(gpt_params *params, llama_model } auto ctx_clip = clip_model_load(clip_path, /*verbosity=*/1); - llama_context_params ctx_params = llama_context_params_from_gpt_params(*params); ctx_params.n_ctx = params->n_ctx < 2048 ? 
2048 : params->n_ctx; // we need a longer context size to process image embeddings @@ -349,11 +348,7 @@ static struct llava_context *llava_init_context(gpt_params *params, llama_model } static void llava_free(struct llava_context * ctx_llava) { - if (ctx_llava->ctx_clip) { - printf( - "YD:::Segmentation fault here; Because header.n_kv is empty\n clip_free->gguf_free(ctx->ctx_gguf)-> for " - "(uint64_t i = 0; i < ctx->header.n_kv; ++i)\n"); - exit(1); + if (ctx_llava->ctx_clip) { clip_free(ctx_llava->ctx_clip); ctx_llava->ctx_clip = NULL; } @@ -528,7 +523,6 @@ int main(int argc, char ** argv) { if (prompt_contains_image(params.prompt)) { auto ctx_llava = llava_init_context(¶ms, model); - auto image_embed = load_image(ctx_llava, ¶ms, ""); // process the prompt @@ -543,11 +537,11 @@ int main(int argc, char ** argv) { { for (auto &image : params.image) { - printf("image: %s\n", image.c_str()); + // printf("image: %s\n", image.c_str()); auto ctx_llava = llava_init_context(¶ms, model); auto image_embed = load_image(ctx_llava, ¶ms, image); - printf("n_image_pos: %d\n", image_embed->n_image_pos); + // printf("n_image_pos: %d\n", image_embed->n_image_pos); if (!image_embed) { std::cerr << "error: failed to load image " << image << ". 
Terminating\n\n"; @@ -575,6 +569,6 @@ int main(int argc, char ** argv) { // ctx_llava->model = NULL; // llava_free(ctx_llava); // } - printf("Remember to remove print_tensor function in xgenmm.cpp and clip.cpp\n"); + // printf("Remember to remove print_tensor function in xgenmm.cpp and clip.cpp\n"); return 0; } diff --git a/examples/xgenmm/xgenmm.cpp b/examples/xgenmm/xgenmm.cpp index b82cfa728..dd6a81e33 100644 --- a/examples/xgenmm/xgenmm.cpp +++ b/examples/xgenmm/xgenmm.cpp @@ -534,7 +534,7 @@ static bool clip_xgenmm_handle_vit_patches(clip_ctx *ctx_clip , const clip_image scale_factor = (float)current_height / (float)original_height; int new_width = int(original_width * scale_factor); int padding = (current_width - new_width) / 2; - printf("new_width: %d, padding: %d\n", new_width, padding); + // printf("new_width: %d, padding: %d\n", new_width, padding); for (int i = 0; i < current_height; i++){ for (int j = 0; j < current_width; j++){ if (j < padding || j >= current_width - padding) @@ -561,33 +561,6 @@ static bool clip_xgenmm_handle_vit_patches(clip_ctx *ctx_clip , const clip_image ggml_graph_compute_with_ctx(mask.ctx, gf, 1); attention_mask = gf->nodes[gf->n_nodes - 1]; // memcpy(image_embd_v_m_mask_out, (float *)attention_mask->data, ggml_nbytes(attention_mask)); - - // { - // printf((" ========================= DEBUG =========================\n")); - // printf("Load pre-computed image embeddings and attention_mask\n"); - // std::string filename = "/export/home/ggml/examples/projectors/receipt_5patches_vision_features.gguf"; - // tensor_from_gguf tensor; - // bool is_successful = load_tensor_from_file(filename.c_str(), tensor); - // if (!is_successful) - // { - // fprintf(stderr, "%s: load_tensor_from_file() failed\n", __func__); - // return 1; - // } - // result = tensor.data; - // // print_tensor(result, "result", 1); - // filename = "/export/home/ggml/examples/projectors/receipt_5patches_vision_attn_masks.gguf"; - // is_successful = 
load_tensor_from_file(filename.c_str(), tensor); - // if (!is_successful) - // { - // fprintf(stderr, "%s: load_tensor_from_file() failed\n", __func__); - // return 1; - // } - // attention_mask = tensor.data; - // // print_tensor(attention_mask, "attention_mask", 1); - // num_patches_width = 2; - // num_patches_height = 2; - // } - // compute attnetion masks outside of the graph struct ggml_tensor * attn_bias_input; @@ -639,7 +612,7 @@ static bool clip_xgenmm_handle_vit_patches(clip_ctx *ctx_clip , const clip_image int batch_size = num_patches_width * num_patches_height + 1; // print_tensor(attn_bias_input, "attn_bias_input", 1); // print_tensor(result, "result", 1); - printf("batch_size: %d\n", batch_size); + // printf("batch_size: %d\n", batch_size); const bool encoded = clip_image_encode_tokenizer( ctx_clip, batch_size, result, attn_bias_input, image_embd); if (!encoded){ @@ -982,6 +955,7 @@ bool llava_image_embed_make_with_clip_img(clip_ctx *ctx_clip, int n_threads, con free(image_embd); return false; } + *image_embd_out = image_embd; *n_img_pos_out = n_img_pos;