fix memory error & clean up the print statements

root 2024-10-07 21:36:10 +00:00
parent aa23425236
commit 953bef9374
3 changed files with 36 additions and 70 deletions
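
Background for the fix: the segmentation fault came from a double free of the clip context. clip_image_encode_tokenizer() called clip_free(ctx) after reading back the LLM inputs, and llava_free() then freed the same context again, so gguf_free() iterated over an already-freed header (the empty header.n_kv noted in the removed debug printf). This commit moves ownership outside the encode path: clip_free() is no longer called inside clip_image_encode_tokenizer(), and llava_free() remains the single release point. The remaining changes comment out ad-hoc debug prints. A hedged, stand-alone sketch of the before/after ownership follows the first file's diff below.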

View file

@@ -1504,6 +1504,9 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
     // kv
     const int n_kv = gguf_get_n_kv(ctx);
+    // std::cout << "do I have n_kv here at clip.cpp "<< __LINE__ << "? : " << gguf_get_n_kv(ctx) <<std::endl;
     LOG_TEE("%s: loaded meta data with %d key-value pairs and %d tensors from %s\n",
         __func__, n_kv, n_tensors, fname);
     {
@@ -1981,7 +1984,6 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
         size_t compute_memory_buffer_size = ggml_gallocr_get_buffer_size(new_clip->compute_alloc, 0);
         LOG_TEE("%s: compute allocated memory: %.2f MB\n", __func__, compute_memory_buffer_size /1024.0/1024.0);
     }
     return new_clip;
 }
@@ -2424,7 +2426,6 @@ int clip_uhd_num_image_embeds_col(struct clip_ctx * ctx_clip) {
 // returns the normalized float tensor for llava-1.5, for spatial_unpad with anyres processing for llava-1.6 it returns the normalized image patch tensors as a vector
 // res_imgs memory is being allocated here, previous allocations will be freed if found
 bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, clip_image_f32_batch * res_imgs) {
     if(clip_is_minicpmv(ctx)){
         int max_slice_nums = 9;
         std::vector<std::vector<clip_image_u8 *>> imgs = uhd_slice_image(img, max_slice_nums);
@@ -2497,7 +2498,7 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, clip_image_f32_batch * res_imgs) {
         possible_resolutions.push_back({params.image_grid_pinpoints[i], params.image_grid_pinpoints[i+1]});
     }
     std::pair<int, int> best_resolution = select_best_resolution({img->nx, img->ny}, possible_resolutions);
-    printf("best_resolution: %d %d\n", best_resolution.first, best_resolution.second);
+    // printf("best_resolution: %d %d\n", best_resolution.first, best_resolution.second);
     // clip_image_save_to_bmp(*img, "input.bmp");
     resize_and_pad_image(*img, *temp, best_resolution);  // we do not pad with mean-bg color anymore in llava-1.6
     // clip_image_save_to_bmp(*temp, "resized.bmp");
@@ -2624,7 +2625,6 @@ ggml_tensor * clip_get_newline_tensor(const struct clip_ctx * ctx) {
 void clip_free(clip_ctx * ctx) {
     ggml_free(ctx->ctx_data);
     gguf_free(ctx->ctx_gguf);
     ggml_backend_buffer_free(ctx->params_buffer);
     ggml_backend_free(ctx->backend);
     ggml_gallocr_free(ctx->compute_alloc);
@@ -2807,10 +2807,8 @@ bool clip_image_encode_tokenizer(struct clip_ctx * ctx, int batch_size, ggml_ten
     ggml_gallocr_alloc_graph(ctx->compute_alloc, gf);
     ggml_backend_graph_compute(ctx->backend, gf);
     struct ggml_tensor * llm_inputs = gf->nodes[gf->n_nodes - 1];
-    print_my_tensor(llm_inputs, "llm_inputs", 1);
-    // exit(0);
     ggml_backend_tensor_get(llm_inputs, image_embd, 0, ggml_nbytes(llm_inputs));
-    clip_free(ctx);
+    // clip_free(ctx);  // debug: llava_ctx was freed here, now free all the 'ctx' memory outside.
     return true;
 }
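
For illustration: a minimal, self-contained sketch of the free-once ownership the hunks above move to. The types and the encode function below are hypothetical stand-ins (the real clip_ctx, clip_free(), clip_image_encode_tokenizer() and llava_free() live in clip.cpp and the llava example); this is a reduction of the pattern, not the project's API.

    #include <cstdlib>

    // Hypothetical stand-ins for the real clip_ctx / llava_context.
    struct clip_ctx        { int n_kv; };
    struct llava_context_s { clip_ctx * ctx_clip; };

    static void clip_free(clip_ctx * ctx) { free(ctx); }

    // Before this commit: the encode path freed the clip context itself,
    // and llava_free() freed it a second time; gguf_free() then walked the
    // already-freed header (the header.n_kv segfault in the removed print).
    //
    // After this commit: encode leaves the context alone; llava_free()
    // frees exactly once and nulls the pointer so repeated calls are safe.
    static bool encode(llava_context_s * /*c*/) {
        // ... build graph, compute, copy embeddings out ...
        return true;  // no clip_free() here anymore
    }

    static void llava_free(llava_context_s * c) {
        if (c->ctx_clip) {
            clip_free(c->ctx_clip);  // single release point
            c->ctx_clip = nullptr;   // make a second call a no-op
        }
    }

    int main() {
        llava_context_s c { (clip_ctx *) calloc(1, sizeof(clip_ctx)) };
        encode(&c);      // ownership stays with the caller
        llava_free(&c);  // single, guarded free
        llava_free(&c);  // harmless: pointer already nulled
        return 0;
    }

The null check plus the pointer reset is what makes a second llava_free() call harmless once the encode path stops freeing the context behind the caller's back.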

View file

@@ -44,12 +44,12 @@ static bool eval_string(struct llama_context *ctx_llama, const char *str, int n_
     std::string str2 = str;
     std::vector<llama_token> embd_inp = ::llama_tokenize(ctx_llama, str2, add_bos, true);
-    printf("!!prompt to eval!!: %s", str);
-    printf("----------------------\n");
-    // for (auto token : embd_inp){
-    //     printf("%6d, ", token);
-    // }
-    printf("\n");
+    // printf("!!prompt to eval!!: %s", str);
+    // printf("----------------------\n");
+    // // for (auto token : embd_inp){
+    // //     printf("%6d, ", token);
+    // // }
+    // printf("\n");
     eval_tokens(ctx_llama, embd_inp, n_batch, n_past);
     return true;
 }
@@ -217,23 +217,23 @@ static void process_prompt(struct llava_context *ctx_llava, struct llava_image_e
         user_prompt = prompt.substr(image_pos + std::string("<image>").length());
         LOG_TEE("system_prompt: %s\n", system_prompt.c_str());
         // phi3-tokenizer https://github.com/ggerganov/llama.cpp/issues/7938
-        if (params->verbose_prompt)
-        {
-            auto tmp = ::llama_tokenize(ctx_llava->ctx_llama, system_prompt, true, true);
-            for (int i = 0; i < (int)tmp.size(); i++)
-            {
-                LOG_TEE("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
-            }
-        }
+        // if (params->verbose_prompt)
+        // {
+        //     auto tmp = ::llama_tokenize(ctx_llava->ctx_llama, system_prompt, true, true);
+        //     for (int i = 0; i < (int)tmp.size(); i++)
+        //     {
+        //         LOG_TEE("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
+        //     }
+        // }
         LOG_TEE("user_prompt: %s\n", user_prompt.c_str());
-        if (params->verbose_prompt)
-        {
-            auto tmp = ::llama_tokenize(ctx_llava->ctx_llama, user_prompt, true, true);
-            for (int i = 0; i < (int)tmp.size(); i++)
-            {
-                LOG_TEE("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
-            }
-        }
+        // if (params->verbose_prompt)
+        // {
+        //     auto tmp = ::llama_tokenize(ctx_llava->ctx_llama, user_prompt, true, true);
+        //     for (int i = 0; i < (int)tmp.size(); i++)
+        //     {
+        //         LOG_TEE("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
+        //     }
+        // }
     }
     else
     {
@@ -280,11 +280,11 @@ static void process_prompt(struct llava_context *ctx_llava, struct llava_image_e
         response += tmp;
         // printf("%s", tmp);
         if (strcmp(tmp, "<|end|>") == 0){
-            printf("\n STOP GENERATING because I saw <|end|>\n");
+            // printf("\n STOP GENERATING because I saw <|end|>\n");
             break;
         }
         if (strcmp(tmp, "</s>") == 0) {
-            printf("\n STOP GENERATING because I saw </s>\n");
+            // printf("\n STOP GENERATING because I saw </s>\n");
             break;
         }
         if (strstr(tmp, "###")) break;  // Yi-VL behavior
@@ -327,7 +327,6 @@ static struct llava_context *llava_init_context(gpt_params *params, llama_model
     }
     auto ctx_clip = clip_model_load(clip_path, /*verbosity=*/1);
     llama_context_params ctx_params = llama_context_params_from_gpt_params(*params);
     ctx_params.n_ctx =
         params->n_ctx < 2048 ? 2048 : params->n_ctx;  // we need a longer context size to process image embeddings
@@ -349,11 +348,7 @@ static struct llava_context *llava_init_context(gpt_params *params, llama_model
 }
 static void llava_free(struct llava_context * ctx_llava) {
     if (ctx_llava->ctx_clip) {
-        printf(
-            "YD:::Segmentation fault here; Because header.n_kv is empty\n clip_free->gguf_free(ctx->ctx_gguf)-> for "
-            "(uint64_t i = 0; i < ctx->header.n_kv; ++i)\n");
-        exit(1);
         clip_free(ctx_llava->ctx_clip);
         ctx_llava->ctx_clip = NULL;
     }
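Note: with clip_free() no longer called inside clip_image_encode_tokenizer() (first file above), this guarded clip_free() is once again the single release point, which is why the YD::: segfault printf and the exit(1) scaffolding can be deleted here.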
@@ -528,7 +523,6 @@ int main(int argc, char ** argv) {
     if (prompt_contains_image(params.prompt))
     {
         auto ctx_llava = llava_init_context(&params, model);
         auto image_embed = load_image(ctx_llava, &params, "");
         // process the prompt
@@ -543,11 +537,11 @@ int main(int argc, char ** argv) {
     {
         for (auto &image : params.image)
         {
-            printf("image: %s\n", image.c_str());
+            // printf("image: %s\n", image.c_str());
             auto ctx_llava = llava_init_context(&params, model);
             auto image_embed = load_image(ctx_llava, &params, image);
-            printf("n_image_pos: %d\n", image_embed->n_image_pos);
+            // printf("n_image_pos: %d\n", image_embed->n_image_pos);
             if (!image_embed)
             {
                 std::cerr << "error: failed to load image " << image << ". Terminating\n\n";
@@ -575,6 +569,6 @@ int main(int argc, char ** argv) {
     //     ctx_llava->model = NULL;
     //     llava_free(ctx_llava);
     // }
-    printf("Remember to remove print_tensor function in xgenmm.cpp and clip.cpp\n");
+    // printf("Remember to remove print_tensor function in xgenmm.cpp and clip.cpp\n");
     return 0;
 }

View file

@@ -534,7 +534,7 @@ static bool clip_xgenmm_handle_vit_patches(clip_ctx *ctx_clip , const clip_image
         scale_factor = (float)current_height / (float)original_height;
         int new_width = int(original_width * scale_factor);
         int padding = (current_width - new_width) / 2;
-        printf("new_width: %d, padding: %d\n", new_width, padding);
+        // printf("new_width: %d, padding: %d\n", new_width, padding);
         for (int i = 0; i < current_height; i++){
             for (int j = 0; j < current_width; j++){
                 if (j < padding || j >= current_width - padding)
@@ -561,33 +561,6 @@ static bool clip_xgenmm_handle_vit_patches(clip_ctx *ctx_clip , const clip_image
     ggml_graph_compute_with_ctx(mask.ctx, gf, 1);
     attention_mask = gf->nodes[gf->n_nodes - 1];
     // memcpy(image_embd_v_m_mask_out, (float *)attention_mask->data, ggml_nbytes(attention_mask));
-    // {
-    //     printf((" ========================= DEBUG =========================\n"));
-    //     printf("Load pre-computed image embeddings and attention_mask\n");
-    //     std::string filename = "/export/home/ggml/examples/projectors/receipt_5patches_vision_features.gguf";
-    //     tensor_from_gguf tensor;
-    //     bool is_successful = load_tensor_from_file(filename.c_str(), tensor);
-    //     if (!is_successful)
-    //     {
-    //         fprintf(stderr, "%s: load_tensor_from_file() failed\n", __func__);
-    //         return 1;
-    //     }
-    //     result = tensor.data;
-    //     // print_tensor(result, "result", 1);
-    //     filename = "/export/home/ggml/examples/projectors/receipt_5patches_vision_attn_masks.gguf";
-    //     is_successful = load_tensor_from_file(filename.c_str(), tensor);
-    //     if (!is_successful)
-    //     {
-    //         fprintf(stderr, "%s: load_tensor_from_file() failed\n", __func__);
-    //         return 1;
-    //     }
-    //     attention_mask = tensor.data;
-    //     // print_tensor(attention_mask, "attention_mask", 1);
-    //     num_patches_width = 2;
-    //     num_patches_height = 2;
-    // }
     // compute attention masks outside of the graph
     struct ggml_tensor * attn_bias_input;
@@ -639,7 +612,7 @@ static bool clip_xgenmm_handle_vit_patches(clip_ctx *ctx_clip , const clip_image
     int batch_size = num_patches_width * num_patches_height + 1;
     // print_tensor(attn_bias_input, "attn_bias_input", 1);
     // print_tensor(result, "result", 1);
-    printf("batch_size: %d\n", batch_size);
+    // printf("batch_size: %d\n", batch_size);
     const bool encoded = clip_image_encode_tokenizer(
         ctx_clip, batch_size, result, attn_bias_input, image_embd);
     if (!encoded){
@@ -982,6 +955,7 @@ bool llava_image_embed_make_with_clip_img(clip_ctx *ctx_clip, int n_threads, con
         free(image_embd);
         return false;
     }
     *image_embd_out = image_embd;
     *n_img_pos_out = n_img_pos;