fix memory error & clean up print statements

root 2024-10-07 21:36:10 +00:00
parent aa23425236
commit 953bef9374
3 changed files with 36 additions and 70 deletions

View file

@@ -1504,6 +1504,9 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
// kv
const int n_kv = gguf_get_n_kv(ctx);
// std::cout << "do I have n_kv here at clip.cpp "<< __LINE__ << "? : " << gguf_get_n_kv(ctx) <<std::endl;
LOG_TEE("%s: loaded meta data with %d key-value pairs and %d tensors from %s\n",
__func__, n_kv, n_tensors, fname);
{
@@ -1981,7 +1984,6 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
size_t compute_memory_buffer_size = ggml_gallocr_get_buffer_size(new_clip->compute_alloc, 0);
LOG_TEE("%s: compute allocated memory: %.2f MB\n", __func__, compute_memory_buffer_size /1024.0/1024.0);
}
return new_clip;
}
@@ -2424,7 +2426,6 @@ int clip_uhd_num_image_embeds_col(struct clip_ctx * ctx_clip) {
// returns the normalized float tensor for llava-1.5, for spatial_unpad with anyres processing for llava-1.6 it returns the normalized image patch tensors as a vector
// res_imgs memory is being allocated here, previous allocations will be freed if found
bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, clip_image_f32_batch * res_imgs) {
if(clip_is_minicpmv(ctx)){
int max_slice_nums = 9;
std::vector<std::vector<clip_image_u8 *>> imgs = uhd_slice_image(img, max_slice_nums);
@@ -2497,7 +2498,7 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, cli
possible_resolutions.push_back({params.image_grid_pinpoints[i], params.image_grid_pinpoints[i+1]});
}
std::pair<int, int> best_resolution = select_best_resolution({img->nx, img->ny}, possible_resolutions);
printf("best_resolution: %d %d\n", best_resolution.first, best_resolution.second);
// printf("best_resolution: %d %d\n", best_resolution.first, best_resolution.second);
// clip_image_save_to_bmp(*img, "input.bmp");
resize_and_pad_image(*img, *temp, best_resolution); // we do not pad with mean-bg color anymore in llava-1.6
// clip_image_save_to_bmp(*temp, "resized.bmp");
@@ -2624,7 +2625,6 @@ ggml_tensor * clip_get_newline_tensor(const struct clip_ctx * ctx) {
void clip_free(clip_ctx * ctx) {
ggml_free(ctx->ctx_data);
gguf_free(ctx->ctx_gguf);
ggml_backend_buffer_free(ctx->params_buffer);
ggml_backend_free(ctx->backend);
ggml_gallocr_free(ctx->compute_alloc);
@@ -2807,10 +2807,8 @@ bool clip_image_encode_tokenizer(struct clip_ctx * ctx, int batch_size, ggml_ten
ggml_gallocr_alloc_graph(ctx->compute_alloc, gf);
ggml_backend_graph_compute(ctx->backend, gf);
struct ggml_tensor * llm_inputs = gf->nodes[gf->n_nodes - 1];
print_my_tensor(llm_inputs, "llm_inputs", 1);
// exit(0);
ggml_backend_tensor_get(llm_inputs, image_embd, 0, ggml_nbytes(llm_inputs));
clip_free(ctx);
// clip_free(ctx); // debug: llava_ctx was freed here, now free all the 'ctx' memory outside.
return true;
}
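The comment left on the disabled clip_free(ctx) call above states the new ownership rule: clip_image_encode_tokenizer only borrows the clip context, and the caller releases it once after the last use. A minimal sketch of that calling pattern (the loop and variable names are illustrative assumptions; clip_model_load, clip_image_encode_tokenizer, and clip_free are the functions visible in this diff):

    // Illustrative caller, not the exact cli code in this repo.
    struct clip_ctx * ctx_clip = clip_model_load(clip_path, /*verbosity=*/1);

    for (int i = 0; i < n_images; ++i) {  // hypothetical batch loop
        // Borrows ctx_clip: after this change the encoder no longer frees the
        // context, so repeated calls (and the teardown below) stay valid.
        clip_image_encode_tokenizer(ctx_clip, batch_size, result, attn_bias_input, image_embd);
    }

    clip_free(ctx_clip);  // exactly one free, after the last encode
    ctx_clip = NULL;      // drop the pointer so it cannot be freed again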

View file

@@ -44,12 +44,12 @@ static bool eval_string(struct llama_context *ctx_llama, const char *str, int n_
std::string str2 = str;
std::vector<llama_token> embd_inp = ::llama_tokenize(ctx_llama, str2, add_bos, true);
printf("!!prompt to eval!!: %s", str);
printf("----------------------\n");
// for (auto token : embd_inp){
// printf("%6d, ", token);
// }
printf("\n");
// printf("!!prompt to eval!!: %s", str);
// printf("----------------------\n");
// // for (auto token : embd_inp){
// // printf("%6d, ", token);
// // }
// printf("\n");
eval_tokens(ctx_llama, embd_inp, n_batch, n_past);
return true;
}
@@ -217,23 +217,23 @@ static void process_prompt(struct llava_context *ctx_llava, struct llava_image_e
user_prompt = prompt.substr(image_pos + std::string("<image>").length());
LOG_TEE("system_prompt: %s\n", system_prompt.c_str());
// phi3-tokenizer https://github.com/ggerganov/llama.cpp/issues/7938
if (params->verbose_prompt)
{
auto tmp = ::llama_tokenize(ctx_llava->ctx_llama, system_prompt, true, true);
for (int i = 0; i < (int)tmp.size(); i++)
{
LOG_TEE("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
}
}
// if (params->verbose_prompt)
// {
// auto tmp = ::llama_tokenize(ctx_llava->ctx_llama, system_prompt, true, true);
// for (int i = 0; i < (int)tmp.size(); i++)
// {
// LOG_TEE("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
// }
// }
LOG_TEE("user_prompt: %s\n", user_prompt.c_str());
if (params->verbose_prompt)
{
auto tmp = ::llama_tokenize(ctx_llava->ctx_llama, user_prompt, true, true);
for (int i = 0; i < (int)tmp.size(); i++)
{
LOG_TEE("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
}
}
// if (params->verbose_prompt)
// {
// auto tmp = ::llama_tokenize(ctx_llava->ctx_llama, user_prompt, true, true);
// for (int i = 0; i < (int)tmp.size(); i++)
// {
// LOG_TEE("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
// }
// }
}
else
{
@@ -280,11 +280,11 @@ static void process_prompt(struct llava_context *ctx_llava, struct llava_image_e
response += tmp;
// printf("%s", tmp);
if (strcmp(tmp, "<|end|>") == 0){
printf("\n STOP GENERATING because I saw <|end|>\n");
// printf("\n STOP GENERATING because I saw <|end|>\n");
break;
}
if (strcmp(tmp, "</s>") == 0) {
printf("\n STOP GENERATING because I saw </s>\n");
// printf("\n STOP GENERATING because I saw </s>\n");
break;
}
if (strstr(tmp, "###")) break; // Yi-VL behavior
@@ -327,7 +327,6 @@ static struct llava_context *llava_init_context(gpt_params *params, llama_model
}
auto ctx_clip = clip_model_load(clip_path, /*verbosity=*/1);
llama_context_params ctx_params = llama_context_params_from_gpt_params(*params);
ctx_params.n_ctx =
params->n_ctx < 2048 ? 2048 : params->n_ctx; // we need a longer context size to process image embeddings
@@ -350,10 +349,6 @@ static struct llava_context *llava_init_context(gpt_params *params, llama_model
static void llava_free(struct llava_context * ctx_llava) {
if (ctx_llava->ctx_clip) {
printf(
"YD:::Segmentation fault here; Because header.n_kv is empty\n clip_free->gguf_free(ctx->ctx_gguf)-> for "
"(uint64_t i = 0; i < ctx->header.n_kv; ++i)\n");
exit(1);
clip_free(ctx_llava->ctx_clip);
ctx_llava->ctx_clip = NULL;
}
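The removed debug printf above documented the original crash: clip_free had already run inside clip_image_encode_tokenizer, so this second call walked a freed gguf_context whose header.n_kv was garbage. A self-contained sketch of that failure class and the null-out guard used here (simplified stand-in structs, not the real gguf types):

    #include <cstdint>
    #include <cstdlib>

    struct gguf_context { uint64_t n_kv; /* kv entries elided */ };
    struct clip_ctx     { gguf_context * ctx_gguf; };

    static void clip_free_sketch(clip_ctx * ctx) {
        if (!ctx) return;            // makes a second call on NULL a no-op
        std::free(ctx->ctx_gguf);    // the real gguf_free loops over header.n_kv,
        std::free(ctx);              // which is garbage once ctx was already freed
    }

    int main() {
        clip_ctx * ctx = (clip_ctx *) std::calloc(1, sizeof(clip_ctx));
        ctx->ctx_gguf  = (gguf_context *) std::calloc(1, sizeof(gguf_context));
        clip_free_sketch(ctx);       // first free: fine
        ctx = NULL;                  // mirror of ctx_llava->ctx_clip = NULL above
        clip_free_sketch(ctx);       // safe no-op instead of a use-after-free
        return 0;
    }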
@@ -528,7 +523,6 @@ int main(int argc, char ** argv) {
if (prompt_contains_image(params.prompt))
{
auto ctx_llava = llava_init_context(&params, model);
auto image_embed = load_image(ctx_llava, &params, "");
// process the prompt
@@ -543,11 +537,11 @@ int main(int argc, char ** argv) {
{
for (auto &image : params.image)
{
printf("image: %s\n", image.c_str());
// printf("image: %s\n", image.c_str());
auto ctx_llava = llava_init_context(&params, model);
auto image_embed = load_image(ctx_llava, &params, image);
printf("n_image_pos: %d\n", image_embed->n_image_pos);
// printf("n_image_pos: %d\n", image_embed->n_image_pos);
if (!image_embed)
{
std::cerr << "error: failed to load image " << image << ". Terminating\n\n";
@@ -575,6 +569,6 @@ int main(int argc, char ** argv) {
// ctx_llava->model = NULL;
// llava_free(ctx_llava);
// }
printf("Remember to remove print_tensor function in xgenmm.cpp and clip.cpp\n");
// printf("Remember to remove print_tensor function in xgenmm.cpp and clip.cpp\n");
return 0;
}

View file

@@ -534,7 +534,7 @@ static bool clip_xgenmm_handle_vit_patches(clip_ctx *ctx_clip , const clip_image
scale_factor = (float)current_height / (float)original_height;
int new_width = int(original_width * scale_factor);
int padding = (current_width - new_width) / 2;
printf("new_width: %d, padding: %d\n", new_width, padding);
// printf("new_width: %d, padding: %d\n", new_width, padding);
for (int i = 0; i < current_height; i++){
for (int j = 0; j < current_width; j++){
if (j < padding || j >= current_width - padding)
@@ -562,33 +562,6 @@ static bool clip_xgenmm_handle_vit_patches(clip_ctx *ctx_clip , const clip_image
attention_mask = gf->nodes[gf->n_nodes - 1];
// memcpy(image_embd_v_m_mask_out, (float *)attention_mask->data, ggml_nbytes(attention_mask));
// {
// printf((" ========================= DEBUG =========================\n"));
// printf("Load pre-computed image embeddings and attention_mask\n");
// std::string filename = "/export/home/ggml/examples/projectors/receipt_5patches_vision_features.gguf";
// tensor_from_gguf tensor;
// bool is_successful = load_tensor_from_file(filename.c_str(), tensor);
// if (!is_successful)
// {
// fprintf(stderr, "%s: load_tensor_from_file() failed\n", __func__);
// return 1;
// }
// result = tensor.data;
// // print_tensor(result, "result", 1);
// filename = "/export/home/ggml/examples/projectors/receipt_5patches_vision_attn_masks.gguf";
// is_successful = load_tensor_from_file(filename.c_str(), tensor);
// if (!is_successful)
// {
// fprintf(stderr, "%s: load_tensor_from_file() failed\n", __func__);
// return 1;
// }
// attention_mask = tensor.data;
// // print_tensor(attention_mask, "attention_mask", 1);
// num_patches_width = 2;
// num_patches_height = 2;
// }
// compute attention masks outside of the graph
struct ggml_tensor * attn_bias_input;
struct ggml_context * ctx0;
@@ -639,7 +612,7 @@ static bool clip_xgenmm_handle_vit_patches(clip_ctx *ctx_clip , const clip_image
int batch_size = num_patches_width * num_patches_height + 1;
// print_tensor(attn_bias_input, "attn_bias_input", 1);
// print_tensor(result, "result", 1);
printf("batch_size: %d\n", batch_size);
// printf("batch_size: %d\n", batch_size);
const bool encoded = clip_image_encode_tokenizer(
ctx_clip, batch_size, result, attn_bias_input, image_embd);
if (!encoded){
@@ -982,6 +955,7 @@ bool llava_image_embed_make_with_clip_img(clip_ctx *ctx_clip, int n_threads, con
free(image_embd);
return false;
}
*image_embd_out = image_embd;
*n_img_pos_out = n_img_pos;
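Setting the out-parameters here hands the freshly allocated image_embd buffer to the caller, consistent with the free-everything-outside policy above. A hedged usage sketch (the call follows the signature in this hunk's header; the surrounding setup and error handling are assumed):

    float * image_embd = NULL;
    int     n_img_pos  = 0;
    if (llava_image_embed_make_with_clip_img(ctx_clip, n_threads, img,
                                             &image_embd, &n_img_pos)) {
        // ... hand image_embd / n_img_pos to the language model ...
        free(image_embd);    // the caller owns the buffer: free exactly once
        image_embd = NULL;
    }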