common : reimplement logging (#9418)
https://github.com/ggerganov/llama.cpp/pull/9418
This commit is contained in:
parent
e6deac31f7
commit
6262d13e0b
54 changed files with 2092 additions and 2419 deletions
|
@ -1,13 +1,23 @@
|
|||
#include "clip.h"
|
||||
#include "common.h"
|
||||
#include "llama.h"
|
||||
#include "llava.h"
|
||||
#include "base64.hpp"
|
||||
|
||||
#include "llama.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cerrno>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
#include <numeric>
|
||||
|
||||
#define die(msg) do { fputs("error: " msg "\n", stderr); exit(1); } while (0)
|
||||
#define die_fmt(fmt, ...) do { fprintf(stderr, "error: " fmt "\n", __VA_ARGS__); exit(1); } while (0)
|
||||
|
||||
#define LOG_INF(...) do { fprintf(stdout, __VA_ARGS__); } while (0)
|
||||
#define LOG_WRN(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
|
||||
#define LOG_ERR(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
|
||||
#define LOG_DBG(...) do { fprintf(stdout, __VA_ARGS__); } while (0)
|
||||
|
||||
// RGB uint8 image
|
||||
struct clip_image_u8 {
|
||||
|
@ -54,7 +64,7 @@ static std::pair<int, int> select_best_resolution(const std::pair<int, int>& ori
|
|||
int downscaled_height = static_cast<int>(original_height * scale);
|
||||
int effective_resolution = std::min(downscaled_width * downscaled_height, original_width * original_height);
|
||||
int wasted_resolution = (width * height) - effective_resolution;
|
||||
// LOG_TEE("resolution: %d %d, scale: %f, downscaled: %d %d, effective: %d, wasted: %d\n", width, height, scale, downscaled_width, downscaled_height, effective_resolution, wasted_resolution);
|
||||
// LOG_DBG("resolution: %d %d, scale: %f, downscaled: %d %d, effective: %d, wasted: %d\n", width, height, scale, downscaled_width, downscaled_height, effective_resolution, wasted_resolution);
|
||||
if (effective_resolution > max_effective_resolution || (effective_resolution == max_effective_resolution && wasted_resolution < min_wasted_resolution)) {
|
||||
max_effective_resolution = effective_resolution;
|
||||
min_wasted_resolution = wasted_resolution;
|
||||
|
@ -236,7 +246,7 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli
|
|||
img_res_v.size = 0;
|
||||
img_res_v.data = nullptr;
|
||||
if (!clip_image_preprocess(ctx_clip, img, &img_res_v)) {
|
||||
LOG_TEE("%s: unable to preprocess image\n", __func__);
|
||||
LOG_ERR("%s: unable to preprocess image\n", __func__);
|
||||
delete[] img_res_v.data;
|
||||
return false;
|
||||
}
|
||||
|
@ -265,14 +275,14 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli
|
|||
encoded = clip_image_encode(ctx_clip, n_threads, &img_res_v.data[i], image_embd_v[i]);
|
||||
}
|
||||
if (!encoded) {
|
||||
LOG_TEE("Unable to encode image - spatial_unpad - subimage %d of %d\n", (int) i+1, (int) img_res_v.size);
|
||||
LOG_ERR("Unable to encode image - spatial_unpad - subimage %d of %d\n", (int) i+1, (int) img_res_v.size);
|
||||
return false;
|
||||
}
|
||||
const int64_t t_img_enc_steop_batch_us = ggml_time_us();
|
||||
LOG_TEE("%s: step %d of %d encoded in %8.2f ms\n", __func__, (int)i+1, (int)img_res_v.size, (t_img_enc_steop_batch_us - t_img_enc_step_start_us) / 1000.0);
|
||||
LOG_INF("%s: step %d of %d encoded in %8.2f ms\n", __func__, (int)i+1, (int)img_res_v.size, (t_img_enc_steop_batch_us - t_img_enc_step_start_us) / 1000.0);
|
||||
}
|
||||
const int64_t t_img_enc_batch_us = ggml_time_us();
|
||||
LOG_TEE("%s: all %d segments encoded in %8.2f ms\n", __func__, (int)img_res_v.size, (t_img_enc_batch_us - t_img_enc_start_us) / 1000.0);
|
||||
LOG_INF("%s: all %d segments encoded in %8.2f ms\n", __func__, (int)img_res_v.size, (t_img_enc_batch_us - t_img_enc_start_us) / 1000.0);
|
||||
|
||||
int n_img_pos_out = 0;
|
||||
for (size_t i = 0; i < image_embd_v.size(); i++) {
|
||||
|
@ -287,7 +297,7 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli
|
|||
load_image_size->width = img->nx;
|
||||
load_image_size->height = img->ny;
|
||||
clip_add_load_image_size(ctx_clip, load_image_size);
|
||||
LOG_TEE("%s: load_image_size %d %d\n", __func__, load_image_size->width, load_image_size->height);
|
||||
LOG_INF("%s: load_image_size %d %d\n", __func__, load_image_size->width, load_image_size->height);
|
||||
}
|
||||
else if (strcmp(mm_patch_merge_type, "spatial_unpad") != 0) {
|
||||
// flat / default llava-1.5 type embedding
|
||||
|
@ -295,7 +305,7 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli
|
|||
bool encoded = clip_image_encode(ctx_clip, n_threads, &img_res_v.data[0], image_embd); // image_embd shape is 576 x 4096
|
||||
delete[] img_res_v.data;
|
||||
if (!encoded) {
|
||||
LOG_TEE("Unable to encode image\n");
|
||||
LOG_ERR("Unable to encode image\n");
|
||||
|
||||
return false;
|
||||
}
|
||||
|
@ -309,12 +319,12 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli
|
|||
image_embd_v[i] = (float *)malloc(clip_embd_nbytes(ctx_clip)); // 576 patches * 4096 embeddings * 4 bytes = 9437184
|
||||
const bool encoded = clip_image_encode(ctx_clip, n_threads, &img_res_v.data[i], image_embd_v[i]); // image data is in 3x336x336 format and will be converted to 336x336x3 inside
|
||||
if (!encoded) {
|
||||
LOG_TEE("Unable to encode image - spatial_unpad - subimage %d of %d\n", (int) i+1, (int) img_res_v.size);
|
||||
LOG_ERR("Unable to encode image - spatial_unpad - subimage %d of %d\n", (int) i+1, (int) img_res_v.size);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
const int64_t t_img_enc_batch_us = ggml_time_us();
|
||||
LOG_TEE("%s: %d segments encoded in %8.2f ms\n", __func__, (int)img_res_v.size, (t_img_enc_batch_us - t_img_enc_start_us) / 1000.0);
|
||||
LOG_INF("%s: %d segments encoded in %8.2f ms\n", __func__, (int)img_res_v.size, (t_img_enc_batch_us - t_img_enc_start_us) / 1000.0);
|
||||
|
||||
const int32_t * image_grid = clip_image_grid(ctx_clip);
|
||||
|
||||
|
@ -347,12 +357,12 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli
|
|||
// clip_image_save_to_bmp(*tmp, "image_feature.bmp");
|
||||
}
|
||||
|
||||
LOG_TEE("%s: image embedding created: %d tokens\n", __func__, *n_img_pos);
|
||||
LOG_INF("%s: image embedding created: %d tokens\n", __func__, *n_img_pos);
|
||||
|
||||
const int64_t t_img_enc_end_us = ggml_time_us();
|
||||
float t_img_enc_ms = (t_img_enc_end_us - t_img_enc_start_us) / 1000.0;
|
||||
|
||||
LOG_TEE("\n%s: image encoded in %8.2f ms by CLIP (%8.2f ms per image patch)\n", __func__, t_img_enc_ms, t_img_enc_ms / *n_img_pos);
|
||||
LOG_INF("\n%s: image encoded in %8.2f ms by CLIP (%8.2f ms per image patch)\n", __func__, t_img_enc_ms, t_img_enc_ms / *n_img_pos);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -362,7 +372,7 @@ bool llava_validate_embed_size(const llama_context * ctx_llama, const clip_ctx *
|
|||
int n_llama_embd = llama_n_embd(llama_get_model(ctx_llama));
|
||||
auto n_image_embd = clip_n_mmproj_embd(ctx_clip);
|
||||
if (n_image_embd != n_llama_embd) {
|
||||
LOG_TEE("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n", __func__, n_image_embd, n_llama_embd);
|
||||
LOG_ERR("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n", __func__, n_image_embd, n_llama_embd);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
|
@ -375,13 +385,13 @@ bool llava_image_embed_make_with_clip_img(clip_ctx * ctx_clip, int n_threads, co
|
|||
}
|
||||
float * image_embd = (float *)malloc(clip_embd_nbytes(ctx_clip)*num_max_patches); // TODO: base on gridsize/llava model
|
||||
if (!image_embd) {
|
||||
LOG_TEE("Unable to allocate memory for image embeddings\n");
|
||||
LOG_ERR("Unable to allocate memory for image embeddings\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
int n_img_pos;
|
||||
if (!encode_image_with_clip(ctx_clip, n_threads, img, image_embd, &n_img_pos)) {
|
||||
LOG_TEE("%s: cannot encode image, aborting\n", __func__);
|
||||
LOG_ERR("%s: cannot encode image, aborting\n", __func__);
|
||||
free(image_embd);
|
||||
return false;
|
||||
}
|
||||
|
@ -401,7 +411,7 @@ bool llava_eval_image_embed(llama_context * ctx_llama, const struct llava_image_
|
|||
}
|
||||
llama_batch batch = {int32_t(n_eval), nullptr, (image_embed->embed+i*n_embd), nullptr, nullptr, nullptr, nullptr, *n_past, 1, 0, };
|
||||
if (llama_decode(ctx_llama, batch)) {
|
||||
LOG_TEE("%s : failed to eval\n", __func__);
|
||||
LOG_ERR("%s : failed to eval\n", __func__);
|
||||
return false;
|
||||
}
|
||||
*n_past += n_eval;
|
||||
|
@ -413,7 +423,7 @@ struct llava_image_embed * llava_image_embed_make_with_bytes(struct clip_ctx * c
|
|||
clip_image_u8 * img = clip_image_u8_init();
|
||||
if (!clip_image_load_from_bytes(image_bytes, image_bytes_length, img)) {
|
||||
clip_image_u8_free(img);
|
||||
LOG_TEE("%s: can't load image from bytes, is it a valid image?", __func__);
|
||||
LOG_ERR("%s: can't load image from bytes, is it a valid image?", __func__);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@ -422,7 +432,7 @@ struct llava_image_embed * llava_image_embed_make_with_bytes(struct clip_ctx * c
|
|||
bool image_embed_result = llava_image_embed_make_with_clip_img(ctx_clip, n_threads, img, &image_embed, &n_image_pos);
|
||||
if (!image_embed_result) {
|
||||
clip_image_u8_free(img);
|
||||
LOG_TEE("%s: coulnd't embed the image\n", __func__);
|
||||
LOG_ERR("%s: coulnd't embed the image\n", __func__);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@ -436,7 +446,7 @@ struct llava_image_embed * llava_image_embed_make_with_bytes(struct clip_ctx * c
|
|||
static bool load_file_to_bytes(const char* path, unsigned char** bytesOut, long *sizeOut) {
|
||||
auto file = fopen(path, "rb");
|
||||
if (file == NULL) {
|
||||
LOG_TEE("%s: can't read file %s\n", __func__, path);
|
||||
LOG_ERR("%s: can't read file %s\n", __func__, path);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -446,7 +456,7 @@ static bool load_file_to_bytes(const char* path, unsigned char** bytesOut, long
|
|||
|
||||
auto buffer = (unsigned char *)malloc(fileSize); // Allocate memory to hold the file data
|
||||
if (buffer == NULL) {
|
||||
LOG_TEE("%s: failed to alloc %ld bytes for file %s\n", __func__, fileSize, path);
|
||||
LOG_ERR("%s: failed to alloc %ld bytes for file %s\n", __func__, fileSize, path);
|
||||
perror("Memory allocation error");
|
||||
fclose(file);
|
||||
return false;
|
||||
|
@ -471,7 +481,7 @@ struct llava_image_embed * llava_image_embed_make_with_filename(struct clip_ctx
|
|||
long image_bytes_length;
|
||||
auto loaded = load_file_to_bytes(image_path, &image_bytes, &image_bytes_length);
|
||||
if (!loaded) {
|
||||
LOG_TEE("%s: failed to load %s\n", __func__, image_path);
|
||||
LOG_ERR("%s: failed to load %s\n", __func__, image_path);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue