clip : style changes
This commit is contained in:
parent
65f7455cea
commit
6e299132e7
2 changed files with 36 additions and 38 deletions
|
@ -747,8 +747,7 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
|
||||||
}
|
}
|
||||||
|
|
||||||
// llava projector
|
// llava projector
|
||||||
if(ctx->has_llava_projector)
|
if (ctx->has_llava_projector) {
|
||||||
{
|
|
||||||
embeddings = ggml_reshape_2d(ctx0, embeddings, embeddings->ne[0], embeddings->ne[1]);
|
embeddings = ggml_reshape_2d(ctx0, embeddings, embeddings->ne[0], embeddings->ne[1]);
|
||||||
|
|
||||||
struct ggml_tensor * patches = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, num_patches);
|
struct ggml_tensor * patches = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, num_patches);
|
||||||
|
@ -770,8 +769,7 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
|
||||||
embeddings = ggml_mul_mat(ctx0, model.mm_2_w, embeddings);
|
embeddings = ggml_mul_mat(ctx0, model.mm_2_w, embeddings);
|
||||||
embeddings = ggml_add(ctx0, embeddings, model.mm_2_b);
|
embeddings = ggml_add(ctx0, embeddings, model.mm_2_b);
|
||||||
|
|
||||||
}
|
} else if (ctx->proj_type == PROJECTOR_TYPE_MLP_NORM) {
|
||||||
else if (ctx->proj_type == PROJECTOR_TYPE_MLP_NORM) {
|
|
||||||
embeddings = ggml_mul_mat(ctx0, model.mm_0_w, embeddings);
|
embeddings = ggml_mul_mat(ctx0, model.mm_0_w, embeddings);
|
||||||
embeddings = ggml_add(ctx0, embeddings, model.mm_0_b);
|
embeddings = ggml_add(ctx0, embeddings, model.mm_0_b);
|
||||||
// ggml_tensor_printf(embeddings, "mm_0_w",0,true,false);
|
// ggml_tensor_printf(embeddings, "mm_0_w",0,true,false);
|
||||||
|
@ -939,11 +937,12 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
|
||||||
q = ggml_norm(ctx0, q, eps);
|
q = ggml_norm(ctx0, q, eps);
|
||||||
q = ggml_add(ctx0, ggml_mul(ctx0, q, model.mm_model_ln_q_w), model.mm_model_ln_q_b);
|
q = ggml_add(ctx0, ggml_mul(ctx0, q, model.mm_model_ln_q_w), model.mm_model_ln_q_b);
|
||||||
}
|
}
|
||||||
struct ggml_tensor *k, *v = ggml_mul_mat(ctx0, model.mm_model_kv_proj, embeddings);
|
struct ggml_tensor * v = ggml_mul_mat(ctx0, model.mm_model_kv_proj, embeddings);
|
||||||
{ // layernorm
|
{ // layernorm
|
||||||
v = ggml_norm(ctx0, v, eps);
|
v = ggml_norm(ctx0, v, eps);
|
||||||
v = ggml_add(ctx0, ggml_mul(ctx0, v, model.mm_model_ln_kv_w), model.mm_model_ln_kv_b);
|
v = ggml_add(ctx0, ggml_mul(ctx0, v, model.mm_model_ln_kv_w), model.mm_model_ln_kv_b);
|
||||||
}
|
}
|
||||||
|
struct ggml_tensor * k;
|
||||||
{ // position
|
{ // position
|
||||||
// q = ggml_add(ctx0, q, model.mm_model_pos_embed);
|
// q = ggml_add(ctx0, q, model.mm_model_pos_embed);
|
||||||
k = ggml_add(ctx0, v, pos_embed);
|
k = ggml_add(ctx0, v, pos_embed);
|
||||||
|
@ -1906,7 +1905,6 @@ int clip_uhd_num_image_embeds_col(struct clip_ctx * ctx_clip){
|
||||||
// returns the normalized float tensor for llava-1.5, for spatial_unpad with anyres processing for llava-1.6 it returns the normalized image patch tensors as a vector
|
// returns the normalized float tensor for llava-1.5, for spatial_unpad with anyres processing for llava-1.6 it returns the normalized image patch tensors as a vector
|
||||||
// res_imgs memory is being allocated here, previous allocations will be freed if found
|
// res_imgs memory is being allocated here, previous allocations will be freed if found
|
||||||
bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, clip_image_f32_batch * res_imgs) {
|
bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, clip_image_f32_batch * res_imgs) {
|
||||||
|
|
||||||
if (clip_is_minicpmv(ctx)) {
|
if (clip_is_minicpmv(ctx)) {
|
||||||
std::vector<std::vector<clip_image_u8 *>> imgs = uhd_slice_image(img);
|
std::vector<std::vector<clip_image_u8 *>> imgs = uhd_slice_image(img);
|
||||||
res_imgs->size = 0;
|
res_imgs->size = 0;
|
||||||
|
@ -2273,7 +2271,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
|
||||||
int image_size_width = image_size;
|
int image_size_width = image_size;
|
||||||
int image_size_height = image_size;
|
int image_size_height = image_size;
|
||||||
if (ctx->has_minicpmv_projector) {
|
if (ctx->has_minicpmv_projector) {
|
||||||
image_size_width = imgs->data[0].nx;;
|
image_size_width = imgs->data[0].nx;
|
||||||
image_size_height = imgs->data[0].ny;
|
image_size_height = imgs->data[0].ny;
|
||||||
}
|
}
|
||||||
const int patch_size = hparams.patch_size;
|
const int patch_size = hparams.patch_size;
|
||||||
|
@ -2343,8 +2341,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
|
||||||
ggml_backend_tensor_set(pos_embed, pos_embed_data, 0, ggml_nbytes(pos_embed));
|
ggml_backend_tensor_set(pos_embed, pos_embed_data, 0, ggml_nbytes(pos_embed));
|
||||||
free(pos_embed_data);
|
free(pos_embed_data);
|
||||||
}
|
}
|
||||||
}
|
} else {
|
||||||
else{
|
|
||||||
{
|
{
|
||||||
if (ctx->has_class_embedding) {
|
if (ctx->has_class_embedding) {
|
||||||
struct ggml_tensor * embeddings = ggml_graph_get_tensor(gf, "embeddings");
|
struct ggml_tensor * embeddings = ggml_graph_get_tensor(gf, "embeddings");
|
||||||
|
|
|
@ -30,6 +30,7 @@ struct clip_image_size {
|
||||||
int width;
|
int width;
|
||||||
int height;
|
int height;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct clip_image_u8_batch {
|
struct clip_image_u8_batch {
|
||||||
struct clip_image_u8 * data;
|
struct clip_image_u8 * data;
|
||||||
size_t size;
|
size_t size;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue