fix type-check
This commit is contained in:
parent
4a87d1d93e
commit
32b47f600f
5 changed files with 9 additions and 10 deletions
|
@ -776,8 +776,7 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
|
|||
embeddings = ggml_gelu(ctx0, embeddings);
|
||||
embeddings = ggml_mul_mat(ctx0, model.mm_2_w, embeddings);
|
||||
embeddings = ggml_add(ctx0, embeddings, model.mm_2_b);
|
||||
|
||||
}
|
||||
}
|
||||
else if (ctx->proj_type == PROJECTOR_TYPE_MLP_NORM) {
|
||||
embeddings = ggml_mul_mat(ctx0, model.mm_0_w, embeddings);
|
||||
embeddings = ggml_add(ctx0, embeddings, model.mm_0_b);
|
||||
|
|
|
@ -254,7 +254,7 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli
|
|||
image_embd_v[i] = (float *)malloc(clip_embd_nbytes(ctx_clip));
|
||||
int patch_size=14;
|
||||
load_image_size->width = img_res_v.data[i].nx;
|
||||
load_image_size->height = img_res_v.data[i].ny;
|
||||
load_image_size->height = img_res_v.data[i].ny;
|
||||
clip_add_load_image_size(ctx_clip, load_image_size);
|
||||
bool encoded = false;
|
||||
int has_minicpmv_projector = clip_is_minicpmv(ctx_clip);
|
||||
|
@ -263,7 +263,7 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli
|
|||
}
|
||||
else if (has_minicpmv_projector == 3) {
|
||||
encoded = clip_image_encode(ctx_clip, n_threads, &img_res_v.data[i], image_embd_v[i]);
|
||||
}
|
||||
}
|
||||
if (!encoded) {
|
||||
LOG_TEE("Unable to encode image - spatial_unpad - subimage %d of %d\n", (int) i+1, (int) img_res_v.size);
|
||||
return false;
|
||||
|
|
|
@ -140,7 +140,7 @@ static void process_image(struct llava_context * ctx_llava, struct llava_image_e
|
|||
}
|
||||
else if (has_minicpmv_projector == 3) {
|
||||
system_prompt = "<|im_start|>user\n";
|
||||
}
|
||||
}
|
||||
LOG_TEE("%s: image token past: %d\n", __func__, n_past);
|
||||
eval_string(ctx_llava->ctx_llama, (system_prompt+"<image>").c_str(), params->n_batch, &n_past, false);
|
||||
process_eval_image_embed(ctx_llava, embeds, params->n_batch, &n_past, idx++);
|
||||
|
@ -223,7 +223,7 @@ static struct llama_sampling_context * llama_init(struct llava_context * ctx_lla
|
|||
}
|
||||
else if (has_minicpmv_projector == 3) {
|
||||
user_prompt = "<|im_start|>user\n" + prompt;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
eval_string(ctx_llava->ctx_llama, user_prompt.c_str(), params->n_batch, &n_past, false);
|
||||
|
@ -232,8 +232,8 @@ static struct llama_sampling_context * llama_init(struct llava_context * ctx_lla
|
|||
}
|
||||
else if (has_minicpmv_projector == 3) {
|
||||
eval_string(ctx_llava->ctx_llama, "<|im_end|><|im_start|>assistant\n", params->n_batch, &n_past, false);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// generate the response
|
||||
|
||||
LOG_TEE("\n");
|
||||
|
|
|
@ -6,7 +6,6 @@ import re
|
|||
import torch
|
||||
import numpy as np
|
||||
from gguf import *
|
||||
import timm
|
||||
from transformers.models.idefics2.modeling_idefics2 import Idefics2VisionTransformer, Idefics2VisionConfig
|
||||
|
||||
TEXT = "clip.text"
|
||||
|
@ -159,6 +158,7 @@ fname_middle = None
|
|||
has_text_encoder = True
|
||||
has_vision_encoder = True
|
||||
has_minicpmv_projector = False
|
||||
minicpmv_version = 2
|
||||
if args.text_only:
|
||||
fname_middle = "text-"
|
||||
has_vision_encoder = False
|
||||
|
|
|
@ -133,7 +133,6 @@ class SiglipVisionConfig(PretrainedConfig):
|
|||
)
|
||||
|
||||
return cls.from_dict(config_dict, **kwargs)
|
||||
|
||||
|
||||
_CHECKPOINT_FOR_DOC = "google/siglip-base-patch16-224"
|
||||
|
||||
|
@ -1096,6 +1095,7 @@ fname_middle = None
|
|||
has_text_encoder = True
|
||||
has_vision_encoder = True
|
||||
has_minicpmv_projector = False
|
||||
minicpmv_version = 3
|
||||
if args.text_only:
|
||||
fname_middle = "text-"
|
||||
has_vision_encoder = False
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue