minicpm: basic vision support working (UHD slicing not yet implemented)

This commit is contained in:
Xuan Son Nguyen 2025-01-23 12:14:06 +01:00
parent c0d93dd509
commit 8586d23c8a
9 changed files with 77 additions and 2 deletions

View file

@ -1382,6 +1382,10 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
{ LLM_TENSOR_V_RESMPL_Q_NORM, "v.resmpl.q_norm" },
{ LLM_TENSOR_V_RESMPL_PROJ, "v.resmpl.proj" },
{ LLM_TENSOR_V_RESMPL_QUERY, "v.resmpl.query" },
{ LLM_TENSOR_V_TOK_EMBD_IMAGE, "v.tok_embd.image" },
{ LLM_TENSOR_V_TOK_EMBD_END_IMAGE, "v.tok_embd.end_image" },
{ LLM_TENSOR_V_TOK_EMBD_SLICE, "v.tok_embd.slice" },
{ LLM_TENSOR_V_TOK_EMBD_END_SLICE, "v.tok_embd.end_slice" },
}
},
{

View file

@ -381,6 +381,10 @@ enum llm_tensor {
LLM_TENSOR_V_RESMPL_Q_NORM,
LLM_TENSOR_V_RESMPL_PROJ,
LLM_TENSOR_V_RESMPL_QUERY,
LLM_TENSOR_V_TOK_EMBD_IMAGE,
LLM_TENSOR_V_TOK_EMBD_END_IMAGE,
LLM_TENSOR_V_TOK_EMBD_SLICE,
LLM_TENSOR_V_TOK_EMBD_END_SLICE,
};
enum llm_tensor_layer {

View file

@ -3549,6 +3549,12 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
vit.mm_model_ln_post_w = ml.create_tensor(ctx_vision, tn(LLM_TENSOR_V_RESMPL_POST_NORM, "weight"), {rs_n_embd});
vit.mm_model_ln_post_b = ml.create_tensor(ctx_vision, tn(LLM_TENSOR_V_RESMPL_POST_NORM, "bias" ), {rs_n_embd});
// tok embd
vit.mm_tok_embd_image = ml.create_tensor(ctx_vision, tn(LLM_TENSOR_V_TOK_EMBD_IMAGE, "weight"), {n_embd});
vit.mm_tok_embd_end_image = ml.create_tensor(ctx_vision, tn(LLM_TENSOR_V_TOK_EMBD_END_IMAGE, "weight"), {n_embd});
vit.mm_tok_embd_slice = ml.create_tensor(ctx_vision, tn(LLM_TENSOR_V_TOK_EMBD_SLICE, "weight"), {n_embd});
vit.mm_tok_embd_end_slice = ml.create_tensor(ctx_vision, tn(LLM_TENSOR_V_TOK_EMBD_END_SLICE, "weight"), {n_embd});
for (int i = 0; i < n_vlayer; ++i) {
auto & layer = vit.layers[i];

View file

@ -895,6 +895,10 @@ struct llama_vision_graph_builder {
cur = ggml_mul_mat(ctx0, model.mm_model_proj, cur);
}
// add <image> and </image> token embeddings
cur = ggml_concat(ctx0, model.mm_tok_embd_image, cur, 1);
cur = ggml_concat(ctx0, cur, model.mm_tok_embd_end_image, 1);
ggml_set_name(cur, "output");
ggml_build_forward_expand(gf, cur);

View file

@ -129,7 +129,11 @@ struct llama_vision_model {
struct ggml_tensor * mm_model_ln_post_w = nullptr;
struct ggml_tensor * mm_model_ln_post_b = nullptr;
struct ggml_tensor * image_newline = nullptr;
// special tokens
struct ggml_tensor * mm_tok_embd_image = nullptr;
struct ggml_tensor * mm_tok_embd_end_image = nullptr;
struct ggml_tensor * mm_tok_embd_slice = nullptr;
struct ggml_tensor * mm_tok_embd_end_slice = nullptr;
};
struct llama_vision_context {