wip minicpmv
This commit is contained in:
parent
d0068ef0ed
commit
4a7ab89d75
9 changed files with 491 additions and 77 deletions
|
@ -310,6 +310,7 @@ class MODEL_ARCH(IntEnum):
|
|||
# vision models
|
||||
VISION_LLAVA = auto()
|
||||
VISION_MOBILEVLM = auto()
|
||||
VISION_MINICPMV = auto()
|
||||
|
||||
|
||||
class MODEL_TENSOR(IntEnum):
|
||||
|
@ -455,6 +456,15 @@ class MODEL_TENSOR(IntEnum):
|
|||
V_ENC_FFN_DOWN = auto()
|
||||
V_PRE_NORM = auto()
|
||||
V_POST_NORM = auto()
|
||||
V_RESMPL_POS_EMBD_K = auto() # minicpmv
|
||||
V_RESMPL_ATTN_IN = auto() # minicpmv
|
||||
V_RESMPL_ATTN_OUT = auto() # minicpmv
|
||||
V_RESMPL_KV_PROJ = auto() # minicpmv
|
||||
V_RESMPL_NORM_POST = auto() # minicpmv
|
||||
V_RESMPL_NORM_KV = auto() # minicpmv
|
||||
V_RESMPL_NORM_Q = auto() # minicpmv
|
||||
V_RESMPL_PROJ = auto() # minicpmv
|
||||
V_RESMPL_QUERY = auto() # minicpmv
|
||||
|
||||
|
||||
MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
|
||||
|
@ -518,6 +528,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
|
|||
# vision
|
||||
MODEL_ARCH.VISION_LLAVA: "llava",
|
||||
MODEL_ARCH.VISION_MOBILEVLM: "mobilevlm",
|
||||
MODEL_ARCH.VISION_MINICPMV: "minicpmv",
|
||||
}
|
||||
|
||||
TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
|
||||
|
@ -662,6 +673,15 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
|
|||
MODEL_TENSOR.V_ENC_FFN_DOWN: "v.enc.blk.{bid}.ffn_down",
|
||||
MODEL_TENSOR.V_PRE_NORM: "v.pre_norm",
|
||||
MODEL_TENSOR.V_POST_NORM: "v.post_norm",
|
||||
MODEL_TENSOR.V_RESMPL_POS_EMBD_K: "v.resmpl.pos_embd_k",
|
||||
MODEL_TENSOR.V_RESMPL_ATTN_IN: "v.resmpl.attn_in",
|
||||
MODEL_TENSOR.V_RESMPL_ATTN_OUT: "v.resmpl.attn_out",
|
||||
MODEL_TENSOR.V_RESMPL_KV_PROJ: "v.resmpl.kv_proj",
|
||||
MODEL_TENSOR.V_RESMPL_NORM_POST: "v.resmpl.norm_post",
|
||||
MODEL_TENSOR.V_RESMPL_NORM_KV: "v.resmpl.norm_kv",
|
||||
MODEL_TENSOR.V_RESMPL_NORM_Q: "v.resmpl.norm_q",
|
||||
MODEL_TENSOR.V_RESMPL_PROJ: "v.resmpl.proj",
|
||||
MODEL_TENSOR.V_RESMPL_QUERY: "v.resmpl.query",
|
||||
}
|
||||
|
||||
MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
||||
|
@ -1636,6 +1656,26 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|||
MODEL_TENSOR.V_PRE_NORM,
|
||||
MODEL_TENSOR.V_POST_NORM,
|
||||
],
|
||||
MODEL_ARCH.VISION_MINICPMV: [
|
||||
MODEL_TENSOR.V_ENC_EMBD_PATCH,
|
||||
MODEL_TENSOR.V_ENC_EMBD_POS,
|
||||
MODEL_TENSOR.V_ENC_ATTN_Q,
|
||||
MODEL_TENSOR.V_ENC_ATTN_K,
|
||||
MODEL_TENSOR.V_ENC_ATTN_V,
|
||||
MODEL_TENSOR.V_ENC_INPUT_NORM,
|
||||
MODEL_TENSOR.V_ENC_OUTPUT,
|
||||
MODEL_TENSOR.V_ENC_OUTPUT_NORM,
|
||||
MODEL_TENSOR.V_ENC_FFN_UP,
|
||||
MODEL_TENSOR.V_ENC_FFN_DOWN,
|
||||
MODEL_TENSOR.V_RESMPL_ATTN_IN,
|
||||
MODEL_TENSOR.V_RESMPL_ATTN_OUT,
|
||||
MODEL_TENSOR.V_RESMPL_KV_PROJ,
|
||||
MODEL_TENSOR.V_RESMPL_NORM_POST,
|
||||
MODEL_TENSOR.V_RESMPL_NORM_KV,
|
||||
MODEL_TENSOR.V_RESMPL_NORM_Q,
|
||||
MODEL_TENSOR.V_RESMPL_PROJ,
|
||||
MODEL_TENSOR.V_RESMPL_QUERY,
|
||||
],
|
||||
# TODO
|
||||
}
|
||||
|
||||
|
@ -1718,8 +1758,10 @@ class PoolingType(IntEnum):
|
|||
|
||||
|
||||
class CLIPProjectorType(Enum):
    """String identifiers for the projector variants of a CLIP-style vision model.

    Each member name is defined exactly once: reusing a member name inside an
    ``Enum`` body raises ``TypeError`` when the class is created.
    """

    MLP          = 'mlp'
    LDPV2        = 'ldpv2'
    MINICPMV_2_5 = 'minicpmv-2.5'  # resampler
    MINICPMV_2_6 = 'minicpmv-2.6'  # resampler
|
||||
|
||||
|
||||
class CLIPPatchMergeType(Enum):
|
||||
|
|
|
@ -808,42 +808,52 @@ class TensorNameMap:
|
|||
|
||||
MODEL_TENSOR.V_ENC_EMBD_PATCH: (
|
||||
"vision_tower.vision_model.embeddings.patch_embedding",
|
||||
"vpm.embeddings.patch_embedding",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.V_ENC_EMBD_POS: (
|
||||
"vision_tower.vision_model.embeddings.position_embedding",
|
||||
"vpm.embeddings.position_embedding",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.V_ENC_ATTN_Q: (
|
||||
"vision_tower.vision_model.encoder.layers.{bid}.self_attn.q_proj",
|
||||
"vpm.encoder.layers.{bid}.self_attn.q_proj",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.V_ENC_ATTN_K: (
|
||||
"vision_tower.vision_model.encoder.layers.{bid}.self_attn.k_proj",
|
||||
"vpm.encoder.layers.{bid}.self_attn.k_proj",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.V_ENC_ATTN_V: (
|
||||
"vision_tower.vision_model.encoder.layers.{bid}.self_attn.v_proj",
|
||||
"vpm.encoder.layers.{bid}.self_attn.v_proj",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.V_ENC_INPUT_NORM: (
|
||||
"vision_tower.vision_model.encoder.layers.{bid}.layer_norm1",
|
||||
"vpm.encoder.layers.{bid}.layer_norm1",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.V_ENC_OUTPUT: (
|
||||
"vision_tower.vision_model.encoder.layers.{bid}.self_attn.out_proj",
|
||||
"vpm.encoder.layers.{bid}.self_attn.out_proj",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.V_ENC_OUTPUT_NORM: (
|
||||
"vision_tower.vision_model.encoder.layers.{bid}.layer_norm2",
|
||||
"vpm.encoder.layers.{bid}.layer_norm2",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.V_ENC_FFN_UP: (
|
||||
"vision_tower.vision_model.encoder.layers.{bid}.mlp.fc1",
|
||||
"vpm.encoder.layers.{bid}.mlp.fc1",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.V_ENC_FFN_DOWN: (
|
||||
"vision_tower.vision_model.encoder.layers.{bid}.mlp.fc2",
|
||||
"vpm.encoder.layers.{bid}.mlp.fc2",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.V_PRE_NORM: (
|
||||
|
@ -853,6 +863,42 @@ class TensorNameMap:
|
|||
MODEL_TENSOR.V_POST_NORM: (
|
||||
"vision_tower.vision_model.post_layernorm",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.V_RESMPL_POS_EMBD_K: (
|
||||
"resampler.pos_embed_k",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.V_RESMPL_ATTN_IN: (
|
||||
"resampler.attn.in_proj",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.V_RESMPL_ATTN_OUT: (
|
||||
"resampler.attn.out_proj",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.V_RESMPL_KV_PROJ: (
|
||||
"resampler.kv_proj",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.V_RESMPL_NORM_POST: (
|
||||
"resampler.ln_post",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.V_RESMPL_NORM_KV: (
|
||||
"resampler.ln_kv",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.V_RESMPL_NORM_Q: (
|
||||
"resampler.ln_q",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.V_RESMPL_PROJ: (
|
||||
"resampler.proj",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.V_RESMPL_QUERY: (
|
||||
"resampler.query",
|
||||
),
|
||||
}
|
||||
|
||||
# architecture-specific block mappings
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue