use id for mmproj tensors

Xuan Son Nguyen 2024-10-03 10:51:20 +02:00
parent c2ec885264
commit 49e7304cdf
3 changed files with 13 additions and 18 deletions


@@ -375,8 +375,7 @@ class MODEL_TENSOR(IntEnum):
     ENC_FFN_UP = auto()
     ENC_OUTPUT_NORM = auto()
     # vision
-    V_MMPROJ_A = auto()
-    V_MMPROJ_B = auto()
+    V_MMPROJ = auto()
     V_ENC_EMBD_CLS = auto()
     V_ENC_EMBD_PATCH = auto()
     V_ENC_EMBD_POS = auto()
@@ -552,8 +551,7 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
     MODEL_TENSOR.ENC_FFN_UP: "enc.blk.{bid}.ffn_up",
     MODEL_TENSOR.ENC_OUTPUT_NORM: "enc.output_norm",
     # vision
-    MODEL_TENSOR.V_MMPROJ_A: "v.mmproj_a",
-    MODEL_TENSOR.V_MMPROJ_B: "v.mmproj_b",
+    MODEL_TENSOR.V_MMPROJ: "v.mmproj_{bid}",
     MODEL_TENSOR.V_ENC_EMBD_CLS: "v.enc.embd.cls",
     MODEL_TENSOR.V_ENC_EMBD_PATCH: "v.enc.embd.patch",
     MODEL_TENSOR.V_ENC_EMBD_POS: "v.enc.embd.pos",
@@ -1343,8 +1341,7 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_UP,
     ],
     MODEL_ARCH.LLAVA_VISION: [
-        MODEL_TENSOR.V_MMPROJ_A,
-        MODEL_TENSOR.V_MMPROJ_B,
+        MODEL_TENSOR.V_MMPROJ,
         MODEL_TENSOR.V_ENC_EMBD_CLS,
         MODEL_TENSOR.V_ENC_EMBD_PATCH,
         MODEL_TENSOR.V_ENC_EMBD_POS,

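With this change the two projector tensors no longer need separate enum entries; a single V_MMPROJ entry carries the layer index through the "{bid}" placeholder in its name template. A minimal sketch of how such a template expands into concrete GGUF tensor names (the helper below is illustrative, not part of gguf-py):

# Minimal sketch (not gguf-py code): expanding a "{bid}"-templated name
# such as "v.mmproj_{bid}" into concrete per-id tensor names.
V_MMPROJ_TEMPLATE = "v.mmproj_{bid}"

def format_tensor_name(template: str, bid: int, suffix: str = "weight") -> str:
    # Substitute the projector id, then append the usual tensor suffix.
    return template.format(bid=bid) + "." + suffix

print(format_tensor_name(V_MMPROJ_TEMPLATE, 1))          # v.mmproj_1.weight
print(format_tensor_name(V_MMPROJ_TEMPLATE, 2, "bias"))  # v.mmproj_2.bias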

@@ -680,12 +680,12 @@ class TensorNameMap:
             "encoder.final_layer_norm", # t5
         ),
-        MODEL_TENSOR.V_MMPROJ_A: (
-            "multi_modal_projector.linear_1",
+        MODEL_TENSOR.V_MMPROJ: (
+            "multi_modal_projector.linear_{bid}",
         ),
-        MODEL_TENSOR.V_MMPROJ_B: (
-            "multi_modal_projector.linear_2",
+        MODEL_TENSOR.V_MMPROJ: (
+            "multi_modal_projector.linear_{bid}",
         ),
         MODEL_TENSOR.V_ENC_EMBD_CLS: (

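On the mapping side, the Hugging Face names "multi_modal_projector.linear_1" and "multi_modal_projector.linear_2" now resolve through one "{bid}"-parameterized entry instead of two fixed ones. An illustrative sketch of that resolution (not the actual TensorNameMap logic):

# Illustrative sketch (not TensorNameMap): resolve an HF-style name like
# "multi_modal_projector.linear_1" to the GGUF name "v.mmproj_1" by
# capturing the id from a single "{bid}"-parameterized pattern.
import re

HF_PATTERN = "multi_modal_projector.linear_{bid}"
GGUF_TEMPLATE = "v.mmproj_{bid}"

def map_name(hf_name: str) -> str | None:
    # Turn the template into a regex that captures the numeric id.
    pattern = re.escape(HF_PATTERN).replace(re.escape("{bid}"), r"(\d+)")
    m = re.fullmatch(pattern, hf_name)
    if m is None:
        return None
    return GGUF_TEMPLATE.format(bid=int(m.group(1)))

print(map_name("multi_modal_projector.linear_1"))  # v.mmproj_1
print(map_name("multi_modal_projector.linear_2"))  # v.mmproj_2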

@@ -658,8 +658,7 @@ enum llm_tensor {
 };

 enum vision_tensor {
-    VISION_TENSOR_MMPROJ_A,
-    VISION_TENSOR_MMPROJ_B,
+    VISION_TENSOR_MMPROJ,
     VISION_TENSOR_ENC_EMBD_CLS,
     VISION_TENSOR_ENC_EMBD_PATCH,
     VISION_TENSOR_ENC_EMBD_POS,
@@ -1601,8 +1600,7 @@ static const std::map<vision_arch, std::map<vision_tensor, std::string>> VISION_
     {
         VISION_ARCH_LLAVA,
         {
-            { VISION_TENSOR_MMPROJ_A,       "v.mmproj_a" },
-            { VISION_TENSOR_MMPROJ_B,       "v.mmproj_b" },
+            { VISION_TENSOR_MMPROJ,         "v.mmproj" },
             { VISION_TENSOR_ENC_EMBD_CLS,   "v.enc.embd.cls" },
             { VISION_TENSOR_ENC_EMBD_PATCH, "v.enc.embd.patch" },
             { VISION_TENSOR_ENC_EMBD_POS,   "v.enc.embd.pos" },
@@ -8992,10 +8990,10 @@ static bool llm_load_tensors(
     switch (vparams.arch) {
         case VISION_ARCH_LLAVA:
             {
-                model.clip.mm_a_w = ml.create_tensor(ctx_vision, tn(VISION_TENSOR_MMPROJ_A, "weight"), {n_embd, n_ff});
-                model.clip.mm_a_b = ml.create_tensor(ctx_vision, tn(VISION_TENSOR_MMPROJ_A, "bias"  ), {n_ff});
-                model.clip.mm_b_w = ml.create_tensor(ctx_vision, tn(VISION_TENSOR_MMPROJ_B, "weight"), {n_ff, n_ff});
-                model.clip.mm_b_b = ml.create_tensor(ctx_vision, tn(VISION_TENSOR_MMPROJ_B, "bias"  ), {n_ff});
+                model.clip.mm_a_w = ml.create_tensor(ctx_vision, tn(VISION_TENSOR_MMPROJ, "weight", 1), {n_embd, n_ff});
+                model.clip.mm_a_b = ml.create_tensor(ctx_vision, tn(VISION_TENSOR_MMPROJ, "bias",   1), {n_ff});
+                model.clip.mm_b_w = ml.create_tensor(ctx_vision, tn(VISION_TENSOR_MMPROJ, "weight", 2), {n_ff, n_ff});
+                model.clip.mm_b_b = ml.create_tensor(ctx_vision, tn(VISION_TENSOR_MMPROJ, "bias",   2), {n_ff});
                 model.clip.class_embedding  = ml.create_tensor(ctx_vision, tn(VISION_TENSOR_ENC_EMBD_CLS), {n_embd});
                 model.clip.patch_embeddings = ml.create_tensor(ctx_vision, tn(VISION_TENSOR_ENC_EMBD_PATCH, "weight"), {patch_size, patch_size, n_channel, n_embd});
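The loader still creates the two projector matrices separately; only their lookup names are now formed from one base name plus an id passed to tn(). A small sketch of that id-based lookup with the shapes used in the hunk above (names and sizes are illustrative examples, not llama.cpp code):

# Illustrative sketch (not llama.cpp): fetch the two mmproj tensors by id
# from a name->shape table and print the shapes expected by the loader.
def mmproj_name(bid: int, suffix: str) -> str:
    return f"v.mmproj_{bid}.{suffix}"

# Hypothetical tensor table, as it might appear in a converted LLaVA GGUF
# (dimension values are placeholders for n_embd and n_ff).
tensors = {
    "v.mmproj_1.weight": (4096, 5120),  # {n_embd, n_ff}
    "v.mmproj_1.bias":   (5120,),       # {n_ff}
    "v.mmproj_2.weight": (5120, 5120),  # {n_ff, n_ff}
    "v.mmproj_2.bias":   (5120,),       # {n_ff}
}

for bid in (1, 2):
    for suffix in ("weight", "bias"):
        name = mmproj_name(bid, suffix)
        print(name, tensors[name])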