fix: do not complicate things
parent cc0ac09712
commit 21936ddb5d
3 changed files with 9 additions and 39 deletions
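As read from the diff below, the fix drops the dedicated LAYER_NORM_1 / LAYER_NORM_2 tensor types that the parent commit had introduced for jina-embeddings-v2-base-code: the model's "layer_norm_1" is mapped onto the existing ATTN_NORM_2 tensor, "layer_norm_2" stays on the existing FFN norm mapping, and the "jina-v2-code" tokenizer is registered as a GPT-2 style pre-tokenizer. The sketch below is a minimal illustration of the resulting name remapping, assuming gguf-py's standard "blk.{bid}.ffn_norm" name for the FFN norm; map_name() and HF_TO_GGUF are hypothetical helpers for illustration, not the gguf-py TensorNameMap API.

# Illustrative sketch only: where the jina-v2-code checkpoint names land in GGUF after this fix.
from typing import Optional

HF_TO_GGUF = {
    # previously these went to new blk.{bid}.layer_norm_1 / layer_norm_2 tensors;
    # now they reuse tensor types that already exist for other architectures
    "encoder.layer.{bid}.layer_norm_1": "blk.{bid}.attn_norm_2",  # MODEL_TENSOR.ATTN_NORM_2
    "encoder.layer.{bid}.layer_norm_2": "blk.{bid}.ffn_norm",     # MODEL_TENSOR.FFN_NORM (unchanged)
}

def map_name(hf_name: str, n_blocks: int) -> Optional[str]:
    # return the GGUF tensor name for a HF tensor name, or None if unmapped
    for bid in range(n_blocks):
        for src, dst in HF_TO_GGUF.items():
            if hf_name == src.format(bid=bid):
                return dst.format(bid=bid)
    return None

print(map_name("encoder.layer.3.layer_norm_1", n_blocks=24))  # -> blk.3.attn_norm_2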
gguf-py/gguf/constants.py

@@ -186,8 +186,6 @@ class MODEL_TENSOR(IntEnum):
     ATTN_Q_NORM    = auto()
     ATTN_K_NORM    = auto()
     LAYER_OUT_NORM = auto()
-    LAYER_NORM_1   = auto()
-    LAYER_NORM_2   = auto()
     SSM_IN         = auto()
     SSM_CONV1D     = auto()
     SSM_X          = auto()
@@ -276,8 +274,6 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
     MODEL_TENSOR.FFN_DOWN_EXP:   "blk.{bid}.ffn_down_exps",
     MODEL_TENSOR.FFN_UP_EXP:     "blk.{bid}.ffn_up_exps",
     MODEL_TENSOR.LAYER_OUT_NORM: "blk.{bid}.layer_output_norm",
-    MODEL_TENSOR.LAYER_NORM_1:   "blk.{bid}.layer_norm_1",
-    MODEL_TENSOR.LAYER_NORM_2:   "blk.{bid}.layer_norm_2",
     MODEL_TENSOR.SSM_IN:         "blk.{bid}.ssm_in",
     MODEL_TENSOR.SSM_CONV1D:     "blk.{bid}.ssm_conv1d",
     MODEL_TENSOR.SSM_X:          "blk.{bid}.ssm_x",
@@ -430,8 +426,7 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_GATE,
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.LAYER_OUT_NORM,
-        MODEL_TENSOR.LAYER_NORM_1,
-        MODEL_TENSOR.LAYER_NORM_2,
+        MODEL_TENSOR.ATTN_NORM_2,
     ],
     MODEL_ARCH.MPT: [
         MODEL_TENSOR.TOKEN_EMBD,
gguf-py/gguf/tensor_mapping.py

@@ -102,6 +102,7 @@ class TensorNameMap:
         # Attention norm 2
         MODEL_TENSOR.ATTN_NORM_2: (
             "transformer.h.{bid}.ln_attn", # falcon40b
+            "encoder.layer.{bid}.layer_norm_1", # jina-v2-code
         ),

         # Attention query-key-value
@@ -351,20 +352,9 @@ class TensorNameMap:
             "encoder.layers.{bid}.norm2", # nomic-bert
             "transformer.decoder_layer.{bid}.rms_norm_3", # Grok
             "encoder.layer.{bid}.mlp.layernorm", # jina-bert-v2
-            "encoder.layer.{bid}.layer_norm_1", # jina-v2-code
             "encoder.layer.{bid}.layer_norm_2" # jina-v2-code
         ),

-
-        MODEL_TENSOR.LAYER_NORM_1: (
-            "encoder.layer.{bid}.layer_norm_1", # jina-v2-code
-        ),
-
-
-        MODEL_TENSOR.LAYER_NORM_2: (
-            "encoder.layer.{bid}.layer_norm_2", # jina-v2-code
-        ),
-
         MODEL_TENSOR.SSM_IN: (
             "model.layers.{bid}.in_proj",
             "backbone.layers.{bid}.mixer.in_proj",
llama.cpp (29 changed lines)
@@ -496,8 +496,6 @@ enum llm_tensor {
     LLM_TENSOR_ATTN_KV_B,
     LLM_TENSOR_ATTN_Q_A_NORM,
     LLM_TENSOR_ATTN_KV_A_NORM,
-    LLM_TENSOR_LAYER_NORM_1,
-    LLM_TENSOR_LAYER_NORM_2,
 };

 static const std::map<llm_arch, std::map<llm_tensor, std::string>> LLM_TENSOR_NAMES = {
@@ -719,8 +717,7 @@ static const std::map<llm_arch, std::map<llm_tensor, std::string>> LLM_TENSOR_NAMES = {
            { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
            { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
            { LLM_TENSOR_FFN_UP,   "blk.%d.ffn_up" },
-           { LLM_TENSOR_LAYER_NORM_1, "blk.%d.layer_norm_1" },
-           { LLM_TENSOR_LAYER_NORM_2, "blk.%d.layer_norm_2" },
+           { LLM_TENSOR_ATTN_NORM_2, "blk.%d.attn_norm_2" },
        },
    },
    {
@@ -2014,12 +2011,6 @@ struct llama_layer {
     struct ggml_tensor * layer_out_norm_b;
     struct ggml_tensor * ffn_norm_exps;

-    // extra normalization layers needed by `jina-embeddings-v2-base-code`
-    struct ggml_tensor * layer_norm_1;
-    struct ggml_tensor * layer_norm_1_b;
-    struct ggml_tensor * layer_norm_2;
-    struct ggml_tensor * layer_norm_2_b;
-
     // ff
     struct ggml_tensor * ffn_gate; // w1
     struct ggml_tensor * ffn_down; // w2
@@ -4680,7 +4671,8 @@ static void llm_load_vocab(
                tokenizer_pre == "jina-es" ||
                tokenizer_pre == "jina-de" ||
                tokenizer_pre == "jina-v2-es" ||
-               tokenizer_pre == "jina-v2-de") {
+               tokenizer_pre == "jina-v2-de" ||
+               tokenizer_pre == "jina-v2-code") {
            vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_GPT2;
        } else if (
                tokenizer_pre == "refact") {
@@ -5547,12 +5539,9 @@ static bool llm_load_tensors(
                    layer.attn_out_norm   = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_OUT_NORM, "weight", i), {n_embd}); //output_norm
                    layer.attn_out_norm_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_OUT_NORM, "bias", i), {n_embd});

-                   layer.layer_norm_1   = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_LAYER_NORM_1, "weight", i), {n_embd}, llama_model_loader::TENSOR_NOT_REQUIRED);
-                   layer.layer_norm_1_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_LAYER_NORM_1, "bias", i), {n_embd}, llama_model_loader::TENSOR_NOT_REQUIRED);
-
-                   layer.layer_norm_2   = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_LAYER_NORM_2, "weight", i), {n_embd}, llama_model_loader::TENSOR_NOT_REQUIRED);
-                   layer.layer_norm_2_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_LAYER_NORM_2, "bias", i), {n_embd}, llama_model_loader::TENSOR_NOT_REQUIRED);
+                   layer.attn_norm_2   = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_NORM_2, "weight", i), {n_embd}, llama_model_loader::TENSOR_NOT_REQUIRED);
+                   layer.attn_norm_2_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_NORM_2, "bias", i), {n_embd}, llama_model_loader::TENSOR_NOT_REQUIRED);

                    layer.ffn_up   = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff});
                    layer.ffn_gate = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff});
@@ -8516,12 +8505,8 @@ struct llm_build_context {
                // attention layer norm
                cur = llm_build_norm(ctx0, cur, hparams, model.layers[il].attn_out_norm, model.layers[il].attn_out_norm_b, LLM_NORM, cb, il);

-               if (model.layers[il].layer_norm_1 != nullptr) {
-                   cur = llm_build_norm(ctx0, cur, hparams, model.layers[il].layer_norm_1, model.layers[il].layer_norm_1_b, LLM_NORM, cb, il);
-               }
-
-               if (model.layers[il].layer_norm_2 != nullptr) {
-                   cur = llm_build_norm(ctx0, cur, hparams, model.layers[il].layer_norm_2, model.layers[il].layer_norm_2_b, LLM_NORM, cb, il);
+               if (model.layers[il].attn_norm_2 != nullptr) {
+                   cur = llm_build_norm(ctx0, cur, hparams, model.layers[il].attn_norm_2, model.layers[il].attn_norm_2_b, LLM_NORM, cb, il);
                }

                struct ggml_tensor * ffn_inp = cur;