feat: rename Jina Bert to Jina Bert V2

Author: Joan Martinez
Date:   2024-04-24 15:46:18 +02:00
parent dfa067631c
commit c3f4b1f2d2
4 changed files with 22 additions and 22 deletions


@@ -2718,8 +2718,8 @@ class OlmoModel(Model):
 @Model.register("JinaBertModel", "JinaBertForMaskedLM")
-class JinaBertModel(BertModel):
-    model_arch = gguf.MODEL_ARCH.JINA_BERT
+class JinaBertV2Model(BertModel):
+    model_arch = gguf.MODEL_ARCH.JINA_BERT_V2
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
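
The converter hunk above only renames the class and its model_arch; the strings passed to @Model.register are still the Hugging Face architecture names ("JinaBertModel", "JinaBertForMaskedLM"), so the same checkpoints keep resolving to this converter. A minimal, self-contained sketch of that registry pattern (illustrative only, not the real convert-hf-to-gguf.py implementation; the Model base class here is a stand-in):

# Hypothetical, simplified registry to illustrate why renaming the class is
# safe: lookups key on the Hugging Face architecture strings, not on the
# Python class name. Not the actual convert-hf-to-gguf.py code.
class Model:
    _registry: dict[str, type] = {}

    @classmethod
    def register(cls, *architectures: str):
        def wrapper(model_cls: type) -> type:
            for arch in architectures:
                cls._registry[arch] = model_cls
            return model_cls
        return wrapper

    @classmethod
    def from_hf_architecture(cls, arch: str) -> type:
        return cls._registry[arch]


@Model.register("JinaBertModel", "JinaBertForMaskedLM")
class JinaBertV2Model(Model):
    model_arch = "jina-bert-v2"  # stands in for gguf.MODEL_ARCH.JINA_BERT_V2


# Checkpoints whose config lists "JinaBertModel" still reach the renamed class:
assert Model.from_hf_architecture("JinaBertModel") is JinaBertV2Model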


@@ -118,7 +118,7 @@ class MODEL_ARCH(IntEnum):
     REFACT = auto()
     BERT = auto()
     NOMIC_BERT = auto()
-    JINA_BERT = auto()
+    JINA_BERT_V2 = auto()
     BLOOM = auto()
     STABLELM = auto()
     QWEN = auto()
@@ -195,7 +195,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
     MODEL_ARCH.REFACT: "refact",
     MODEL_ARCH.BERT: "bert",
     MODEL_ARCH.NOMIC_BERT: "nomic-bert",
-    MODEL_ARCH.JINA_BERT: "jina-bert",
+    MODEL_ARCH.JINA_BERT_V2: "jina-bert-v2",
     MODEL_ARCH.BLOOM: "bloom",
     MODEL_ARCH.STABLELM: "stablelm",
     MODEL_ARCH.QWEN: "qwen",
@@ -380,7 +380,7 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_UP,
         MODEL_TENSOR.LAYER_OUT_NORM,
     ],
-    MODEL_ARCH.JINA_BERT: [
+    MODEL_ARCH.JINA_BERT_V2: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.TOKEN_EMBD_NORM,
         MODEL_TENSOR.TOKEN_TYPES,
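
In gguf-py, MODEL_ARCH_NAMES supplies the string the converter records as the GGUF general.architecture field, and MODEL_TENSORS lists which tensors the architecture may carry, so files converted after this change identify themselves as "jina-bert-v2". A short sketch of reading those tables, assuming the gguf-py package from this tree is importable as gguf:

# Sketch only: assumes the gguf-py package from this repository is installed
# (e.g. pip install -e gguf-py) so the renamed constants are importable.
import gguf

arch = gguf.MODEL_ARCH.JINA_BERT_V2

# This string is what ends up in the GGUF metadata key "general.architecture";
# after this commit it is "jina-bert-v2" rather than "jina-bert".
print(gguf.MODEL_ARCH_NAMES[arch])

# The tensor whitelist for the architecture, as extended in the hunk above.
for tensor in gguf.MODEL_TENSORS[arch][:3]:
    print(tensor.name)  # TOKEN_EMBD, TOKEN_EMBD_NORM, TOKEN_TYPES, ...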


@@ -238,7 +238,7 @@ class TensorNameMap:
             "model.layers.{bid}.feed_forward.w3", # internlm2
             "encoder.layers.{bid}.mlp.fc11", # nomic-bert
             "model.layers.{bid}.mlp.c_fc", # starcoder2
-            "encoder.layer.{bid}.mlp.gated_layers_v", # jina-bert
+            "encoder.layer.{bid}.mlp.gated_layers_v", # jina-bert-v2
         ),
 
         MODEL_TENSOR.FFN_UP_EXP: (
@@ -265,7 +265,7 @@ class TensorNameMap:
             "model.layers.layers.{bid}.mlp.gate_proj", # plamo
             "model.layers.{bid}.feed_forward.w1", # internlm2
             "encoder.layers.{bid}.mlp.fc12", # nomic-bert
-            "encoder.layer.{bid}.mlp.gated_layers_w", # jina-bert
+            "encoder.layer.{bid}.mlp.gated_layers_w", # jina-bert-v2
         ),
 
         MODEL_TENSOR.FFN_GATE_EXP: (
@@ -299,7 +299,7 @@ class TensorNameMap:
             "model.layers.{bid}.feed_forward.w2", # internlm2
             "encoder.layers.{bid}.mlp.fc2", # nomic-bert
             "model.layers.{bid}.mlp.c_proj", # starcoder2
-            "encoder.layer.{bid}.mlp.wo", # jina-bert
+            "encoder.layer.{bid}.mlp.wo", # jina-bert-v2
         ),
 
         MODEL_TENSOR.FFN_DOWN_EXP: (
@@ -318,7 +318,7 @@ class TensorNameMap:
             "model.layers.{bid}.self_attn.q_layernorm", # persimmon
             "model.layers.{bid}.self_attn.q_norm", # cohere
             "transformer.blocks.{bid}.attn.q_ln", # sea-lion
-            "encoder.layer.{bid}.attention.self.layer_norm_q" # jina-bert
+            "encoder.layer.{bid}.attention.self.layer_norm_q" # jina-bert-v2
         ),
 
         MODEL_TENSOR.ATTN_K_NORM: (
@@ -326,7 +326,7 @@ class TensorNameMap:
             "model.layers.{bid}.self_attn.k_layernorm", # persimmon
             "model.layers.{bid}.self_attn.k_norm", # cohere
             "transformer.blocks.{bid}.attn.k_ln", # sea-lion
-            "encoder.layer.{bid}.attention.self.layer_norm_k" # jina-bert
+            "encoder.layer.{bid}.attention.self.layer_norm_k" # jina-bert-v2
         ),
 
         MODEL_TENSOR.ROPE_FREQS: (
@@ -337,7 +337,7 @@ class TensorNameMap:
             "encoder.layer.{bid}.output.LayerNorm", # bert
             "encoder.layers.{bid}.norm2", # nomic-bert
             "transformer.decoder_layer.{bid}.rms_norm_3", # Grok
-            "encoder.layer.{bid}.mlp.layernorm", # jina-bert
+            "encoder.layer.{bid}.mlp.layernorm", # jina-bert-v2
         ),
 
         MODEL_TENSOR.SSM_IN: (
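
Only the trailing comments change in the TensorNameMap hunks above (# jina-bert becomes # jina-bert-v2); the mapping entries themselves are untouched, so Hugging Face tensor names still resolve to the same GGUF names. A rough usage sketch, assuming gguf-py's get_tensor_name_map helper and TensorNameMap.get_name behave as in this tree:

# Sketch only: relies on gguf-py's tensor-name mapping as present in this tree.
import gguf

# Build the mapping for jina-bert-v2 with a small block count (4 is an
# arbitrary choice for the example).
tmap = gguf.get_tensor_name_map(gguf.MODEL_ARCH.JINA_BERT_V2, 4)

# A Hugging Face tensor name with a block index is rewritten to the GGUF name;
# the gated FFN weight of block 0 should map to something like
# "blk.0.ffn_up.weight".
hf_name = "encoder.layer.0.mlp.gated_layers_v.weight"
print(tmap.get_name(hf_name, try_suffixes=(".weight", ".bias")))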


@@ -205,7 +205,7 @@ enum llm_arch {
     LLM_ARCH_REFACT,
     LLM_ARCH_BERT,
     LLM_ARCH_NOMIC_BERT,
-    LLM_ARCH_JINA_BERT,
+    LLM_ARCH_JINA_BERT_V2,
     LLM_ARCH_BLOOM,
     LLM_ARCH_STABLELM,
     LLM_ARCH_QWEN,
@@ -241,7 +241,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_REFACT, "refact" },
     { LLM_ARCH_BERT, "bert" },
     { LLM_ARCH_NOMIC_BERT, "nomic-bert" },
-    { LLM_ARCH_JINA_BERT, "jina-bert" },
+    { LLM_ARCH_JINA_BERT_V2, "jina-bert-v2" },
     { LLM_ARCH_BLOOM, "bloom" },
     { LLM_ARCH_STABLELM, "stablelm" },
     { LLM_ARCH_QWEN, "qwen" },
@@ -690,7 +690,7 @@ static const std::map<llm_arch, std::map<llm_tensor, std::string>> LLM_TENSOR_NA
         },
     },
     {
-        LLM_ARCH_JINA_BERT,
+        LLM_ARCH_JINA_BERT_V2,
         {
             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
             { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
@@ -3893,7 +3893,7 @@ static void llm_load_hparams(
                     model.type = e_model::MODEL_335M; break; // bge-large
                 }
             } break;
-        case LLM_ARCH_JINA_BERT:
+        case LLM_ARCH_JINA_BERT_V2:
             {
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
                 ml.get_key(LLM_KV_ATTENTION_CAUSAL, hparams.causal_attn);
@@ -4137,7 +4137,7 @@ static void llm_load_hparams(
     model.ftype = ml.ftype;
 
-    if (hparams.f_max_alibi_bias > 0.0f && model.arch != LLM_ARCH_JINA_BERT) {
+    if (hparams.f_max_alibi_bias > 0.0f && model.arch != LLM_ARCH_JINA_BERT_V2) {
         hparams.need_kq_pos = true;
     }
@@ -5113,7 +5113,7 @@ static bool llm_load_tensors(
                     layer.layer_out_norm_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_LAYER_OUT_NORM, "bias", i), {n_embd});
                 }
             } break;
-        case LLM_ARCH_JINA_BERT:
+        case LLM_ARCH_JINA_BERT_V2:
             {
                 model.tok_embd = ml.create_tensor(ctx_input, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}); // word_embeddings
                 model.type_embd = ml.create_tensor(ctx_input, tn(LLM_TENSOR_TOKEN_TYPES, "weight"), {n_embd, n_vocab_type}); //token_type_embeddings
@@ -7994,7 +7994,7 @@ struct llm_build_context {
         struct ggml_tensor * inpL;
         struct ggml_tensor * inp_pos = nullptr;
 
-        if (model.arch != LLM_ARCH_JINA_BERT) {
+        if (model.arch != LLM_ARCH_JINA_BERT_V2) {
             inp_pos = build_inp_pos();
         }
         struct ggml_tensor * inp_mean = build_inp_mean();
@@ -8027,7 +8027,7 @@ struct llm_build_context {
             struct ggml_tensor * Vcur;
 
             // self-attention
-            if (model.arch == LLM_ARCH_BERT || model.arch == LLM_ARCH_JINA_BERT) {
+            if (model.arch == LLM_ARCH_BERT || model.arch == LLM_ARCH_JINA_BERT_V2) {
                 Qcur = ggml_add(ctx0, ggml_mul_mat(ctx0, model.layers[il].wq, cur), model.layers[il].bq);
                 cb(Qcur, "Qcur", il);
@@ -8137,7 +8137,7 @@ struct llm_build_context {
                         model.layers[il].ffn_down, model.layers[il].ffn_down_b,
                         NULL,
                         LLM_FFN_GELU, LLM_FFN_SEQ, cb, il);
-            } else if (model.arch == LLM_ARCH_JINA_BERT) {
+            } else if (model.arch == LLM_ARCH_JINA_BERT_V2) {
                 cur = llm_build_ffn(ctx0, cur,
                         model.layers[il].ffn_up, NULL,
                         model.layers[il].ffn_gate, NULL,
@@ -10544,7 +10544,7 @@ static struct ggml_cgraph * llama_build_graph(
                 result = llm.build_refact();
             } break;
         case LLM_ARCH_BERT:
-        case LLM_ARCH_JINA_BERT:
+        case LLM_ARCH_JINA_BERT_V2:
         case LLM_ARCH_NOMIC_BERT:
             {
                 result = llm.build_bert();
@@ -15473,7 +15473,7 @@ enum llama_rope_type llama_rope_type(const struct llama_model * model) {
         case LLM_ARCH_REFACT:
         case LLM_ARCH_BLOOM:
         case LLM_ARCH_MAMBA:
-        case LLM_ARCH_JINA_BERT:
+        case LLM_ARCH_JINA_BERT_V2:
             return LLAMA_ROPE_TYPE_NONE;
 
         // use what we call a normal RoPE, operating on pairs of consecutive head values
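
On the llama.cpp side the same string is the contract: the loader reads general.architecture from the GGUF header, matches it against LLM_ARCH_NAMES, and the resulting LLM_ARCH_JINA_BERT_V2 value drives hparam loading, tensor creation, build_bert, and the RoPE-type answer shown above. A Python stand-in for that dispatch (not the C++ code itself), mainly to make explicit that a file still labelled "jina-bert" would no longer be recognized after this rename:

# Python stand-in for the C++ dispatch above; names mirror llama.cpp but the
# code is illustrative only.
from enum import Enum, auto


class LlmArch(Enum):
    BERT = auto()
    NOMIC_BERT = auto()
    JINA_BERT_V2 = auto()  # renamed from JINA_BERT
    UNKNOWN = auto()


LLM_ARCH_NAMES = {
    LlmArch.BERT: "bert",
    LlmArch.NOMIC_BERT: "nomic-bert",
    LlmArch.JINA_BERT_V2: "jina-bert-v2",  # must match gguf-py's MODEL_ARCH_NAMES
}


def llm_arch_from_string(name: str) -> LlmArch:
    # Reverse lookup, as the loader does when it reads "general.architecture".
    for arch, arch_name in LLM_ARCH_NAMES.items():
        if arch_name == name:
            return arch
    return LlmArch.UNKNOWN


assert llm_arch_from_string("jina-bert-v2") is LlmArch.JINA_BERT_V2
# A GGUF produced by the pre-rename converter still says "jina-bert" and would
# need to be reconverted:
assert llm_arch_from_string("jina-bert") is LlmArch.UNKNOWN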