feat: rename Jina Bert to Jina Bert V2

parent dfa067631c
commit c3f4b1f2d2

4 changed files with 22 additions and 22 deletions
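The rename runs through both sides of the tree: the Python conversion tooling (the HF converter class, the gguf-py architecture constants, and the tensor-name mapping) and the C++ loader in llama.cpp (the llm_arch enum, its name tables, and the BERT graph-build paths). As a minimal sketch of what the renamed constant resolves to, assuming the gguf-py package from this tree is importable:

# Minimal sketch: look up the renamed architecture constant and its string name.
# Assumes the gguf-py package from this repository is on sys.path.
import gguf

arch = gguf.MODEL_ARCH.JINA_BERT_V2
print(arch.name)                    # "JINA_BERT_V2"
print(gguf.MODEL_ARCH_NAMES[arch])  # expected: "jina-bert-v2"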
@@ -2718,8 +2718,8 @@ class OlmoModel(Model):


 @Model.register("JinaBertModel", "JinaBertForMaskedLM")
-class JinaBertModel(BertModel):
-    model_arch = gguf.MODEL_ARCH.JINA_BERT
+class JinaBertV2Model(BertModel):
+    model_arch = gguf.MODEL_ARCH.JINA_BERT_V2

     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
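The Hugging Face architecture strings passed to @Model.register stay the same; only the converter class and its model_arch constant are renamed. A simplified, self-contained sketch of how decorator-based registration can dispatch an architecture string to a converter class (an illustrative stand-in, not the converter script's actual Model class):

# Illustrative stand-in for decorator-based model registration; not the real
# Model class used by the converter script.
from __future__ import annotations


class Model:
    _registry: dict[str, type["Model"]] = {}

    @classmethod
    def register(cls, *names: str):
        # Map each Hugging Face architecture name to the decorated class.
        def wrapper(model_cls: type["Model"]) -> type["Model"]:
            for name in names:
                cls._registry[name] = model_cls
            return model_cls
        return wrapper

    @classmethod
    def from_architecture(cls, name: str) -> type["Model"]:
        return cls._registry[name]


class BertModel(Model):
    pass


@Model.register("JinaBertModel", "JinaBertForMaskedLM")
class JinaBertV2Model(BertModel):
    model_arch = "jina-bert-v2"  # stands in for gguf.MODEL_ARCH.JINA_BERT_V2


print(Model.from_architecture("JinaBertForMaskedLM").__name__)  # JinaBertV2Model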
@@ -118,7 +118,7 @@ class MODEL_ARCH(IntEnum):
     REFACT = auto()
     BERT = auto()
     NOMIC_BERT = auto()
-    JINA_BERT = auto()
+    JINA_BERT_V2 = auto()
     BLOOM = auto()
     STABLELM = auto()
     QWEN = auto()

@@ -195,7 +195,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
     MODEL_ARCH.REFACT: "refact",
     MODEL_ARCH.BERT: "bert",
     MODEL_ARCH.NOMIC_BERT: "nomic-bert",
-    MODEL_ARCH.JINA_BERT: "jina-bert",
+    MODEL_ARCH.JINA_BERT_V2: "jina-bert-v2",
     MODEL_ARCH.BLOOM: "bloom",
     MODEL_ARCH.STABLELM: "stablelm",
     MODEL_ARCH.QWEN: "qwen",

@@ -380,7 +380,7 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_UP,
         MODEL_TENSOR.LAYER_OUT_NORM,
     ],
-    MODEL_ARCH.JINA_BERT: [
+    MODEL_ARCH.JINA_BERT_V2: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.TOKEN_EMBD_NORM,
         MODEL_TENSOR.TOKEN_TYPES,
@@ -238,7 +238,7 @@ class TensorNameMap:
             "model.layers.{bid}.feed_forward.w3",     # internlm2
             "encoder.layers.{bid}.mlp.fc11",          # nomic-bert
             "model.layers.{bid}.mlp.c_fc",            # starcoder2
-            "encoder.layer.{bid}.mlp.gated_layers_v", # jina-bert
+            "encoder.layer.{bid}.mlp.gated_layers_v", # jina-bert-v2
         ),

         MODEL_TENSOR.FFN_UP_EXP: (

@@ -265,7 +265,7 @@ class TensorNameMap:
             "model.layers.layers.{bid}.mlp.gate_proj", # plamo
             "model.layers.{bid}.feed_forward.w1",      # internlm2
             "encoder.layers.{bid}.mlp.fc12",           # nomic-bert
-            "encoder.layer.{bid}.mlp.gated_layers_w",  # jina-bert
+            "encoder.layer.{bid}.mlp.gated_layers_w",  # jina-bert-v2
         ),

         MODEL_TENSOR.FFN_GATE_EXP: (

@@ -299,7 +299,7 @@ class TensorNameMap:
             "model.layers.{bid}.feed_forward.w2", # internlm2
             "encoder.layers.{bid}.mlp.fc2",       # nomic-bert
             "model.layers.{bid}.mlp.c_proj",      # starcoder2
-            "encoder.layer.{bid}.mlp.wo",         # jina-bert
+            "encoder.layer.{bid}.mlp.wo",         # jina-bert-v2
         ),

         MODEL_TENSOR.FFN_DOWN_EXP: (

@@ -318,7 +318,7 @@ class TensorNameMap:
             "model.layers.{bid}.self_attn.q_layernorm",       # persimmon
             "model.layers.{bid}.self_attn.q_norm",            # cohere
             "transformer.blocks.{bid}.attn.q_ln",             # sea-lion
-            "encoder.layer.{bid}.attention.self.layer_norm_q" # jina-bert
+            "encoder.layer.{bid}.attention.self.layer_norm_q" # jina-bert-v2
         ),

         MODEL_TENSOR.ATTN_K_NORM: (

@@ -326,7 +326,7 @@ class TensorNameMap:
             "model.layers.{bid}.self_attn.k_layernorm",       # persimmon
             "model.layers.{bid}.self_attn.k_norm",            # cohere
             "transformer.blocks.{bid}.attn.k_ln",             # sea-lion
-            "encoder.layer.{bid}.attention.self.layer_norm_k" # jina-bert
+            "encoder.layer.{bid}.attention.self.layer_norm_k" # jina-bert-v2
         ),

         MODEL_TENSOR.ROPE_FREQS: (

@@ -337,7 +337,7 @@ class TensorNameMap:
             "encoder.layer.{bid}.output.LayerNorm",        # bert
             "encoder.layers.{bid}.norm2",                  # nomic-bert
             "transformer.decoder_layer.{bid}.rms_norm_3",  # Grok
-            "encoder.layer.{bid}.mlp.layernorm",           # jina-bert
+            "encoder.layer.{bid}.mlp.layernorm",           # jina-bert-v2
         ),

         MODEL_TENSOR.SSM_IN: (
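All of the jina-bert entries above keep their Hugging Face tensor-name templates; only the trailing comment label moves from jina-bert to jina-bert-v2. A rough illustration of how such {bid} templates can be matched against per-block tensor names (a simplified sketch, not the actual TensorNameMap logic):

# Simplified illustration of matching block-indexed tensor names against the
# "{bid}" templates shown above (not the real TensorNameMap implementation).
FFN_DOWN_TEMPLATES = (
    "model.layers.{bid}.feed_forward.w2",  # internlm2
    "encoder.layers.{bid}.mlp.fc2",        # nomic-bert
    "model.layers.{bid}.mlp.c_proj",       # starcoder2
    "encoder.layer.{bid}.mlp.wo",          # jina-bert-v2
)


def matches_ffn_down(name: str, n_blocks: int) -> bool:
    # Expand each template for every block index and compare literally.
    return any(
        name == template.format(bid=bid)
        for template in FFN_DOWN_TEMPLATES
        for bid in range(n_blocks)
    )


print(matches_ffn_down("encoder.layer.3.mlp.wo", n_blocks=24))  # True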
llama.cpp
@@ -205,7 +205,7 @@ enum llm_arch {
     LLM_ARCH_REFACT,
     LLM_ARCH_BERT,
     LLM_ARCH_NOMIC_BERT,
-    LLM_ARCH_JINA_BERT,
+    LLM_ARCH_JINA_BERT_V2,
     LLM_ARCH_BLOOM,
     LLM_ARCH_STABLELM,
     LLM_ARCH_QWEN,

@@ -241,7 +241,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_REFACT, "refact" },
     { LLM_ARCH_BERT, "bert" },
     { LLM_ARCH_NOMIC_BERT, "nomic-bert" },
-    { LLM_ARCH_JINA_BERT, "jina-bert" },
+    { LLM_ARCH_JINA_BERT_V2, "jina-bert-v2" },
     { LLM_ARCH_BLOOM, "bloom" },
     { LLM_ARCH_STABLELM, "stablelm" },
     { LLM_ARCH_QWEN, "qwen" },

@@ -690,7 +690,7 @@ static const std::map<llm_arch, std::map<llm_tensor, std::string>> LLM_TENSOR_NA
         },
     },
     {
-        LLM_ARCH_JINA_BERT,
+        LLM_ARCH_JINA_BERT_V2,
         {
             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
             { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
@@ -3893,7 +3893,7 @@ static void llm_load_hparams(
                         model.type = e_model::MODEL_335M; break; // bge-large
                 }
             } break;
-        case LLM_ARCH_JINA_BERT:
+        case LLM_ARCH_JINA_BERT_V2:
             {
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
                 ml.get_key(LLM_KV_ATTENTION_CAUSAL, hparams.causal_attn);

@@ -4137,7 +4137,7 @@ static void llm_load_hparams(

     model.ftype = ml.ftype;

-    if (hparams.f_max_alibi_bias > 0.0f && model.arch != LLM_ARCH_JINA_BERT) {
+    if (hparams.f_max_alibi_bias > 0.0f && model.arch != LLM_ARCH_JINA_BERT_V2) {
         hparams.need_kq_pos = true;
     }

@@ -5113,7 +5113,7 @@ static bool llm_load_tensors(
                         layer.layer_out_norm_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_LAYER_OUT_NORM, "bias", i), {n_embd});
                     }
                 } break;
-            case LLM_ARCH_JINA_BERT:
+            case LLM_ARCH_JINA_BERT_V2:
                 {
                     model.tok_embd = ml.create_tensor(ctx_input, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}); // word_embeddings
                     model.type_embd = ml.create_tensor(ctx_input, tn(LLM_TENSOR_TOKEN_TYPES, "weight"), {n_embd, n_vocab_type}); //token_type_embeddings
@@ -7994,7 +7994,7 @@ struct llm_build_context {
         struct ggml_tensor * inpL;
         struct ggml_tensor * inp_pos = nullptr;

-        if (model.arch != LLM_ARCH_JINA_BERT) {
+        if (model.arch != LLM_ARCH_JINA_BERT_V2) {
             inp_pos = build_inp_pos();
         }
         struct ggml_tensor * inp_mean = build_inp_mean();

@@ -8027,7 +8027,7 @@ struct llm_build_context {
             struct ggml_tensor * Vcur;

             // self-attention
-            if (model.arch == LLM_ARCH_BERT || model.arch == LLM_ARCH_JINA_BERT) {
+            if (model.arch == LLM_ARCH_BERT || model.arch == LLM_ARCH_JINA_BERT_V2) {
                 Qcur = ggml_add(ctx0, ggml_mul_mat(ctx0, model.layers[il].wq, cur), model.layers[il].bq);
                 cb(Qcur, "Qcur", il);

@@ -8137,7 +8137,7 @@ struct llm_build_context {
                         model.layers[il].ffn_down, model.layers[il].ffn_down_b,
                         NULL,
                         LLM_FFN_GELU, LLM_FFN_SEQ, cb, il);
-            } else if (model.arch == LLM_ARCH_JINA_BERT) {
+            } else if (model.arch == LLM_ARCH_JINA_BERT_V2) {
                 cur = llm_build_ffn(ctx0, cur,
                         model.layers[il].ffn_up, NULL,
                         model.layers[il].ffn_gate, NULL,
@@ -10544,7 +10544,7 @@ static struct ggml_cgraph * llama_build_graph(
                     result = llm.build_refact();
                 } break;
             case LLM_ARCH_BERT:
-            case LLM_ARCH_JINA_BERT:
+            case LLM_ARCH_JINA_BERT_V2:
             case LLM_ARCH_NOMIC_BERT:
                 {
                     result = llm.build_bert();

@@ -15473,7 +15473,7 @@ enum llama_rope_type llama_rope_type(const struct llama_model * model) {
         case LLM_ARCH_REFACT:
         case LLM_ARCH_BLOOM:
         case LLM_ARCH_MAMBA:
-        case LLM_ARCH_JINA_BERT:
+        case LLM_ARCH_JINA_BERT_V2:
             return LLAMA_ROPE_TYPE_NONE;

         // use what we call a normal RoPE, operating on pairs of consecutive head values