feat: add jina v2 support

commit 67022f14f2
parent 0fd6c1f015
Author: bwanglzu
Date:   2024-03-14 18:20:09 +01:00

4 changed files with 68 additions and 3 deletions


@@ -1783,6 +1783,16 @@ class NomicBertModel(BertModel):
             yield name, data


+@Model.register("JinaBertModel")
+class JinaBertModel(BertModel):
+    model_arch = gguf.MODEL_ARCH.JINA_BERT
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        assert self.hparams["position_embedding_type"] == "alibi"
+
+
 @Model.register("GemmaForCausalLM")
 class GemmaModel(Model):
     model_arch = gguf.MODEL_ARCH.GEMMA
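
Note: `@Model.register(...)` dispatches on the Hugging Face `architectures` string, so registering `JinaBertModel` is what lets convert-hf-to-gguf.py pick up jina v2 checkpoints. A minimal sketch of that registry pattern is below; the helper names (`_registry`, `class_for`, `DummyJinaBertModel`) are illustrative stand-ins, not the script's actual internals.

```python
# Sketch of a register-by-architecture decorator (hypothetical names).
_registry: dict[str, type] = {}

def register(*architectures: str):
    def decorator(cls: type) -> type:
        for arch in architectures:
            _registry[arch] = cls  # keyed by the HF config "architectures" entry
        return cls
    return decorator

@register("JinaBertModel")
class DummyJinaBertModel:
    model_arch = "jina-bert"  # stand-in for gguf.MODEL_ARCH.JINA_BERT

def class_for(hf_architecture: str) -> type:
    return _registry[hf_architecture]

print(class_for("JinaBertModel").model_arch)  # -> jina-bert
```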


@@ -107,6 +107,7 @@ class MODEL_ARCH(IntEnum):
     REFACT     = auto()
     BERT       = auto()
     NOMIC_BERT = auto()
+    JINA_BERT  = auto()
     BLOOM      = auto()
     STABLELM   = auto()
     QWEN       = auto()
@@ -173,6 +174,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
     MODEL_ARCH.REFACT:     "refact",
     MODEL_ARCH.BERT:       "bert",
     MODEL_ARCH.NOMIC_BERT: "nomic-bert",
+    MODEL_ARCH.JINA_BERT:  "jina-bert",
     MODEL_ARCH.BLOOM:      "bloom",
     MODEL_ARCH.STABLELM:   "stablelm",
     MODEL_ARCH.QWEN:       "qwen",
@@ -326,6 +328,20 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_UP,
         MODEL_TENSOR.LAYER_OUT_NORM,
     ],
+    MODEL_ARCH.JINA_BERT: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.TOKEN_EMBD_NORM,
+        MODEL_TENSOR.TOKEN_TYPES,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.ATTN_OUT_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.LAYER_OUT_NORM,
+    ],
     MODEL_ARCH.MPT: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
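
Note on why the change touches three tables at once: `MODEL_ARCH` is an `IntEnum` built with `auto()`, so inserting `JINA_BERT` after `NOMIC_BERT` shifts the numeric value of every later member. The name map and tensor list are therefore keyed by the enum member itself, and lookups should never compare against hard-coded integers. A trimmed illustration (not the real tables):

```python
from enum import IntEnum, auto

# Trimmed illustration: inserting JINA_BERT renumbers everything after it,
# which is why MODEL_ARCH_NAMES and MODEL_TENSORS are keyed by the member.
class MODEL_ARCH(IntEnum):
    BERT       = auto()
    NOMIC_BERT = auto()
    JINA_BERT  = auto()  # new in this commit
    BLOOM      = auto()  # numeric value shifted by the insertion above

MODEL_ARCH_NAMES = {
    MODEL_ARCH.NOMIC_BERT: "nomic-bert",
    MODEL_ARCH.JINA_BERT:  "jina-bert",
}

print(MODEL_ARCH_NAMES[MODEL_ARCH.JINA_BERT])  # jina-bert
print(MODEL_ARCH.BLOOM.value)                  # 4 here, not a stable constant
```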


@@ -207,6 +207,9 @@ class TensorNameMap:
             "model.layers.{bid}.mlp.up_proj",                   # llama-hf refact
             "layers.{bid}.feed_forward.w3",                     # llama-pth
             "encoder.layer.{bid}.intermediate.dense",           # bert
+            "encoder.layer.{bid}.mlp.gated_layers",             # jina-bert
+            "encoder.layer.{bid}.mlp.layernorm",                # jina-bert
+            "encoder.layer.{bid}.mlp.wo",                       # jina-bert
             "transformer.h.{bid}.mlp.fc_in",                    # gpt-j
             "language_model.encoder.layers.{bid}.mlp.dense_h_to_4h",  # persimmon
             "model.layers.{bid}.mlp.dense_h_to_4h",             # persimmon


@@ -201,6 +201,7 @@ enum llm_arch {
     LLM_ARCH_REFACT,
     LLM_ARCH_BERT,
     LLM_ARCH_NOMIC_BERT,
+    LLM_ARCH_JINA_BERT,
     LLM_ARCH_BLOOM,
     LLM_ARCH_STABLELM,
     LLM_ARCH_QWEN,
@@ -230,6 +231,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_REFACT,          "refact"     },
     { LLM_ARCH_BERT,            "bert"       },
     { LLM_ARCH_NOMIC_BERT,      "nomic-bert" },
+    { LLM_ARCH_JINA_BERT,       "jina-bert"  },
     { LLM_ARCH_BLOOM,           "bloom"      },
     { LLM_ARCH_STABLELM,        "stablelm"   },
     { LLM_ARCH_QWEN,            "qwen"       },
@@ -607,6 +609,22 @@ static const std::map<llm_arch, std::map<llm_tensor, std::string>> LLM_TENSOR_NAMES = {
             { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
         },
     },
+    {
+        LLM_ARCH_JINA_BERT,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
+            { LLM_TENSOR_TOKEN_TYPES,     "token_types" },
+            { LLM_TENSOR_ATTN_OUT_NORM,   "blk.%d.attn_output_norm" },
+            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_LAYER_OUT_NORM,  "blk.%d.layer_output_norm" },
+            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+        },
+    },
     {
         LLM_ARCH_BLOOM,
         {
@@ -3455,6 +3473,18 @@ static void llm_load_hparams(
                     model.type = e_model::MODEL_137M;
                 }
             } break;
+        case LLM_ARCH_JINA_BERT:
+            {
+                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
+                ml.get_key(LLM_KV_ATTENTION_CAUSAL, hparams.causal_attn);
+                ml.get_key(LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT, hparams.n_vocab_type);
+                ml.get_key(LLM_KV_POOLING_TYPE, hparams.pooling_type);
+
+                switch (hparams.n_layer) {
+                    case 4:  model.type = e_model::MODEL_33M;  break; // jina-embeddings-small
+                    case 12: model.type = e_model::MODEL_137M; break; // jina-embeddings-base
+                }
+            } break;
         case LLM_ARCH_BLOOM:
             {
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
@@ -4329,6 +4359,7 @@ static bool llm_load_tensors(
                    }
                } break;
            case LLM_ARCH_BERT:
+           case LLM_ARCH_JINA_BERT:
            case LLM_ARCH_NOMIC_BERT:
                {
                    model.tok_embd = ml.create_tensor(ctx_input, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab});
@@ -4346,7 +4377,7 @@ static bool llm_load_tensors(
                        auto & layer = model.layers[i];

-                       if (model.arch == LLM_ARCH_BERT) {
+                       if (model.arch == LLM_ARCH_BERT || model.arch == LLM_ARCH_JINA_BERT) {
                            layer.wq = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_Q, "weight", i), {n_embd, n_embd});
                            layer.bq = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_Q, "bias", i), {n_embd});
@@ -4367,7 +4398,7 @@ static bool llm_load_tensors(
                        layer.ffn_up   = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP,   "weight", i), {n_embd, n_ff});
                        layer.ffn_down = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_DOWN, "weight", i), {n_ff, n_embd});

-                       if (model.arch == LLM_ARCH_BERT) {
+                       if (model.arch == LLM_ARCH_BERT || model.arch == LLM_ARCH_JINA_BERT) {
                            layer.bo       = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_OUT, "bias", i), {n_embd});
                            layer.ffn_up_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_UP,   "bias", i), {n_ff});
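
Putting the `LLM_TENSOR_NAMES` entries and this bias-loading branch together, the per-block GGUF tensor names llama.cpp will look for in a jina-bert file come out roughly as below. The helper is a hedged illustration derived from the diff above, not code from the repository.

```python
# Hedged illustration: per-block GGUF weight names implied by the
# LLM_TENSOR_NAMES entries added above for LLM_ARCH_JINA_BERT.
PER_BLOCK_BASES = (
    "attn_q", "attn_k", "attn_v", "attn_output",
    "attn_output_norm", "ffn_up", "ffn_down", "layer_output_norm",
)

def block_weight_names(bid: int) -> list[str]:
    # BERT-style models also carry ".bias" tensors for several of these;
    # see the LLM_ARCH_JINA_BERT branches in llm_load_tensors above.
    return [f"blk.{bid}.{base}.weight" for base in PER_BLOCK_BASES]

print(block_weight_names(0))
```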
@@ -6512,7 +6543,7 @@ struct llm_build_context {
            struct ggml_tensor * Vcur;

            // self-attention
-           if (model.arch == LLM_ARCH_BERT) {
+           if (model.arch == LLM_ARCH_BERT || model.arch == LLM_ARCH_JINA_BERT) {
                Qcur = ggml_add(ctx0, ggml_mul_mat(ctx0, model.layers[il].wq, cur), model.layers[il].bq);
                cb(Qcur, "Qcur", il);
@@ -8411,6 +8442,10 @@ static struct ggml_cgraph * llama_build_graph(
            {
                result = llm.build_bert();
            } break;
+       case LLM_ARCH_JINA_BERT:
+           {
+               result = llm.build_bert();
+           } break;
        case LLM_ARCH_BLOOM:
            {
                result = llm.build_bloom();
@@ -13094,6 +13129,7 @@ enum llama_rope_type llama_rope_type(const struct llama_model * model) {
        case LLM_ARCH_REFACT:
        case LLM_ARCH_BLOOM:
        case LLM_ARCH_MAMBA:
+       case LLM_ARCH_JINA_BERT:
            return LLAMA_ROPE_TYPE_NONE;

        // use what we call a normal RoPE, operating on pairs of consecutive head values
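
The converter asserts `position_embedding_type == "alibi"`, which is why jina-bert lands in the `LLAMA_ROPE_TYPE_NONE` group here: positions are encoded through per-head attention-bias slopes rather than RoPE. As a reminder of what those slopes look like, below is the standard ALiBi formula for a power-of-two head count; it is not code from this commit.

```python
# Standard ALiBi head slopes for a power-of-two number of heads:
# head i (1-indexed) gets slope 2 ** (-8 * i / n_heads). Shown only to
# illustrate why no RoPE is applied for this architecture.
def alibi_slopes(n_heads: int) -> list[float]:
    return [2.0 ** (-8.0 * (i + 1) / n_heads) for i in range(n_heads)]

print(alibi_slopes(8))  # [0.5, 0.25, 0.125, ..., 0.00390625]
```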