From cc0ac09712c2ba745431f57bb7da02170c02a335 Mon Sep 17 00:00:00 2001
From: Joan Martinez
Date: Tue, 28 May 2024 20:45:04 +0200
Subject: [PATCH] feat: add changes to handle jina v2 base code

---
 convert-hf-to-gguf-update.py   |  1 +
 convert-hf-to-gguf.py          | 13 ++++++-------
 gguf-py/gguf/constants.py      |  6 ++++++
 gguf-py/gguf/tensor_mapping.py | 13 +++++++++++++
 llama.cpp                      | 24 ++++++++++++++++++++++++
 5 files changed, 50 insertions(+), 7 deletions(-)

diff --git a/convert-hf-to-gguf-update.py b/convert-hf-to-gguf-update.py
index 84b72348d..ee1eeaca0 100755
--- a/convert-hf-to-gguf-update.py
+++ b/convert-hf-to-gguf-update.py
@@ -82,6 +82,7 @@ models = [
     {"name": "jina-v2-es",   "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-es", },
     {"name": "jina-v2-de",   "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-de", },
     {"name": "smaug-bpe",    "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/abacusai/Smaug-Llama-3-70B-Instruct", },
+    {"name": "jina-v2-code", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-code", },
 ]

diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index 1b060e4e6..2ece07f81 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -422,9 +422,6 @@ class Model:
         # NOTE: if you get an error here, you need to update the convert-hf-to-gguf-update.py script
         # or pull the latest version of the model from Huggingface
         # don't edit the hashes manually!
-        if chkhsh == "0ef9807a4087ebef797fc749390439009c3b9eda9ad1a097abbe738f486c01e5":
-            # ref: https://huggingface.co/meta-llama/Meta-Llama-3-8B
-            res = "llama-bpe"
         if chkhsh == "049ecf7629871e3041641907f3de7c733e4dbfdc736f57d882ba0b0845599754":
             # ref: https://huggingface.co/deepseek-ai/deepseek-llm-7b-base
             res = "deepseek-llm"
@@ -461,9 +458,6 @@ class Model:
         if chkhsh == "b6dc8df998e1cfbdc4eac8243701a65afe638679230920b50d6f17d81c098166":
             # ref: https://huggingface.co/allenai/OLMo-1.7-7B-hf
             res = "olmo"
-        if chkhsh == "a8594e3edff7c29c003940395316294b2c623e09894deebbc65f33f1515df79e":
-            # ref: https://huggingface.co/databricks/dbrx-base
-            res = "dbrx"
         if chkhsh == "0876d13b50744004aa9aeae05e7b0647eac9d801b5ba4668afc01e709c15e19f":
             # ref: https://huggingface.co/jinaai/jina-embeddings-v2-base-en
             res = "jina-v2-en"
@@ -476,6 +470,9 @@ class Model:
         if chkhsh == "c136ed14d01c2745d4f60a9596ae66800e2b61fa45643e72436041855ad4089d":
             # ref: https://huggingface.co/abacusai/Smaug-Llama-3-70B-Instruct
             res = "smaug-bpe"
+        if chkhsh == "7967bfa498ade6b757b064f31e964dddbb80f8f9a4d68d4ba7998fcf281c531a":
+            # ref: https://huggingface.co/jinaai/jina-embeddings-v2-base-code
+            res = "jina-v2-code"

         if res is None:
             logger.warning("\n")
@@ -2442,11 +2439,13 @@ class JinaBertV2Model(BertModel):

     def get_tensors(self):
         for name, data in super().get_tensors():
-            if 'gated_layers' in name:
+            if 'gated_layer' in name:
                 d1 = data[:self.intermediate_size, :]
                 name1 = name.replace('gated_layers', 'gated_layers_w')
+                name1 = name1.replace('up_gated_layer', 'gated_layers_w')
                 d2 = data[self.intermediate_size:, :]
                 name2 = name.replace('gated_layers', 'gated_layers_v')
+                name2 = name2.replace('up_gated_layer', 'gated_layers_v')
                 yield name1, d1
                 yield name2, d2
                 continue

diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index 55ec2cb5c..47a108779 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -186,6 +186,8 @@ class MODEL_TENSOR(IntEnum):
     ATTN_Q_NORM    = auto()
     ATTN_K_NORM    = auto()
     LAYER_OUT_NORM = auto()
+    LAYER_NORM_1   = auto()
+    LAYER_NORM_2   = auto()
     SSM_IN         = auto()
     SSM_CONV1D     = auto()
     SSM_X          = auto()
@@ -274,6 +276,8 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
     MODEL_TENSOR.FFN_DOWN_EXP:   "blk.{bid}.ffn_down_exps",
     MODEL_TENSOR.FFN_UP_EXP:     "blk.{bid}.ffn_up_exps",
     MODEL_TENSOR.LAYER_OUT_NORM: "blk.{bid}.layer_output_norm",
+    MODEL_TENSOR.LAYER_NORM_1:   "blk.{bid}.layer_norm_1",
+    MODEL_TENSOR.LAYER_NORM_2:   "blk.{bid}.layer_norm_2",
     MODEL_TENSOR.SSM_IN:         "blk.{bid}.ssm_in",
     MODEL_TENSOR.SSM_CONV1D:     "blk.{bid}.ssm_conv1d",
     MODEL_TENSOR.SSM_X:          "blk.{bid}.ssm_x",
@@ -426,6 +430,8 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_GATE,
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.LAYER_OUT_NORM,
+        MODEL_TENSOR.LAYER_NORM_1,
+        MODEL_TENSOR.LAYER_NORM_2,
     ],
     MODEL_ARCH.MPT: [
         MODEL_TENSOR.TOKEN_EMBD,

diff --git a/gguf-py/gguf/tensor_mapping.py b/gguf-py/gguf/tensor_mapping.py
index 83e3c4c33..ea139339c 100644
--- a/gguf-py/gguf/tensor_mapping.py
+++ b/gguf-py/gguf/tensor_mapping.py
@@ -311,6 +311,7 @@ class TensorNameMap:
             "model.layers.{bid}.mlp.c_proj",      # starcoder2
             "encoder.layer.{bid}.mlp.wo",         # jina-bert-v2
             "model.layers.{bid}.residual_mlp.w2", # arctic
+            "encoder.layer.{bid}.mlp.down_layer", # jina-bert-v2
         ),

         MODEL_TENSOR.FFN_DOWN_EXP: (
@@ -350,6 +351,18 @@ class TensorNameMap:
             "encoder.layers.{bid}.norm2",                 # nomic-bert
             "transformer.decoder_layer.{bid}.rms_norm_3", # Grok
             "encoder.layer.{bid}.mlp.layernorm",          # jina-bert-v2
+            "encoder.layer.{bid}.layer_norm_1",           # jina-v2-code
+            "encoder.layer.{bid}.layer_norm_2"            # jina-v2-code
         ),
+
+
+        MODEL_TENSOR.LAYER_NORM_1: (
+            "encoder.layer.{bid}.layer_norm_1", # jina-v2-code
+        ),
+
+
+        MODEL_TENSOR.LAYER_NORM_2: (
+            "encoder.layer.{bid}.layer_norm_2", # jina-v2-code
+        ),

         MODEL_TENSOR.SSM_IN: (

diff --git a/llama.cpp b/llama.cpp
index 10c9e47dd..229b63a29 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -496,6 +496,8 @@ enum llm_tensor {
     LLM_TENSOR_ATTN_KV_B,
     LLM_TENSOR_ATTN_Q_A_NORM,
     LLM_TENSOR_ATTN_KV_A_NORM,
+    LLM_TENSOR_LAYER_NORM_1,
+    LLM_TENSOR_LAYER_NORM_2,
 };

 static const std::map<llm_arch, std::map<llm_tensor, std::string>> LLM_TENSOR_NAMES = {
@@ -717,6 +719,8 @@ static const std::map<llm_arch, std::map<llm_tensor, std::string>> LLM_TENSOR_NA
             { LLM_TENSOR_FFN_DOWN,     "blk.%d.ffn_down" },
             { LLM_TENSOR_FFN_GATE,     "blk.%d.ffn_gate" },
             { LLM_TENSOR_FFN_UP,       "blk.%d.ffn_up" },
+            { LLM_TENSOR_LAYER_NORM_1, "blk.%d.layer_norm_1" },
+            { LLM_TENSOR_LAYER_NORM_2, "blk.%d.layer_norm_2" },
         },
     },
     {
@@ -2010,6 +2014,12 @@ struct llama_layer {
     struct ggml_tensor * layer_out_norm_b;
     struct ggml_tensor * ffn_norm_exps;

+    // extra normalization layers needed by `jina-embeddings-v2-base-code`
+    struct ggml_tensor * layer_norm_1;
+    struct ggml_tensor * layer_norm_1_b;
+    struct ggml_tensor * layer_norm_2;
+    struct ggml_tensor * layer_norm_2_b;
+
     // ff
     struct ggml_tensor * ffn_gate; // w1
     struct ggml_tensor * ffn_down; // w2
@@ -5537,6 +5547,12 @@ static bool llm_load_tensors(
                     layer.attn_out_norm   = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_OUT_NORM, "weight", i), {n_embd}); //output_norm
                     layer.attn_out_norm_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_OUT_NORM, "bias", i),   {n_embd});

+                    layer.layer_norm_1   = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_LAYER_NORM_1, "weight", i), {n_embd}, llama_model_loader::TENSOR_NOT_REQUIRED);
+                    layer.layer_norm_1_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_LAYER_NORM_1, "bias", i),   {n_embd}, llama_model_loader::TENSOR_NOT_REQUIRED);
+
+                    layer.layer_norm_2   = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_LAYER_NORM_2, "weight", i), {n_embd}, llama_model_loader::TENSOR_NOT_REQUIRED);
+                    layer.layer_norm_2_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_LAYER_NORM_2, "bias", i),   {n_embd}, llama_model_loader::TENSOR_NOT_REQUIRED);
+
                     layer.ffn_up   = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP,   "weight", i), {n_embd, n_ff});
                     layer.ffn_gate = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff});
@@ -8500,6 +8516,14 @@ struct llm_build_context {
                 // attention layer norm
                 cur = llm_build_norm(ctx0, cur, hparams, model.layers[il].attn_out_norm, model.layers[il].attn_out_norm_b, LLM_NORM, cb, il);

+                if (model.layers[il].layer_norm_1 != nullptr) {
+                    cur = llm_build_norm(ctx0, cur, hparams, model.layers[il].layer_norm_1, model.layers[il].layer_norm_1_b, LLM_NORM, cb, il);
+                }
+
+                if (model.layers[il].layer_norm_2 != nullptr) {
+                    cur = llm_build_norm(ctx0, cur, hparams, model.layers[il].layer_norm_2, model.layers[il].layer_norm_2_b, LLM_NORM, cb, il);
+                }
+
                 struct ggml_tensor * ffn_inp = cur;
                 cb(ffn_inp, "ffn_inp", il);
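
Note on the converter change: jina-bert-v2 checkpoints store the two GLU projections fused into a single matrix named `gated_layers` (or `up_gated_layer` in the code variant), and the widened `'gated_layer' in name` test above catches both so `get_tensors` can slice the fused weight into the `gated_layers_w` and `gated_layers_v` halves llama.cpp expects. A minimal standalone sketch of that split, using NumPy and toy dimensions (`intermediate_size = 4`, `n_embd = 3` are illustrative, not the model's real sizes):

import numpy as np

intermediate_size, n_embd = 4, 3  # toy dimensions for illustration

def split_gated(name, data):
    # mirror of the patched get_tensors logic: split a fused GLU weight
    # of shape (2 * intermediate_size, n_embd) into its w and v halves
    if 'gated_layer' in name:  # matches both 'gated_layers' and 'up_gated_layer'
        d1 = data[:intermediate_size, :]
        name1 = name.replace('gated_layers', 'gated_layers_w')
        name1 = name1.replace('up_gated_layer', 'gated_layers_w')
        d2 = data[intermediate_size:, :]
        name2 = name.replace('gated_layers', 'gated_layers_v')
        name2 = name2.replace('up_gated_layer', 'gated_layers_v')
        yield name1, d1
        yield name2, d2
    else:
        yield name, data

fused = np.arange(2 * intermediate_size * n_embd, dtype=np.float32).reshape(2 * intermediate_size, n_embd)
for out_name, out_data in split_gated('encoder.layer.0.mlp.up_gated_layer', fused):
    print(out_name, out_data.shape)  # ..._w (4, 3), then ..._v (4, 3)

Note that the two `replace` calls must chain (`name1 = name1.replace(...)`), otherwise the second call resets `name1` from the original `name` and the rename of plain `gated_layers` tensors is silently lost.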
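On the `chkhsh` values: as I read convert-hf-to-gguf-update.py, each hash is the SHA-256 of the stringified token IDs produced by running the model's tokenizer over a fixed probe text, so any behavioral difference in pre-tokenization yields a new hash, which is why the new "7967bfa4..." entry is needed for jina-v2-code. A sketch of the idea, with a short stand-in for the script's real (much longer) `chktxt`, so the resulting hash will not match the value in the patch:

from hashlib import sha256
from transformers import AutoTokenizer  # downloads the tokenizer on first use

# stand-in probe text; the real script uses a long fixed chktxt covering
# digits, whitespace runs, CJK, emoji, etc. to exercise the pre-tokenizer
chktxt = "Hello World 3.14 !!! \t\n 123456789"

tokenizer = AutoTokenizer.from_pretrained("jinaai/jina-embeddings-v2-base-code")
chktok = tokenizer.encode(chktxt)
chkhsh = sha256(str(chktok).encode()).hexdigest()
print(chkhsh)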
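On the llama.cpp side, the new tensors are created with `llama_model_loader::TENSOR_NOT_REQUIRED`, so for GGUFs that lack them the pointers stay null and the two `llm_build_norm` calls are skipped; only `jina-embeddings-v2-base-code` conversions take the extra branches. A rough NumPy rendering of what the graph computes between the attention output norm and `ffn_inp` (the `layer_norm` helper here is a plain affine LayerNorm standing in for `llm_build_norm` with `LLM_NORM`; the dict-based `layer` is illustrative, not llama.cpp's struct):

import numpy as np

def layer_norm(x, w, b, eps=1e-5):
    # affine LayerNorm over the embedding dimension
    mu = x.mean(axis=-1, keepdims=True)
    var = x.var(axis=-1, keepdims=True)
    return (x - mu) / np.sqrt(var + eps) * w + b

def post_attention(cur, layer):
    # always-present attention output norm
    cur = layer_norm(cur, layer['attn_out_norm'], layer['attn_out_norm_b'])
    # extra norms: present only when the GGUF carried the optional tensors
    if layer.get('layer_norm_1') is not None:
        cur = layer_norm(cur, layer['layer_norm_1'], layer['layer_norm_1_b'])
    if layer.get('layer_norm_2') is not None:
        cur = layer_norm(cur, layer['layer_norm_2'], layer['layer_norm_2_b'])
    return cur  # becomes ffn_inp

n_embd = 4
layer = {'attn_out_norm': np.ones(n_embd), 'attn_out_norm_b': np.zeros(n_embd)}
x = np.random.randn(2, n_embd)
print(post_attention(x, layer).shape)  # optional norms absent -> single norm applied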