llama : add support for Nomic Embed (#5468)

This commit is contained in:
Jared Van Bortel 2024-02-13 12:03:53 -05:00 committed by GitHub
parent c4e6dd59e4
commit ea9c8e1143
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 273 additions and 113 deletions

View file

@ -87,27 +87,28 @@ class Keys:
class MODEL_ARCH(IntEnum):
LLAMA = auto()
FALCON = auto()
BAICHUAN = auto()
GPT2 = auto()
GPTJ = auto()
GPTNEOX = auto()
MPT = auto()
STARCODER = auto()
PERSIMMON = auto()
REFACT = auto()
BERT = auto()
BLOOM = auto()
STABLELM = auto()
QWEN = auto()
QWEN2 = auto()
PHI2 = auto()
PLAMO = auto()
CODESHELL = auto()
ORION = auto()
LLAMA = auto()
FALCON = auto()
BAICHUAN = auto()
GPT2 = auto()
GPTJ = auto()
GPTNEOX = auto()
MPT = auto()
STARCODER = auto()
PERSIMMON = auto()
REFACT = auto()
BERT = auto()
NOMIC_BERT = auto()
BLOOM = auto()
STABLELM = auto()
QWEN = auto()
QWEN2 = auto()
PHI2 = auto()
PLAMO = auto()
CODESHELL = auto()
ORION = auto()
INTERNLM2 = auto()
MINICPM = auto()
MINICPM = auto()
class MODEL_TENSOR(IntEnum):
@ -153,6 +154,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
MODEL_ARCH.PERSIMMON: "persimmon",
MODEL_ARCH.REFACT: "refact",
MODEL_ARCH.BERT: "bert",
MODEL_ARCH.NOMIC_BERT: "nomic-bert",
MODEL_ARCH.BLOOM: "bloom",
MODEL_ARCH.STABLELM: "stablelm",
MODEL_ARCH.QWEN: "qwen",
@ -282,6 +284,20 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
MODEL_TENSOR.FFN_UP,
MODEL_TENSOR.LAYER_OUT_NORM,
],
MODEL_ARCH.NOMIC_BERT: [
MODEL_TENSOR.TOKEN_EMBD,
MODEL_TENSOR.TOKEN_EMBD_NORM,
MODEL_TENSOR.TOKEN_TYPES,
MODEL_TENSOR.POS_EMBD,
MODEL_TENSOR.OUTPUT_NORM,
MODEL_TENSOR.ATTN_OUT_NORM,
MODEL_TENSOR.ATTN_QKV,
MODEL_TENSOR.ATTN_OUT,
MODEL_TENSOR.FFN_GATE,
MODEL_TENSOR.FFN_DOWN,
MODEL_TENSOR.FFN_UP,
MODEL_TENSOR.LAYER_OUT_NORM,
],
MODEL_ARCH.MPT: [
MODEL_TENSOR.TOKEN_EMBD,
MODEL_TENSOR.OUTPUT_NORM,

View file

@ -15,7 +15,7 @@ class TensorNameMap:
"word_embeddings", # bloom
"model.embed_tokens", # llama-hf
"tok_embeddings", # llama-pth
"embeddings.word_embeddings", # bert
"embeddings.word_embeddings", # bert nomic-bert
"language_model.embedding.word_embeddings", # persimmon
"wte", # gpt2
"transformer.embd.wte", # phi2
@ -24,13 +24,14 @@ class TensorNameMap:
# Token type embeddings
MODEL_TENSOR.TOKEN_TYPES: (
"embeddings.token_type_embeddings", # bert
"embeddings.token_type_embeddings", # bert nomic-bert
),
# Normalization of token embeddings
MODEL_TENSOR.TOKEN_EMBD_NORM: (
"word_embeddings_layernorm", # bloom
"embeddings.LayerNorm", # bert
"emb_ln", # nomic-bert
),
# Position embeddings
@ -103,6 +104,7 @@ class TensorNameMap:
"model.layers.{bid}.self_attn.query_key_value", # persimmon
"h.{bid}.attn.c_attn", # gpt2
"transformer.h.{bid}.mixer.Wqkv", # phi2
"encoder.layers.{bid}.attn.Wqkv", # nomic-bert
),
# Attention query
@ -152,11 +154,13 @@ class TensorNameMap:
"transformer.h.{bid}.mixer.out_proj", # phi2
"model.layers.layers.{bid}.self_attn.o_proj", # plamo
"model.layers.{bid}.attention.wo", # internlm2
"encoder.layers.{bid}.attn.out_proj", # nomic-bert
),
# Attention output norm
MODEL_TENSOR.ATTN_OUT_NORM: (
"encoder.layer.{bid}.attention.output.LayerNorm", # bert
"encoder.layers.{bid}.norm1", # nomic-bert
),
# Rotary embeddings
@ -205,6 +209,7 @@ class TensorNameMap:
"model.layers.{bid}.mlp.fc1", # phi2
"model.layers.layers.{bid}.mlp.up_proj", # plamo
"model.layers.{bid}.feed_forward.w3", # internlm2
"encoder.layers.{bid}.mlp.fc11", # nomic-bert
),
MODEL_TENSOR.FFN_UP_EXP: (
@ -224,6 +229,7 @@ class TensorNameMap:
"transformer.h.{bid}.mlp.w2", # qwen
"model.layers.layers.{bid}.mlp.gate_proj", # plamo
"model.layers.{bid}.feed_forward.w1", # internlm2
"encoder.layers.{bid}.mlp.fc12", # nomic-bert
),
MODEL_TENSOR.FFN_GATE_EXP: (
@ -249,6 +255,7 @@ class TensorNameMap:
"model.layers.{bid}.mlp.fc2", # phi2
"model.layers.layers.{bid}.mlp.down_proj", # plamo
"model.layers.{bid}.feed_forward.w2", # internlm2
"encoder.layers.{bid}.mlp.fc2", # nomic-bert
),
MODEL_TENSOR.FFN_DOWN_EXP: (
@ -272,6 +279,7 @@ class TensorNameMap:
MODEL_TENSOR.LAYER_OUT_NORM: (
"encoder.layer.{bid}.output.LayerNorm", # bert
"encoder.layers.{bid}.norm2", # nomic-bert
)
}