From 531746e9535491de839c2539e09080da5d78bcaf Mon Sep 17 00:00:00 2001
From: KerfuffleV2
Date: Sun, 27 Aug 2023 17:18:47 -0600
Subject: [PATCH] gguf: Refactor tensor name mapping

---
 convert-falcon-hf-to-gguf.py    |  11 +-
 convert-gptneox-hf-to-gguf.py   |  15 +-
 convert-llama-7b-pth-to-gguf.py |  13 +-
 convert-llama-ggmlv3-to-gguf.py |  11 +-
 convert-llama-hf-to-gguf.py     |  11 +-
 convert-lora-to-ggml.py         |   4 +-
 convert.py                      |  22 +--
 gguf-py/gguf/gguf.py            | 287 +++++++++++++++++---------------
 8 files changed, 187 insertions(+), 187 deletions(-)

diff --git a/convert-falcon-hf-to-gguf.py b/convert-falcon-hf-to-gguf.py
index 5c69d1349..d6a01ec01 100755
--- a/convert-falcon-hf-to-gguf.py
+++ b/convert-falcon-hf-to-gguf.py
@@ -238,11 +238,8 @@ for part_name in part_names:
         data = data.squeeze().numpy()
 
         # map tensor names
-        if name.endswith(".weight") and name[:-7] in tensor_map:
-            name = tensor_map[name[:-7]] + ".weight"
-        elif name.endswith(".bias") and name[:-5] in tensor_map:
-            name = tensor_map[name[:-5]] + ".bias"
-        else:
+        new_name = tensor_map.get_name(name, try_suffixes = (".weight", ".bias"))
+        if new_name is None:
             print("Can not map tensor '" + name + "'")
             sys.exit()
 
@@ -261,9 +258,9 @@ for part_name in part_names:
         if ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
             data = data.astype(np.float16)
 
-        print(name + ", n_dims = " + str(n_dims) + ", " + str(old_dtype) + " --> " + str(data.dtype))
+        print(new_name + ", n_dims = " + str(n_dims) + ", " + str(old_dtype) + " --> " + str(data.dtype))
 
-        gguf_writer.add_tensor(name, data)
+        gguf_writer.add_tensor(new_name, data)
 
 print("gguf: write header")
 
diff --git a/convert-gptneox-hf-to-gguf.py b/convert-gptneox-hf-to-gguf.py
index d9c42d76b..88ac7ff7b 100755
--- a/convert-gptneox-hf-to-gguf.py
+++ b/convert-gptneox-hf-to-gguf.py
@@ -111,7 +111,7 @@ gguf_writer.add_layer_norm_eps(hparams["layer_norm_eps"])
 
 print("gguf: get tokenizer metadata")
 
-tokens: List[str] = []
+tokens: List[bytearray] = []
 merges: List[str] = []
 
@@ -200,7 +200,7 @@ tensor_map = gguf.get_tensor_name_map(ARCH,block_count)
 print("gguf: get tensor metadata")
 
 if num_parts == 0:
-    part_names = ("pytorch_model.bin",)
+    part_names = iter(("pytorch_model.bin",))
 else:
     part_names = (
         f"pytorch_model-{n:05}-of-{num_parts:05}.bin" for n in range(1, num_parts + 1)
@@ -226,11 +226,8 @@ for part_name in part_names:
         data = data.squeeze().numpy()
 
         # map tensor names
-        if name.endswith(".weight") and name[:-7] in tensor_map:
-            name = tensor_map[name[:-7]] + ".weight"
-        elif name.endswith(".bias") and name[:-5] in tensor_map:
-            name = tensor_map[name[:-5]] + ".bias"
-        else:
+        new_name = tensor_map.get_name(name, try_suffixes = (".weight", ".bias"))
+        if new_name is None:
             print("Can not map tensor '" + name + "'")
             sys.exit()
 
@@ -249,9 +246,9 @@ for part_name in part_names:
         if ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
             data = data.astype(np.float16)
 
-        print(name + ", n_dims = " + str(n_dims) + ", " + str(old_dtype) + " --> " + str(data.dtype))
+        print(new_name + ", n_dims = " + str(n_dims) + ", " + str(old_dtype) + " --> " + str(data.dtype))
 
-        gguf_writer.add_tensor(name, data)
+        gguf_writer.add_tensor(new_name, data)
 
 print("gguf: write header")
 
diff --git a/convert-llama-7b-pth-to-gguf.py b/convert-llama-7b-pth-to-gguf.py
index 2ab082383..037b126ed 100755
--- a/convert-llama-7b-pth-to-gguf.py
+++ b/convert-llama-7b-pth-to-gguf.py
@@ -11,7 +11,7 @@ import json
 
 import numpy as np
 import torch
-from typing import Any, List
+from typing import Any, List, TypeAlias
 
 from pathlib import Path
 from sentencepiece import SentencePieceProcessor
@@ -266,11 +266,8 @@ for part_name in part_names:
         data = data.squeeze().numpy()
 
         # map tensor names
-        if name.endswith(".weight") and name[:-7] in tensor_map:
-            name = tensor_map[name[:-7]] + ".weight"
-        elif name.endswith(".bias") and name[:-5] in tensor_map:
-            name = tensor_map[name[:-5]] + ".bias"
-        else:
+        new_name = tensor_map.get_name(name, try_suffixes = (".weight", ".bias"))
+        if new_name is None:
             print("Can not map tensor '" + name + "'")
             sys.exit()
 
@@ -289,9 +286,9 @@ for part_name in part_names:
         if ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
             data = data.astype(np.float16)
 
-        print(name + ", n_dims = " + str(n_dims) + ", " + str(old_dtype) + " --> " + str(data.dtype))
+        print(new_name + ", n_dims = " + str(n_dims) + ", " + str(old_dtype) + " --> " + str(data.dtype))
 
-        gguf_writer.add_tensor(name, data)
+        gguf_writer.add_tensor(new_name, data)
 
 print("gguf: write header")
 
diff --git a/convert-llama-ggmlv3-to-gguf.py b/convert-llama-ggmlv3-to-gguf.py
index 3bf93627d..e03cdda70 100755
--- a/convert-llama-ggmlv3-to-gguf.py
+++ b/convert-llama-ggmlv3-to-gguf.py
@@ -259,20 +259,13 @@ class GGMLToGGUF:
         gguf_writer.add_eos_token_id(2)
 
     def add_tensors(self, gguf_writer):
-        nm = self.name_map
+        tensor_map = self.name_map
         data = self.data
         print(f'* Adding {len(self.model.tensors)} tensor(s)')
         for tensor in self.model.tensors:
             name = str(tensor.name, 'UTF-8')
-            if name.endswith('.weight'):
-                name = name[:-7]
-                suffix = '.weight'
-            elif name.endswith('.bias'):
-                name = name[:-5]
-                suffix = '.bias'
-            mapped_name = nm.get(name)
+            mapped_name = tensor_map.get_name(name, try_suffixes = (".weight", ".bias"))
             assert mapped_name is not None, f'Bad name {name}'
-            mapped_name += suffix
             tempdims = list(tensor.dims[:])
             if len(tempdims) > 1:
                 temp = tempdims[1]
diff --git a/convert-llama-hf-to-gguf.py b/convert-llama-hf-to-gguf.py
index 534b71add..e8da448ca 100755
--- a/convert-llama-hf-to-gguf.py
+++ b/convert-llama-hf-to-gguf.py
@@ -286,11 +286,8 @@ for part_name in part_names:
             data = reverse_hf_permute(data, head_count, head_count_kv)
 
         # map tensor names
-        if name.endswith(".weight") and name[:-7] in tensor_map:
-            name = tensor_map[name[:-7]] + ".weight"
-        elif name.endswith(".bias") and name[:-5] in tensor_map:
-            name = tensor_map[name[:-5]] + ".bias"
-        else:
+        new_name = tensor_map.get_name(name, try_suffixes = (".weight", ".bias"))
+        if new_name is None:
             print("Can not map tensor '" + name + "'")
             sys.exit()
 
@@ -309,9 +306,9 @@ for part_name in part_names:
         if ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
             data = data.astype(np.float16)
 
-        print(name + ", n_dims = " + str(n_dims) + ", " + str(old_dtype) + " --> " + str(data.dtype))
+        print(new_name + ", n_dims = " + str(n_dims) + ", " + str(old_dtype) + " --> " + str(data.dtype))
 
-        gguf_writer.add_tensor(name, data)
+        gguf_writer.add_tensor(new_name, data)
 
 print("gguf: write header")
 
diff --git a/convert-lora-to-ggml.py b/convert-lora-to-ggml.py
index a94a7d0af..1112532ba 100755
--- a/convert-lora-to-ggml.py
+++ b/convert-lora-to-ggml.py
@@ -4,7 +4,7 @@ import os
 import re
 import struct
 import sys
-from typing import Any, Dict, Sequence, TextIO
+from typing import Any, Dict, Sequence, BinaryIO
 
 import numpy as np
 import torch
@@ -46,7 +46,7 @@ def translate_tensor_name(t: str) -> str:
         sys.exit(1)
 
 
-def write_file_header(fout: TextIO, params: Dict[str, Any]) -> None:
+def write_file_header(fout: BinaryIO, params: Dict[str, Any]) -> None:
     fout.write(b"ggla"[::-1]) # magic (ggml lora)
     fout.write(struct.pack("i", 1)) # file version
     fout.write(struct.pack("i", params["r"]))
diff --git a/convert.py b/convert.py
index b3b003f3b..539ee2c41 100755
--- a/convert.py
+++ b/convert.py
@@ -1013,7 +1013,8 @@ def convert_to_output_type(model: LazyModel, output_type: GGMLFileType) -> LazyM
             for (name, tensor) in model.items()}
 
 
 def convert_model_names(model: LazyModel, params: Params) -> LazyModel:
-    tmap = gguf.get_tensor_name_map(ARCH, params.n_layer)
+    tmap = gguf.TensorNameMap(ARCH, params.n_layer)
+    should_skip: Set[gguf.MODEL_TENSOR] = gguf.MODEL_TENSOR_SKIP.get(ARCH, set())
 
     tmp = model
 
@@ -1035,23 +1036,16 @@ def convert_model_names(model: LazyModel, params: Params) -> LazyModel:
     out: LazyModel = {}
     for name, lazy_tensor in model.items():
-        name_new = name
-
-        if name in tmap:
-            name_new = tmap[name]
-        elif name.endswith(".weight") and name[:-7] in tmap:
-            name_new = tmap[name[:-7]] + ".weight"
-        elif name.endswith(".bias") and name[:-5] in tmap:
-            name_new = tmap[name[:-5]] + ".bias"
-        else:
+        tensor_type, name_new = tmap.get_both(name, try_suffixes = (".weight", ".bias")) or (None, None)
+        if name_new is None:
             raise Exception(f"Unexpected tensor name: {name}")
 
-        if gguf.should_skip_tensor_TMP(ARCH, params.n_layer, name_new):
+        if tensor_type in should_skip:
             print(f"skipping tensor {name_new}")
             continue
-        else:
-            print(f"{name:48s} -> {name_new:40s} | {lazy_tensor.data_type.name:6s} | {lazy_tensor.shape}")
-            out[name_new] = lazy_tensor
+
+        print(f"{name:48s} -> {name_new:40s} | {lazy_tensor.data_type.name:6s} | {lazy_tensor.shape}")
+        out[name_new] = lazy_tensor
 
     return out
diff --git a/gguf-py/gguf/gguf.py b/gguf-py/gguf/gguf.py
index 8f119c534..684cf397d 100644
--- a/gguf-py/gguf/gguf.py
+++ b/gguf-py/gguf/gguf.py
@@ -6,7 +6,7 @@ import tempfile
 import numpy as np
 from enum import IntEnum, auto
-from typing import Any, BinaryIO, IO, Dict, List, Optional, Tuple
+from typing import Any, BinaryIO, IO, Dict, List, Optional, Sequence, Tuple
 
 #
 # constants
 #
@@ -162,167 +162,192 @@ MODEL_TENSOR_SKIP: Dict[MODEL_ARCH, List[MODEL_TENSOR]] = {
 }
 
 
-# TODO: the following helper functions should be removed
-# instead, get_tensor_name_map should return tuples of (name, MODEL_TENSOR)
-# however, my Python is very bad, and I couldn't figure out how to do this, hence these functions
-# REMOVE
-def should_skip_tensor_TMP(arch: MODEL_ARCH, n_blocks: int, name: str) -> bool:
-    for skip in MODEL_TENSOR_SKIP.get(arch, []):
-        for i in range(n_blocks):
-            if name == MODEL_TENSOR_NAMES[arch][skip].format(bid=i):
-                return True
+class TensorNameMap:
+    mappings_cfg: Dict[MODEL_TENSOR, Tuple[str, ...]] = {
+        # Token embeddings
+        MODEL_TENSOR.TOKEN_EMBD: (
+            "gpt_neox.embed_in", # gptneox
+            "transformer.wte", # gpt2 mpt
+            "transformer.word_embeddings", # falcon
+            "model.embed_tokens", # llama-hf
+            "tok_embeddings", # llama-pth
+        ),
 
-    return False
+        # Position embeddings
+        MODEL_TENSOR.POS_EMBD: (
+            "transformer.wpe", # gpt2
+        ),
 
+        # Output
+        MODEL_TENSOR.OUTPUT: (
+            "embed_out", # gptneox
+            "lm_head", # gpt2 mpt falcon llama-hf
+            "output", # llama-pth
+        ),
 
-def get_tensor_name_map(arch: MODEL_ARCH, n_blocks: int) -> dict:
-    tensor_map = {}
+        # Output norm
+        MODEL_TENSOR.OUTPUT_NORM: (
+            "gpt_neox.final_layer_norm", # gptneox
+            "transformer.ln_f", # gpt2 falcon
+            "transformer.norm_f", # mpt
+            "model.norm", # llama-hf
+            "norm", # llama-pth
+        ),
 
-    # Token embeddings
-    mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.TOKEN_EMBD, None)
+        # Rope frequencies
+        MODEL_TENSOR.ROPE_FREQS: (
+            "rope.freqs", # llama-pth
+        ),
+    }
 
-    tensor_map["gpt_neox.embed_in"] = mapped_to # gptneox
-    tensor_map["transformer.wte"] = mapped_to # gpt2 mpt
-    tensor_map["transformer.word_embeddings"] = mapped_to # falcon
-    tensor_map["model.embed_tokens"] = mapped_to # llama-hf
-    tensor_map["tok_embeddings"] = mapped_to # llama-pth
-
-    # Position embeddings
-    mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.POS_EMBD, None)
-
-    tensor_map["transformer.wpe"] = mapped_to # gpt2
-
-    # Output
-    mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.OUTPUT, None)
-
-    tensor_map["embed_out"] = mapped_to # gptneox
-    tensor_map["lm_head"] = mapped_to # gpt2 mpt falcon llama-hf
-    tensor_map["output"] = mapped_to # llama-pth
-
-    # Output norm
-    mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.OUTPUT_NORM, None)
-
-    tensor_map["gpt_neox.final_layer_norm"] = mapped_to # gptneox
-    tensor_map["transformer.ln_f"] = mapped_to # gpt2 falcon
-    tensor_map["transformer.norm_f"] = mapped_to # mpt
-    tensor_map["model.norm"] = mapped_to # llama-hf
-    tensor_map["norm"] = mapped_to # llama-pth
-
-    # Rope frequencies
-    mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.ROPE_FREQS, None)
-
-    tensor_map["rope.freqs"] = mapped_to # llama-pth
-
-    # Attention and feed-forward blocks
-    for i in range(0, n_blocks):
+    block_mappings_cfg: Dict[MODEL_TENSOR, Tuple[str, ...]] = {
         # Attention norm
-        # TODO: is there are simpler way to write these 2 lines in Python?
-        mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.ATTN_NORM, None)
-        mapped_to = mapped_to.format(bid=i) if mapped_to else None
-
-        tensor_map["gpt_neox.layers."+str(i)+".input_layernorm"] = mapped_to # gptneox
-        tensor_map["transformer.h."+str(i)+".ln_1"] = mapped_to # gpt2
-        tensor_map["transformer.blocks."+str(i)+".norm_1"] = mapped_to # mpt
-        tensor_map["transformer.h."+str(i)+".input_layernorm"] = mapped_to # falcon7b
-        tensor_map["transformer.h."+str(i)+".ln_mlp"] = mapped_to # falcon40b
-        tensor_map["model.layers."+str(i)+".input_layernorm"] = mapped_to # llama-hf
-        tensor_map["layers."+str(i)+".attention_norm"] = mapped_to # llama-pth
+        MODEL_TENSOR.ATTN_NORM: (
+            "gpt_neox.layers.{bid}.input_layernorm", # gptneox
+            "transformer.h.{bid}.ln_1", # gpt2
+            "transformer.blocks.{bid}.norm_1", # mpt
+            "transformer.h.{bid}.input_layernorm", # falcon7b
+            "transformer.h.{bid}.ln_mlp", # falcon40b
+            "model.layers.{bid}.input_layernorm", # llama-hf
+            "layers.{bid}.attention_norm", # llama-pth
+        ),
 
         # Attention norm 2
-        mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.ATTN_NORM_2, None)
-        mapped_to = mapped_to.format(bid=i) if mapped_to is not None else None
-
-        tensor_map["transformer.h."+str(i)+".ln_attn"] = mapped_to # falcon40b
+        MODEL_TENSOR.ATTN_NORM_2: (
+            "transformer.h.{bid}.ln_attn", # falcon40b
+        ),
 
         # Attention query-key-value
-        mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.ATTN_QKV, None)
-        mapped_to = mapped_to.format(bid=i) if mapped_to is not None else None
-
-        tensor_map["gpt_neox.layers."+str(i)+".attention.query_key_value"] = mapped_to # gptneox
-        tensor_map["transformer.h."+str(i)+".attn.c_attn"] = mapped_to # gpt2
-        tensor_map["transformer.blocks."+str(i)+".attn.Wqkv"] = mapped_to # mpt
-        tensor_map["transformer.h."+str(i)+".self_attention.query_key_value"] = mapped_to # falcon
+        MODEL_TENSOR.ATTN_QKV: (
+            "gpt_neox.layers.{bid}.attention.query_key_value", # gptneox
+            "transformer.h.{bid}.attn.c_attn", # gpt2
+            "transformer.blocks.{bid}.attn.Wqkv", # mpt
"transformer.blocks.{bid}.attn.Wqkv", # mpt + "transformer.h.{bid}.self_attention.query_key_value", # falcon + ), # Attention query - mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.ATTN_Q, None) - mapped_to = mapped_to.format(bid=i) if mapped_to is not None else None - - tensor_map["model.layers."+str(i)+".self_attn.q_proj"] = mapped_to # llama-hf - tensor_map["layers."+str(i)+".attention.wq"] = mapped_to # llama-pth + MODEL_TENSOR.ATTN_Q: ( + "model.layers.{bid}.self_attn.q_proj", # llama-hf + "layers.{bid}.attention.wq", # llama-pth + ), # Attention key - mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.ATTN_K, None) - mapped_to = mapped_to.format(bid=i) if mapped_to is not None else None - - tensor_map["model.layers."+str(i)+".self_attn.k_proj"] = mapped_to # llama-hf - tensor_map["layers."+str(i)+".attention.wk"] = mapped_to # llama-pth + MODEL_TENSOR.ATTN_K: ( + "model.layers.{bid}.self_attn.k_proj", # llama-hf + "layers.{bid}.attention.wk", # llama-pth + ), # Attention value - mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.ATTN_V, None) - mapped_to = mapped_to.format(bid=i) if mapped_to is not None else None - - tensor_map["model.layers."+str(i)+".self_attn.v_proj"] = mapped_to # llama-hf - tensor_map["layers."+str(i)+".attention.wv"] = mapped_to # llama-pth + MODEL_TENSOR.ATTN_V: ( + "model.layers.{bid}.self_attn.v_proj", # llama-hf + "layers.{bid}.attention.wv", # llama-pth + ), # Attention output - mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.ATTN_OUT, None) - mapped_to = mapped_to.format(bid=i) if mapped_to is not None else None - - tensor_map["gpt_neox.layers."+str(i)+".attention.dense"] = mapped_to # gptneox - tensor_map["transformer.h."+str(i)+".attn.c_proj"] = mapped_to # gpt2 - tensor_map["transformer.blocks."+str(i)+".attn.out_proj"] = mapped_to # mpt - tensor_map["transformer.h."+str(i)+".self_attention.dense"] = mapped_to # falcon - tensor_map["model.layers."+str(i)+".self_attn.o_proj"] = mapped_to # llama-hf - tensor_map["layers."+str(i)+".attention.wo"] = mapped_to # llama-pth + MODEL_TENSOR.ATTN_OUT: ( + "gpt_neox.layers.{bid}.attention.dense", # gptneox + "transformer.h.{bid}.attn.c_proj", # gpt2 + "transformer.blocks.{bid}.attn.out_proj", # mpt + "transformer.h.{bid}.self_attention.dense", # falcon + "model.layers.{bid}.self_attn.o_proj", # llama-hf + "layers.{bid}.attention.wo", # llama-pth + ), # Rotary embeddings - mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.ATTN_ROT_EMBD, None) - mapped_to = mapped_to.format(bid=i) if mapped_to is not None else None - - tensor_map["model.layers."+str(i)+".self_attn.rotary_emb.inv_freq"] = mapped_to # llama-hf - tensor_map["layers."+str(i)+".attention.inner_attention.rope.freqs"] = mapped_to # llama-pth + MODEL_TENSOR.ATTN_ROT_EMBD: ( + "model.layers.{bid}.self_attn.rotary_emb.inv_freq", # llama-hf + "layers.{bid}.attention.inner_attention.rope.freqs", # llama-pth + ), # Feed-forward norm - mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.FFN_NORM, None) - mapped_to = mapped_to.format(bid=i) if mapped_to is not None else None - - tensor_map["gpt_neox.layers."+str(i)+".post_attention_layernorm"] = mapped_to # gptneox - tensor_map["transformer.h."+str(i)+".ln_2"] = mapped_to # gpt2 - tensor_map["transformer.blocks."+str(i)+".norm_2"] = mapped_to # mpt - tensor_map["model.layers."+str(i)+".post_attention_layernorm"] = mapped_to # llama-hf - tensor_map["layers."+str(i)+".ffn_norm"] = mapped_to # llama-pth + MODEL_TENSOR.FFN_NORM: ( + "gpt_neox.layers.{bid}.post_attention_layernorm", # gptneox 
+ "transformer.h.{bid}.ln_2", # gpt2 + "transformer.blocks.{bid}.norm_2", # mpt + "model.layers.{bid}.post_attention_layernorm", # llama-hf + "layers.{bid}.ffn_norm", # llama-pth + ), # Feed-forward up - mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.FFN_UP, None) - mapped_to = mapped_to.format(bid=i) if mapped_to is not None else None - - tensor_map["gpt_neox.layers."+str(i)+".mlp.dense_h_to_4h"] = mapped_to # gptneox - tensor_map["transformer.h."+str(i)+".mlp.c_fc"] = mapped_to # gpt2 - tensor_map["transformer.blocks."+str(i)+".ffn.up_proj"] = mapped_to # mpt - tensor_map["transformer.h."+str(i)+".mlp.dense_h_to_4h"] = mapped_to # falcon - tensor_map["model.layers."+str(i)+".mlp.up_proj"] = mapped_to # llama-hf - tensor_map["layers."+str(i)+".feed_forward.w3"] = mapped_to # llama-pth + MODEL_TENSOR.FFN_UP: ( + "gpt_neox.layers.{bid}.mlp.dense_h_to_4h", # gptneox + "transformer.h.{bid}.mlp.c_fc", # gpt2 + "transformer.blocks.{bid}.ffn.up_proj", # mpt + "transformer.h.{bid}.mlp.dense_h_to_4h", # falcon + "model.layers.{bid}.mlp.up_proj", # llama-hf + "layers.{bid}.feed_forward.w3", # llama-pth + ), # Feed-forward gate - mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.FFN_GATE, None) - mapped_to = mapped_to.format(bid=i) if mapped_to is not None else None - - tensor_map["model.layers."+str(i)+".mlp.gate_proj"] = mapped_to # llama-hf - tensor_map["layers."+str(i)+".feed_forward.w1"] = mapped_to # llama-pth + MODEL_TENSOR.FFN_GATE: ( + "model.layers.{bid}.mlp.gate_proj", # llama-hf + "layers.{bid}.feed_forward.w1", # llama-pth + ), # Feed-forward down - mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.FFN_DOWN, None) - mapped_to = mapped_to.format(bid=i) if mapped_to is not None else None + MODEL_TENSOR.FFN_DOWN: ( + "gpt_neox.layers.{bid}.mlp.dense_4h_to_h", # gptneox + "transformer.h.{bid}.mlp.c_proj", # gpt2 + "transformer.blocks.{bid}.ffn.down_proj", # mpt + "transformer.h.{bid}.mlp.dense_4h_to_h", # falcon + "model.layers.{bid}.mlp.down_proj", # llama-hf + "layers.{bid}.feed_forward.w2", # llama-pth + ), + } - tensor_map["gpt_neox.layers."+str(i)+".mlp.dense_4h_to_h"] = mapped_to # gptneox - tensor_map["transformer.h."+str(i)+".mlp.c_proj"] = mapped_to # gpt2 - tensor_map["transformer.blocks."+str(i)+".ffn.down_proj"] = mapped_to # mpt - tensor_map["transformer.h."+str(i)+".mlp.dense_4h_to_h"] = mapped_to # falcon - tensor_map["model.layers."+str(i)+".mlp.down_proj"] = mapped_to # llama-hf - tensor_map["layers."+str(i)+".feed_forward.w2"] = mapped_to # llama-pth + mapping: Dict[str, Tuple[MODEL_TENSOR, str]] - return tensor_map + tensor_names: Dict[MODEL_TENSOR, str] + def __init__(self, arch: MODEL_ARCH, n_blocks: int): + mapping = self.mapping = {} + tensor_names = self.tensor_names = MODEL_TENSOR_NAMES[arch] + for tensor, keys in self.mappings_cfg.items(): + tensor_name = tensor_names.get(tensor) + if tensor_name is None: + continue + for key in keys: + mapping[key] = (tensor, tensor_name) + for bid in range(n_blocks): + for tensor, keys in self.block_mappings_cfg.items(): + tensor_name = tensor_names.get(tensor) + if tensor_name is None: + continue + tensor_name = tensor_name.format(bid = bid) + for key in keys: + key = key.format(bid = bid) + mapping[key] = (tensor, tensor_name) + + def get_both(self, key: str, try_suffixes: Sequence[str]) -> Optional[Tuple[MODEL_TENSOR, str]]: + result = self.mapping.get(key) + if result is not None: + return result + for suffix in try_suffixes: + if key.endswith(suffix): + result = self.mapping.get(key[:-len(suffix)]) + if result 
+                    return (result[0], result[1] + suffix)
+        return None
+
+    def get_name(self, key: str, try_suffixes: Sequence[str]) -> Optional[str]:
+        result = self.get_both(key, try_suffixes = try_suffixes)
+        if result is None:
+            return None
+        return result[1]
+
+    def __getitem__(self, key: str) -> str:
+        try:
+            return self.mapping[key][1]
+        except KeyError:
+            raise KeyError(key)
+
+    def __contains__(self, key: str) -> bool:
+        return key in self.mapping
+
+    def __repr__(self) -> str:
+        return repr(self.mapping)
+
+def get_tensor_name_map(arch: MODEL_ARCH, n_blocks: int) -> TensorNameMap:
+    return TensorNameMap(arch, n_blocks)
 
 class TokenType(IntEnum):
     NORMAL = 1
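
Usage sketch (not part of the patch): a minimal example of how the converter scripts above drive the new TensorNameMap API after this change. The architecture, block count, and tensor name below are illustrative placeholders only.

    import gguf

    # Build the name map once for the target architecture and block (layer) count.
    tensor_map = gguf.TensorNameMap(gguf.MODEL_ARCH.LLAMA, 32)

    # get_name() resolves a source checkpoint tensor name, retrying with each
    # suffix stripped and re-appended, and returns None when nothing matches.
    new_name = tensor_map.get_name("model.layers.0.self_attn.q_proj.weight", try_suffixes = (".weight", ".bias"))
    if new_name is None:
        raise SystemExit("Can not map tensor")

    # get_both() additionally returns the MODEL_TENSOR enum member, which
    # convert.py compares against MODEL_TENSOR_SKIP to skip tensors that
    # should not be written to the GGUF file.
    tensor_type, name_new = tensor_map.get_both("model.layers.0.self_attn.q_proj.weight", try_suffixes = (".weight", ".bias")) or (None, None)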