Merge branch 'master' into compilade/pyright-tests

Francis Couture-Harpin 2024-07-07 10:51:30 -04:00
commit 0caf60a79e
31 changed files with 9399 additions and 73 deletions


@@ -120,7 +120,6 @@ class Keys:
        MIDDLE_ID = "tokenizer.ggml.middle_token_id"
        EOT_ID = "tokenizer.ggml.eot_token_id"

#
# recommended mapping of model tensor names for storage in gguf
#
@@ -163,6 +162,7 @@ class MODEL_ARCH(IntEnum):
    OPENELM = auto()
    ARCTIC = auto()
    DEEPSEEK2 = auto()
    CHATGLM = auto()
    BITNET = auto()
    T5 = auto()
    JAIS = auto()
@@ -289,6 +289,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
    MODEL_ARCH.OPENELM: "openelm",
    MODEL_ARCH.ARCTIC: "arctic",
    MODEL_ARCH.DEEPSEEK2: "deepseek2",
    MODEL_ARCH.CHATGLM: "chatglm",
    MODEL_ARCH.BITNET: "bitnet",
    MODEL_ARCH.T5: "t5",
    MODEL_ARCH.JAIS: "jais",
@@ -924,6 +925,18 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
        MODEL_TENSOR.FFN_DOWN_SHEXP,
        MODEL_TENSOR.FFN_UP_SHEXP,
    ],
    MODEL_ARCH.CHATGLM : [
        MODEL_TENSOR.TOKEN_EMBD,
        MODEL_TENSOR.ROPE_FREQS,
        MODEL_TENSOR.OUTPUT_NORM,
        MODEL_TENSOR.OUTPUT,
        MODEL_TENSOR.ATTN_NORM,
        MODEL_TENSOR.ATTN_QKV,
        MODEL_TENSOR.ATTN_OUT,
        MODEL_TENSOR.FFN_NORM,
        MODEL_TENSOR.FFN_DOWN,
        MODEL_TENSOR.FFN_UP,
    ],
    MODEL_ARCH.BITNET: [
        MODEL_TENSOR.ATTN_Q,
        MODEL_TENSOR.ATTN_K,
@@ -1020,6 +1033,9 @@ MODEL_TENSOR_SKIP: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
        MODEL_TENSOR.ROPE_FREQS,
        MODEL_TENSOR.ATTN_ROT_EMBD,
    ],
    MODEL_ARCH.CHATGLM: [
        MODEL_TENSOR.ROPE_FREQS,
    ],
}
#
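The new CHATGLM entries above can be exercised straight from the gguf-py package. A minimal sanity-check sketch, assuming the gguf-py directory from this branch is importable (all names below come from the hunks shown above):

    # Sanity check of the new CHATGLM constants (assumes gguf-py from this branch is on sys.path).
    from gguf.constants import MODEL_ARCH, MODEL_TENSOR, MODEL_ARCH_NAMES, MODEL_TENSORS, MODEL_TENSOR_SKIP

    assert MODEL_ARCH_NAMES[MODEL_ARCH.CHATGLM] == "chatglm"                 # architecture name written to GGUF metadata
    assert MODEL_TENSOR.ATTN_QKV in MODEL_TENSORS[MODEL_ARCH.CHATGLM]        # fused QKV projection, as listed above
    assert MODEL_TENSOR.ROPE_FREQS in MODEL_TENSOR_SKIP[MODEL_ARCH.CHATGLM]  # rope inv_freq tensors are not serialized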


@@ -24,6 +24,7 @@ class TensorNameMap:
            "backbone.embedding", # mamba
            "backbone.embeddings", # mamba-hf
            "transformer.in_out_embed", # Grok
            "embedding.word_embeddings", # chatglm
            "transformer.token_embeddings", # openelm
            "shared", # t5
        ),
@@ -55,6 +56,7 @@ class TensorNameMap:
            "output", # llama-pth bloom internlm2
            "word_embeddings_for_head", # persimmon
            "lm_head.linear", # phi2
            "output_layer", # chatglm
        ),

        # Output norm
@@ -71,12 +73,14 @@ class TensorNameMap:
            "model.norm_f", # mamba-qbert
            "backbone.norm_f", # mamba
            "transformer.rms_norm", # Grok
            "encoder.final_layernorm", # chatglm
            "transformer.norm", # openelm
        ),

        # Rope frequencies
        MODEL_TENSOR.ROPE_FREQS: (
            "rope.freqs", # llama-pth
            "rotary_pos_emb.inv_freq", # chatglm
        ),
    }
@@ -101,6 +105,7 @@ class TensorNameMap:
            "backbone.layers.{bid}.norm", # mamba
            "transformer.decoder_layer.{bid}.rms_norm", # Grok
            "transformer.blocks.{bid}.norm_attn_norm.norm_1", # dbrx
            "encoder.layers.{bid}.input_layernorm", # chatglm
            "transformer.layers.{bid}.attn_norm", # openelm
        ),
@@ -124,6 +129,7 @@ class TensorNameMap:
            "transformer.h.{bid}.mixer.Wqkv", # phi2
            "encoder.layers.{bid}.attn.Wqkv", # nomic-bert
            "model.layers.{bid}.self_attn.qkv_proj", # phi3
            "encoder.layers.{bid}.self_attention.query_key_value", # chatglm
            "transformer.layers.{bid}.attn.qkv_proj", # openelm
        ),
@@ -135,7 +141,7 @@ class TensorNameMap:
            "transformer.h.{bid}.attn.q_proj", # gpt-j
            "model.layers.layers.{bid}.self_attn.q_proj", # plamo
            "model.layers.{bid}.attention.wq", # internlm2
            "transformer.decoder_layer.{bid}.multi_head_attention.query" # Grok
            "transformer.decoder_layer.{bid}.multi_head_attention.query", # Grok
        ),

        # Attention key
@@ -147,7 +153,7 @@ class TensorNameMap:
            "transformer.h.{bid}.attn.k", # refact
            "model.layers.layers.{bid}.self_attn.k_proj", # plamo
            "model.layers.{bid}.attention.wk", # internlm2
            "transformer.decoder_layer.{bid}.multi_head_attention.key" # Grok
            "transformer.decoder_layer.{bid}.multi_head_attention.key", # Grok
        ),

        # Attention value
@@ -182,6 +188,7 @@ class TensorNameMap:
            "encoder.layers.{bid}.attn.out_proj", # nomic-bert
            "transformer.decoder_layer.{bid}.multi_head_attention.linear", # Grok
            "transformer.blocks.{bid}.norm_attn_norm.attn.out_proj", # dbrx
            "encoder.layers.{bid}.self_attention.dense", # chatglm
            "transformer.layers.{bid}.attn.out_proj", # openelm
        ),
@@ -218,6 +225,7 @@ class TensorNameMap:
            "h.{bid}.ln_2", # gpt2
            "model.layers.{bid}.ffn_norm", # internlm2
            "transformer.decoder_layer.{bid}.rms_norm_2", # Grok
            "encoder.layers.{bid}.post_attention_layernorm", # chatglm
            "transformer.layers.{bid}.ffn_norm", # openelm
        ),
@@ -268,6 +276,7 @@ class TensorNameMap:
            "model.layers.{bid}.mlp.c_fc", # starcoder2
            "encoder.layer.{bid}.mlp.gated_layers_v", # jina-bert-v2
            "model.layers.{bid}.residual_mlp.w3", # arctic
            "encoder.layers.{bid}.mlp.dense_h_to_4h", # chatglm
        ),

        MODEL_TENSOR.FFN_UP_EXP: (
@@ -337,6 +346,7 @@ class TensorNameMap:
            "transformer.layers.{bid}.ffn.proj_2", # openelm
            "model.layers.{bid}.residual_mlp.w2", # arctic
            "encoder.layer.{bid}.mlp.down_layer", # jina-bert-v2
            "encoder.layers.{bid}.mlp.dense_4h_to_h", # chatglm
        ),

        MODEL_TENSOR.FFN_DOWN_EXP: (
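With these mappings in place, ChatGLM checkpoint tensor names resolve through the same TensorNameMap lookup the conversion scripts use. A small sketch, assuming the gguf-py package from this branch is importable and that get_tensor_name_map / TensorNameMap.get_name behave as elsewhere in gguf-py (the block count of 28 is only an example value):

    # Sketch: resolving ChatGLM checkpoint names to GGUF tensor names via the new mappings.
    from gguf.constants import MODEL_ARCH
    from gguf.tensor_mapping import get_tensor_name_map

    tmap = get_tensor_name_map(MODEL_ARCH.CHATGLM, n_blocks=28)  # 28 blocks is an example value

    # "embedding.word_embeddings" is one of the names added above; it maps to the token embedding tensor.
    print(tmap.get_name("embedding.word_embeddings.weight", try_suffixes=(".weight",)))
    # -> token_embd.weight

    # The fused QKV projection added for chatglm maps to the per-block attn_qkv tensor.
    print(tmap.get_name("encoder.layers.0.self_attention.query_key_value.weight", try_suffixes=(".weight", ".bias")))
    # -> blk.0.attn_qkv.weight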

gguf-py/scripts/gguf_hash.py (new executable file, 91 lines)

@@ -0,0 +1,91 @@
#!/usr/bin/env python3
from __future__ import annotations

import uuid
import hashlib
import logging
import argparse
import os
import sys
from pathlib import Path

from tqdm import tqdm

# Necessary to load the local gguf package
if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists():
    sys.path.insert(0, str(Path(__file__).parent.parent))

from gguf import GGUFReader # noqa: E402

logger = logging.getLogger("gguf-hash")

# UUID_NAMESPACE_LLAMA_CPP = uuid.uuid5(uuid.NAMESPACE_URL, 'en.wikipedia.org/wiki/Llama.cpp')
UUID_NAMESPACE_LLAMA_CPP = uuid.UUID('ef001206-dadc-5f6d-a15f-3359e577d4e5')


# For more information about what field.parts and field.data represent,
# please see the comments in the modify_gguf.py example.
def gguf_hash(reader: GGUFReader, filename: str, disable_progress_bar) -> None:
    sha1 = hashlib.sha1()
    uuidv5_sha1 = hashlib.sha1()
    uuidv5_sha1.update(UUID_NAMESPACE_LLAMA_CPP.bytes)

    # Total Weight Calculation For Progress Bar
    total_weights = 0
    for n, tensor in enumerate(reader.tensors, 1):
        # We don't need these
        if tensor.name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")):
            continue

        # Calculate Tensor Volume
        sum_weights_in_tensor = 1
        for dim in tensor.shape:
            sum_weights_in_tensor *= dim
        total_weights += sum_weights_in_tensor

    # Hash Progress Bar
    bar = tqdm(desc="Hashing", total=total_weights, unit="weights", unit_scale=True, disable=disable_progress_bar)

    # Hashing Process
    for n, tensor in enumerate(reader.tensors, 1):
        # We don't need these
        if tensor.name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")):
            continue

        # Progressbar
        sum_weights_in_tensor = 1
        for dim in tensor.shape:
            sum_weights_in_tensor *= dim
        bar.update(sum_weights_in_tensor)

        sha1_layer = hashlib.sha1()
        sha1_layer.update(tensor.data)
        sha1.update(tensor.data)
        uuidv5_sha1.update(tensor.data)
        print("sha1 {0} {1}:{2}".format(sha1_layer.hexdigest(), filename, tensor.name)) # noqa: NP100

    # Flush Hash Progress Bar
    bar.close()

    # Display Hash Output
    print("sha1 {0} {1}".format(sha1.hexdigest(), filename)) # noqa: NP100
    print("UUIDv5 {0} {1}".format(uuid.UUID(bytes=uuidv5_sha1.digest()[:16], version=5), filename)) # noqa: NP100


def main() -> None:
    parser = argparse.ArgumentParser(description="Dump GGUF file metadata")
    parser.add_argument("model", type=str, help="GGUF format model filename")
    parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
    parser.add_argument("--progressbar", action="store_true", help="enable progressbar")
    args = parser.parse_args(None if len(sys.argv) > 1 else ["--help"])
    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
    reader = GGUFReader(args.model, 'r')
    gguf_hash(reader, args.model, not args.progressbar)


if __name__ == '__main__':
    main()
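A usage sketch for the new script (the model path below is a placeholder), run from the repository root:

    python3 gguf-py/scripts/gguf_hash.py --progressbar path/to/model.gguf

It prints one "sha1 <digest> <file>:<tensor>" line per tensor, followed by the whole-file sha1 and a UUIDv5 computed from the same tensor data under UUID_NAMESPACE_LLAMA_CPP.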