style cleanup with flake8

parent ce865b3ce3
commit f364636b2e

5 changed files with 331 additions and 296 deletions
@@ -8,7 +8,7 @@ from typing import Any, NamedTuple, Type
 # constants
 #

 GGUF_MAGIC = 0x46554747 # "GGUF"
 GGUF_VERSION = 3
 GGUF_DEFAULT_ALIGNMENT = 32

@@ -16,64 +16,71 @@ GGUF_DEFAULT_ALIGNMENT = 32
 # metadata keys
 #


 class GeneralKeys(StrEnum):
-    ARCHITECTURE : str = "general.architecture"
+    ARCHITECTURE: str = "general.architecture"
     QUANTIZATION_VERSION: str = "general.quantization_version"
-    ALIGNMENT : str = "general.alignment"
-    NAME : str = "general.name"
-    AUTHOR : str = "general.author"
-    URL : str = "general.url"
-    DESCRIPTION : str = "general.description"
-    LICENSE : str = "general.license"
-    SOURCE_URL : str = "general.source.url"
-    SOURCE_HF_REPO : str = "general.source.huggingface.repository"
-    FILE_TYPE : str = "general.file_type"
+    ALIGNMENT: str = "general.alignment"
+    NAME: str = "general.name"
+    AUTHOR: str = "general.author"
+    URL: str = "general.url"
+    DESCRIPTION: str = "general.description"
+    LICENSE: str = "general.license"
+    SOURCE_URL: str = "general.source.url"
+    SOURCE_HF_REPO: str = "general.source.huggingface.repository"
+    FILE_TYPE: str = "general.file_type"


 class AttentionKeys(StrEnum):
-    HEAD_COUNT : str = "{arch}.attention.head_count"
-    HEAD_COUNT_KV : str = "{arch}.attention.head_count_kv"
-    MAX_ALIBI_BIAS : str = "{arch}.attention.max_alibi_bias"
-    CLAMP_KQV : str = "{arch}.attention.clamp_kqv"
-    LAYERNORM_EPS : str = "{arch}.attention.layer_norm_epsilon"
+    HEAD_COUNT: str = "{arch}.attention.head_count"
+    HEAD_COUNT_KV: str = "{arch}.attention.head_count_kv"
+    MAX_ALIBI_BIAS: str = "{arch}.attention.max_alibi_bias"
+    CLAMP_KQV: str = "{arch}.attention.clamp_kqv"
+    LAYERNORM_EPS: str = "{arch}.attention.layer_norm_epsilon"
     LAYERNORM_RMS_EPS: str = "{arch}.attention.layer_norm_rms_epsilon"


 class RopeKeys(StrEnum):
-    DIMENSION_COUNT : str = "{arch}.rope.dimension_count"
-    FREQ_BASE : str = "{arch}.rope.freq_base"
-    SCALING_TYPE : str = "{arch}.rope.scaling.type"
-    SCALING_FACTOR : str = "{arch}.rope.scaling.factor"
+    DIMENSION_COUNT: str = "{arch}.rope.dimension_count"
+    FREQ_BASE: str = "{arch}.rope.freq_base"
+    SCALING_TYPE: str = "{arch}.rope.scaling.type"
+    SCALING_FACTOR: str = "{arch}.rope.scaling.factor"
     SCALING_ORIG_CTX_LEN: str = "{arch}.rope.scaling.original_context_length"
-    SCALING_FINETUNED : str = "{arch}.rope.scaling.finetuned"
+    SCALING_FINETUNED: str = "{arch}.rope.scaling.finetuned"


 class TokenizerKeys(StrEnum):
-    MODEL : str = "tokenizer.ggml.model"
-    LIST : str = "tokenizer.ggml.tokens"
+    MODEL: str = "tokenizer.ggml.model"
+    LIST: str = "tokenizer.ggml.tokens"
     TOKEN_TYPE: str = "tokenizer.ggml.token_type"
-    SCORES : str = "tokenizer.ggml.scores"
-    MERGES : str = "tokenizer.ggml.merges"
-    BOS_ID : str = "tokenizer.ggml.bos_token_id"
-    EOS_ID : str = "tokenizer.ggml.eos_token_id"
-    UNK_ID : str = "tokenizer.ggml.unknown_token_id"
-    SEP_ID : str = "tokenizer.ggml.seperator_token_id"
-    PAD_ID : str = "tokenizer.ggml.padding_token_id"
-    HF_JSON : str = "tokenizer.huggingface.json"
-    RWKV : str = "tokenizer.rwkv.world"
+    SCORES: str = "tokenizer.ggml.scores"
+    MERGES: str = "tokenizer.ggml.merges"
+    BOS_ID: str = "tokenizer.ggml.bos_token_id"
+    EOS_ID: str = "tokenizer.ggml.eos_token_id"
+    UNK_ID: str = "tokenizer.ggml.unknown_token_id"
+    SEP_ID: str = "tokenizer.ggml.seperator_token_id"
+    PAD_ID: str = "tokenizer.ggml.padding_token_id"
+    HF_JSON: str = "tokenizer.huggingface.json"
+    RWKV: str = "tokenizer.rwkv.world"


 class LLMKeys(StrEnum):
-    CONTEXT_LENGTH : str = "{arch}.context_length"
-    EMBEDDING_LENGTH : str = "{arch}.embedding_length"
-    BLOCK_COUNT : str = "{arch}.block_count"
-    FEED_FORWARD_LENGTH : str = "{arch}.feed_forward_length"
+    CONTEXT_LENGTH: str = "{arch}.context_length"
+    EMBEDDING_LENGTH: str = "{arch}.embedding_length"
+    BLOCK_COUNT: str = "{arch}.block_count"
+    FEED_FORWARD_LENGTH: str = "{arch}.feed_forward_length"
     USE_PARALLEL_RESIDUAL: str = "{arch}.use_parallel_residual"
-    TENSOR_DATA_LAYOUT : str = "{arch}.tensor_data_layout"
+    TENSOR_DATA_LAYOUT: str = "{arch}.tensor_data_layout"


 class Keys(NamedTuple):
-    GENERAL : Type[GeneralKeys ] = GeneralKeys
-    LLM : Type[LLMKeys ] = LLMKeys
+    GENERAL: Type[GeneralKeys] = GeneralKeys
+    LLM: Type[LLMKeys] = LLMKeys
     ATTENTION: Type[AttentionKeys] = AttentionKeys
-    ROPE : Type[RopeKeys ] = RopeKeys
+    ROPE: Type[RopeKeys] = RopeKeys
     TOKENIZER: Type[TokenizerKeys] = TokenizerKeys


 KEY = Keys()

 #
@@ -82,42 +89,42 @@ KEY = Keys()


 class MODEL_ARCH(IntEnum):
-    LLAMA : int = auto()
-    FALCON : int = auto()
-    BAICHUAN : int = auto()
-    GPT2 : int = auto()
-    GPTJ : int = auto()
-    GPTNEOX : int = auto()
-    MPT : int = auto()
-    STARCODER : int = auto()
-    PERSIMMON : int = auto()
-    REFACT : int = auto()
-    BERT : int = auto()
-    BLOOM : int = auto()
+    LLAMA: int = auto()
+    FALCON: int = auto()
+    BAICHUAN: int = auto()
+    GPT2: int = auto()
+    GPTJ: int = auto()
+    GPTNEOX: int = auto()
+    MPT: int = auto()
+    STARCODER: int = auto()
+    PERSIMMON: int = auto()
+    REFACT: int = auto()
+    BERT: int = auto()
+    BLOOM: int = auto()


 class MODEL_TENSOR(IntEnum):
-    TOKEN_EMBD : int = auto()
-    TOKEN_EMBD_NORM : int = auto()
-    TOKEN_TYPES : int = auto()
-    POS_EMBD : int = auto()
-    OUTPUT : int = auto()
-    OUTPUT_NORM : int = auto()
-    ROPE_FREQS : int = auto()
-    ATTN_Q : int = auto()
-    ATTN_K : int = auto()
-    ATTN_V : int = auto()
-    ATTN_QKV : int = auto()
-    ATTN_OUT : int = auto()
-    ATTN_NORM : int = auto()
-    ATTN_NORM_2 : int = auto()
-    ATTN_ROT_EMBD : int = auto()
-    FFN_GATE : int = auto()
-    FFN_DOWN : int = auto()
-    FFN_UP : int = auto()
-    FFN_NORM : int = auto()
-    ATTN_Q_NORM : int = auto()
-    ATTN_K_NORM : int = auto()
+    TOKEN_EMBD: int = auto()
+    TOKEN_EMBD_NORM: int = auto()
+    TOKEN_TYPES: int = auto()
+    POS_EMBD: int = auto()
+    OUTPUT: int = auto()
+    OUTPUT_NORM: int = auto()
+    ROPE_FREQS: int = auto()
+    ATTN_Q: int = auto()
+    ATTN_K: int = auto()
+    ATTN_V: int = auto()
+    ATTN_QKV: int = auto()
+    ATTN_OUT: int = auto()
+    ATTN_NORM: int = auto()
+    ATTN_NORM_2: int = auto()
+    ATTN_ROT_EMBD: int = auto()
+    FFN_GATE: int = auto()
+    FFN_DOWN: int = auto()
+    FFN_UP: int = auto()
+    FFN_NORM: int = auto()
+    ATTN_Q_NORM: int = auto()
+    ATTN_K_NORM: int = auto()


 MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
@@ -321,13 +328,14 @@ MODEL_TENSOR_SKIP: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
     ],
     MODEL_ARCH.PERSIMMON: [
         MODEL_TENSOR.ROPE_FREQS,
-    ]
+    ],
 }

 #
 # types
 #


 class TokenType(IntEnum):
     NORMAL = 1
     UNKNOWN = 2
@@ -336,11 +344,13 @@ class TokenType(IntEnum):
     UNUSED = 5
     BYTE = 6


 class RopeScalingType(Enum):
     NONE = 'none'
     LINEAR = 'linear'
     YARN = 'yarn'


 class GGMLQuantizationType(IntEnum):
     F32 = 0
     F16 = 1
@@ -357,6 +367,7 @@ class GGMLQuantizationType(IntEnum):
     Q6_K = 14
     Q8_K = 15


 class GGUFEndian(IntEnum):
     LITTLE = 0
     BIG = 1
@@ -379,7 +390,7 @@ class GGUFValueType(IntEnum):

     @staticmethod
     def get_type(val: Any) -> GGUFValueType:
-        if isinstance(val, str) or isinstance(val, bytes) or isinstance(val, bytearray):
+        if isinstance(val, (str, bytes, bytearray)):
             return GGUFValueType.STRING
         elif isinstance(val, list):
             return GGUFValueType.ARRAY
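The `get_type` change in the hunk above is behavior-preserving: `isinstance` accepts a tuple of types and matches when the value is an instance of any of them. A minimal standalone sketch of the equivalence (not part of the commit):

    # isinstance(x, (A, B, C)) == isinstance(x, A) or isinstance(x, B) or isinstance(x, C)
    for value in ("text", b"raw", bytearray(b"raw"), 42):
        chained = isinstance(value, str) or isinstance(value, bytes) or isinstance(value, bytearray)
        tupled = isinstance(value, (str, bytes, bytearray))
        assert chained == tupled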
@@ -391,79 +402,80 @@ class GGUFValueType(IntEnum):
             return GGUFValueType.INT32
         # TODO: need help with 64-bit types in Python
         else:
-            print("Unknown type: "+str(type(val)))
+            print("Unknown type:", type(val))
             sys.exit()


 # Note: Does not support GGML_QKK_64
 QK_K = 256
 # Items here are (block size, type size)
 GGML_QUANT_SIZES = {
-    GGMLQuantizationType.F32 : (1, 4),
-    GGMLQuantizationType.F16 : (1, 2),
-    GGMLQuantizationType.Q4_0 : (32, 2 + 16),
-    GGMLQuantizationType.Q4_1 : (32, 2 + 2 + 16),
-    GGMLQuantizationType.Q5_0 : (32, 2 + 4 + 16),
-    GGMLQuantizationType.Q5_1 : (32, 2 + 2 + 4 + 16),
-    GGMLQuantizationType.Q8_0 : (32, 2 + 32),
-    GGMLQuantizationType.Q8_1 : (32, 4 + 4 + 32),
-    GGMLQuantizationType.Q2_K : (256, 2 + 2 + QK_K // 16 + QK_K // 4),
-    GGMLQuantizationType.Q3_K : (256, 2 + QK_K // 4 + QK_K // 8 + 12),
-    GGMLQuantizationType.Q4_K : (256, 2 + 2 + QK_K // 2 + 12),
-    GGMLQuantizationType.Q5_K : (256, 2 + 2 + QK_K // 2 + QK_K // 8 + 12),
-    GGMLQuantizationType.Q6_K : (256, 2 + QK_K // 2 + QK_K // 4 + QK_K // 16),
-    GGMLQuantizationType.Q8_K : (256, 4 + QK_K + QK_K // 8),
+    GGMLQuantizationType.F32: (1, 4),
+    GGMLQuantizationType.F16: (1, 2),
+    GGMLQuantizationType.Q4_0: (32, 2 + 16),
+    GGMLQuantizationType.Q4_1: (32, 2 + 2 + 16),
+    GGMLQuantizationType.Q5_0: (32, 2 + 4 + 16),
+    GGMLQuantizationType.Q5_1: (32, 2 + 2 + 4 + 16),
+    GGMLQuantizationType.Q8_0: (32, 2 + 32),
+    GGMLQuantizationType.Q8_1: (32, 4 + 4 + 32),
+    GGMLQuantizationType.Q2_K: (256, 2 + 2 + QK_K // 16 + QK_K // 4),
+    GGMLQuantizationType.Q3_K: (256, 2 + QK_K // 4 + QK_K // 8 + 12),
+    GGMLQuantizationType.Q4_K: (256, 2 + 2 + QK_K // 2 + 12),
+    GGMLQuantizationType.Q5_K: (256, 2 + 2 + QK_K // 2 + QK_K // 8 + 12),
+    GGMLQuantizationType.Q6_K: (256, 2 + QK_K // 2 + QK_K // 4 + QK_K // 16),
+    GGMLQuantizationType.Q8_K: (256, 4 + QK_K + QK_K // 8),
 }


 # Aliases for backward compatibility.

 # general
-KEY_GENERAL_ARCHITECTURE : str = KEY.GENERAL.ARCHITECTURE
+KEY_GENERAL_ARCHITECTURE: str = KEY.GENERAL.ARCHITECTURE
 KEY_GENERAL_QUANTIZATION_VERSION: str = KEY.GENERAL.QUANTIZATION_VERSION
-KEY_GENERAL_ALIGNMENT : str = KEY.GENERAL.ALIGNMENT
-KEY_GENERAL_NAME : str = KEY.GENERAL.NAME
-KEY_GENERAL_AUTHOR : str = KEY.GENERAL.AUTHOR
-KEY_GENERAL_URL : str = KEY.GENERAL.URL
-KEY_GENERAL_DESCRIPTION : str = KEY.GENERAL.DESCRIPTION
-KEY_GENERAL_LICENSE : str = KEY.GENERAL.LICENSE
-KEY_GENERAL_SOURCE_URL : str = KEY.GENERAL.SOURCE_URL
-KEY_GENERAL_SOURCE_HF_REPO : str = KEY.GENERAL.SOURCE_HF_REPO
-KEY_GENERAL_FILE_TYPE : str = KEY.GENERAL.FILE_TYPE
+KEY_GENERAL_ALIGNMENT: str = KEY.GENERAL.ALIGNMENT
+KEY_GENERAL_NAME: str = KEY.GENERAL.NAME
+KEY_GENERAL_AUTHOR: str = KEY.GENERAL.AUTHOR
+KEY_GENERAL_URL: str = KEY.GENERAL.URL
+KEY_GENERAL_DESCRIPTION: str = KEY.GENERAL.DESCRIPTION
+KEY_GENERAL_LICENSE: str = KEY.GENERAL.LICENSE
+KEY_GENERAL_SOURCE_URL: str = KEY.GENERAL.SOURCE_URL
+KEY_GENERAL_SOURCE_HF_REPO: str = KEY.GENERAL.SOURCE_HF_REPO
+KEY_GENERAL_FILE_TYPE: str = KEY.GENERAL.FILE_TYPE

 # LLM
-KEY_CONTEXT_LENGTH : str = KEY.LLM.CONTEXT_LENGTH
-KEY_EMBEDDING_LENGTH : str = KEY.LLM.EMBEDDING_LENGTH
-KEY_BLOCK_COUNT : str = KEY.LLM.BLOCK_COUNT
-KEY_FEED_FORWARD_LENGTH : str = KEY.LLM.FEED_FORWARD_LENGTH
+KEY_CONTEXT_LENGTH: str = KEY.LLM.CONTEXT_LENGTH
+KEY_EMBEDDING_LENGTH: str = KEY.LLM.EMBEDDING_LENGTH
+KEY_BLOCK_COUNT: str = KEY.LLM.BLOCK_COUNT
+KEY_FEED_FORWARD_LENGTH: str = KEY.LLM.FEED_FORWARD_LENGTH
 KEY_USE_PARALLEL_RESIDUAL: str = KEY.LLM.USE_PARALLEL_RESIDUAL
-KEY_TENSOR_DATA_LAYOUT : str = KEY.LLM.TENSOR_DATA_LAYOUT
+KEY_TENSOR_DATA_LAYOUT: str = KEY.LLM.TENSOR_DATA_LAYOUT

 # attention
-KEY_ATTENTION_HEAD_COUNT : str = KEY.ATTENTION.HEAD_COUNT
-KEY_ATTENTION_HEAD_COUNT_KV : str = KEY.ATTENTION.HEAD_COUNT_KV
-KEY_ATTENTION_MAX_ALIBI_BIAS : str = KEY.ATTENTION.MAX_ALIBI_BIAS
-KEY_ATTENTION_CLAMP_KQV : str = KEY.ATTENTION.CLAMP_KQV
-KEY_ATTENTION_LAYERNORM_EPS : str = KEY.ATTENTION.LAYERNORM_EPS
+KEY_ATTENTION_HEAD_COUNT: str = KEY.ATTENTION.HEAD_COUNT
+KEY_ATTENTION_HEAD_COUNT_KV: str = KEY.ATTENTION.HEAD_COUNT_KV
+KEY_ATTENTION_MAX_ALIBI_BIAS: str = KEY.ATTENTION.MAX_ALIBI_BIAS
+KEY_ATTENTION_CLAMP_KQV: str = KEY.ATTENTION.CLAMP_KQV
+KEY_ATTENTION_LAYERNORM_EPS: str = KEY.ATTENTION.LAYERNORM_EPS
 KEY_ATTENTION_LAYERNORM_RMS_EPS: str = KEY.ATTENTION.LAYERNORM_RMS_EPS

 # RoPE
-KEY_ROPE_DIMENSION_COUNT : str = KEY.ROPE.DIMENSION_COUNT
-KEY_ROPE_FREQ_BASE : str = KEY.ROPE.FREQ_BASE
-KEY_ROPE_SCALING_TYPE : str = KEY.ROPE.SCALING_TYPE
-KEY_ROPE_SCALING_FACTOR : str = KEY.ROPE.SCALING_FACTOR
+KEY_ROPE_DIMENSION_COUNT: str = KEY.ROPE.DIMENSION_COUNT
+KEY_ROPE_FREQ_BASE: str = KEY.ROPE.FREQ_BASE
+KEY_ROPE_SCALING_TYPE: str = KEY.ROPE.SCALING_TYPE
+KEY_ROPE_SCALING_FACTOR: str = KEY.ROPE.SCALING_FACTOR
 KEY_ROPE_SCALING_ORIG_CTX_LEN: str = KEY.ROPE.SCALING_ORIG_CTX_LEN
-KEY_ROPE_SCALING_FINETUNED : str = KEY.ROPE.SCALING_FINETUNED
+KEY_ROPE_SCALING_FINETUNED: str = KEY.ROPE.SCALING_FINETUNED

 # tokenization
-KEY_TOKENIZER_MODEL : str = KEY.TOKENIZER.MODEL
-KEY_TOKENIZER_LIST : str = KEY.TOKENIZER.LIST
+KEY_TOKENIZER_MODEL: str = KEY.TOKENIZER.MODEL
+KEY_TOKENIZER_LIST: str = KEY.TOKENIZER.LIST
 KEY_TOKENIZER_TOKEN_TYPE: str = KEY.TOKENIZER.TOKEN_TYPE
-KEY_TOKENIZER_SCORES : str = KEY.TOKENIZER.SCORES
-KEY_TOKENIZER_MERGES : str = KEY.TOKENIZER.MERGES
-KEY_TOKENIZER_BOS_ID : str = KEY.TOKENIZER.BOS_ID
-KEY_TOKENIZER_EOS_ID : str = KEY.TOKENIZER.EOS_ID
-KEY_TOKENIZER_UNK_ID : str = KEY.TOKENIZER.UNK_ID
-KEY_TOKENIZER_SEP_ID : str = KEY.TOKENIZER.SEP_ID
-KEY_TOKENIZER_PAD_ID : str = KEY.TOKENIZER.PAD_ID
-KEY_TOKENIZER_HF_JSON : str = KEY.TOKENIZER.HF_JSON
-KEY_TOKENIZER_RWKV : str = KEY.TOKENIZER.RWKV
+KEY_TOKENIZER_SCORES: str = KEY.TOKENIZER.SCORES
+KEY_TOKENIZER_MERGES: str = KEY.TOKENIZER.MERGES
+KEY_TOKENIZER_BOS_ID: str = KEY.TOKENIZER.BOS_ID
+KEY_TOKENIZER_EOS_ID: str = KEY.TOKENIZER.EOS_ID
+KEY_TOKENIZER_UNK_ID: str = KEY.TOKENIZER.UNK_ID
+KEY_TOKENIZER_SEP_ID: str = KEY.TOKENIZER.SEP_ID
+KEY_TOKENIZER_PAD_ID: str = KEY.TOKENIZER.PAD_ID
+KEY_TOKENIZER_HF_JSON: str = KEY.TOKENIZER.HF_JSON
+KEY_TOKENIZER_RWKV: str = KEY.TOKENIZER.RWKV
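For orientation, each `GGML_QUANT_SIZES` entry above maps a quantization type to (block size, type size): how many elements a block holds and how many bytes that block occupies. The byte size of a quantized tensor follows directly; a sketch under the assumption that the element count divides evenly into blocks (the 4096 x 4096 shape is illustrative, not taken from this commit):

    def tensor_nbytes(n_elements: int, block_size: int, type_size: int) -> int:
        # each block packs block_size elements into type_size bytes
        assert n_elements % block_size == 0
        return n_elements // block_size * type_size

    # a 4096 x 4096 matrix in Q4_0, whose blocks hold 32 elements in 2 + 16 bytes
    print(tensor_nbytes(4096 * 4096, 32, 2 + 16))  # 9437184 (exactly 9 MiB)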
@@ -20,7 +20,7 @@ from gguf.constants import (
     GGUF_MAGIC,
     GGUF_VERSION,
     GGMLQuantizationType,
-    GGUFValueType
+    GGUFValueType,
 )

 READER_SUPPORTED_VERSIONS = [2, GGUF_VERSION]
@@ -76,14 +76,49 @@ class GGUFReader:
         GGUFValueType.BOOL: np.bool_,
     }

+    def __init__(self, path: os.PathLike[str] | str, mode: Literal['r' | 'r+' | 'c'] = 'r'):
+        self.data = np.memmap(path, mode = mode)
+        offs = 0
+        if self._get(offs, np.uint32, override_order = '<')[0] != GGUF_MAGIC:
+            raise ValueError('GGUF magic invalid')
+        offs += 4
+        temp_version = self._get(offs, np.uint32)
+        if temp_version[0] > 2000:
+            self.byte_order = 'S'
+            temp_version = temp_version.newbyteorder(self.byte_order)
+        version = temp_version[0]
+        if version not in READER_SUPPORTED_VERSIONS:
+            raise ValueError(f'Sorry, file appears to be version {version} which we cannot handle')
+        offs += self._push_field(ReaderField(offs, 'GGUF.version', [temp_version], [0], [GGUFValueType.UINT32]))
+        temp_counts = self._get(offs, np.uint64, 2)
+        offs += self._push_field(ReaderField(offs, 'GGUF.tensor_count', [temp_counts[:1]], [0], [GGUFValueType.UINT64]))
+        offs += self._push_field(ReaderField(offs, 'GGUF.kv_count', [temp_counts[1:]], [0], [GGUFValueType.UINT64]))
+        tensor_count, kv_count = temp_counts
+        offs = self._build_fields(offs, kv_count)
+        offs, tensors_fields = self._build_tensors_fields(offs, tensor_count)
+        new_align = self.fields.get('general.alignment')
+        if new_align is not None:
+            if new_align.types != [GGUFValueType.UINT64]:
+                raise ValueError('Bad type for general.alignment field')
+            self.alignment = new_align.parts[-1][0]
+        padding = offs % self.alignment
+        if padding != 0:
+            offs += self.alignment - padding
+        self._build_tensors(offs, tensors_fields)
+
     _DT = TypeVar('_DT', bound = npt.DTypeLike)
-    def _get(self, offset: int, dtype: npt.DTypeLike, count: int = 1, override_order: None | Literal['I' | 'S' | '<'] = None) -> npt.NDArray[Any]:
+
+    def _get(
+        self, offset: int, dtype: npt.DTypeLike, count: int = 1, override_order: None | Literal['I' | 'S' | '<'] = None,
+    ) -> npt.NDArray[Any]:
         count = int(count)
         itemsize = int(np.empty([], dtype = dtype).itemsize)
         end_offs = offset + itemsize * count
-        return (self.data[offset:end_offs]
+        return (
+            self.data[offset:end_offs]
             .view(dtype = dtype)[:count]
-            .newbyteorder(override_order or self.byte_order))
+            .newbyteorder(override_order or self.byte_order)
+        )

     def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int:
         if field.name in self.fields:
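The `temp_version[0] > 2000` check in the relocated `__init__` is a byte-order probe: a small version number read with the wrong endianness becomes huge, because version 3 stored little-endian reads back as 0x03000000 when interpreted big-endian. A standalone illustration of the idea (not the reader's actual code):

    import struct

    raw = struct.pack('<I', 3)              # GGUF_VERSION written little-endian
    wrong = struct.unpack('>I', raw)[0]     # the same bytes read big-endian
    print(wrong)                            # 50331648, far above any plausible version
    if wrong > 2000:
        print(struct.unpack('<I', raw)[0])  # swap the byte order back to recover 3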
@@ -93,9 +128,11 @@ class GGUFReader:

     def _get_str(self, offset: int) -> tuple[npt.NDArray[np.uint64], npt.NDArray[np.uint8]]:
         slen = self._get(offset, np.uint64)
-        return (slen, self._get(offset + 8, np.uint8, slen[0]))
+        return slen, self._get(offset + 8, np.uint8, slen[0])

-    def _get_field_parts(self, orig_offs: int, raw_type: int) -> tuple[int, list[npt.NDArray[Any]], list[int], list[GGUFValueType]]:
+    def _get_field_parts(
+        self, orig_offs: int, raw_type: int,
+    ) -> tuple[int, list[npt.NDArray[Any]], list[int], list[GGUFValueType]]:
         offs = orig_offs
         types: list[GGUFValueType] = []
         gtype = GGUFValueType(raw_type)
@@ -104,12 +141,12 @@ class GGUFReader:
         if gtype == GGUFValueType.STRING:
             sparts: list[npt.NDArray[Any]] = list(self._get_str(offs))
             size = sum(int(part.nbytes) for part in sparts)
-            return (size, sparts, [1], types)
+            return size, sparts, [1], types
         # Check if it's a simple scalar type.
         nptype = self._simple_value_map.get(gtype)
         if nptype is not None:
             val = self._get(offs, nptype)
-            return (int(val.nbytes), [val], [0], types)
+            return int(val.nbytes), [val], [0], types
         # Handle arrays.
         if gtype == GGUFValueType.ARRAY:
             raw_itype = self._get(offs, np.uint32)
@@ -126,7 +163,7 @@ class GGUFReader:
                 aparts += curr_parts
                 data_idxs += (idx + idxs_offs for idx in curr_idxs)
                 offs += curr_size
-            return (offs - orig_offs, aparts, data_idxs, types)
+            return offs - orig_offs, aparts, data_idxs, types
         # We can't deal with this one.
         raise ValueError('Unknown/unhandled field type {gtype}')

@@ -164,7 +201,7 @@ class GGUFReader:
                 orig_offs,
                 str(bytes(kv_kdata), encoding = 'utf-8'),
                 parts,
-                list(idx + idxs_offs for idx in field_idxs),
+                [idx + idxs_offs for idx in field_idxs],
                 field_types,
             ), skip_sum = True)
             offs += field_size
@@ -176,7 +213,7 @@ class GGUFReader:
             field = self._get_tensor(offs)
             offs += sum(int(part.nbytes) for part in field.parts)
             tensor_fields.append(field)
-        return (offs, tensor_fields)
+        return offs, tensor_fields

     def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None:
         tensors = []
@@ -210,37 +247,6 @@ class GGUFReader:
         self.tensors = tensors


-    def __init__(self, path: os.PathLike[str] | str, mode: Literal['r' | 'r+' | 'c'] = 'r') -> None:
-        self.data = np.memmap(path, mode = mode)
-        offs = 0
-        if self._get(offs, np.uint32, override_order = '<')[0] != GGUF_MAGIC:
-            raise ValueError('GGUF magic invalid')
-        offs += 4
-        temp_version = self._get(offs, np.uint32)
-        if temp_version[0] > 2000:
-            self.byte_order = 'S'
-            temp_version = temp_version.newbyteorder(self.byte_order)
-        version = temp_version[0]
-        if version not in READER_SUPPORTED_VERSIONS:
-            raise ValueError(f'Sorry, file appears to be version {version} which we cannot handle')
-        offs += self._push_field(ReaderField(offs, 'GGUF.version', [temp_version], [0], [GGUFValueType.UINT32]))
-        temp_counts = self._get(offs, np.uint64, 2)
-        offs += self._push_field(ReaderField(offs, 'GGUF.tensor_count', [temp_counts[:1]], [0], [GGUFValueType.UINT64]))
-        offs += self._push_field(ReaderField(offs, 'GGUF.kv_count', [temp_counts[1:]], [0], [GGUFValueType.UINT64]))
-        tensor_count, kv_count = temp_counts
-        offs = self._build_fields(offs, kv_count)
-        offs, tensors_fields = self._build_tensors_fields(offs, tensor_count)
-        new_align = self.fields.get('general.alignment')
-        if new_align is not None:
-            if new_align.types != [GGUFValueType.UINT64]:
-                raise ValueError('Bad type for general.alignment field')
-            self.alignment = new_align.parts[-1][0]
-        padding = offs % self.alignment
-        if padding != 0:
-            offs += self.alignment - padding
-        self._build_tensors(offs, tensors_fields)
-
-
 # Example usage:
 if __name__ == "__main__":
     if len(sys.argv) < 2:
@@ -250,7 +256,7 @@ if __name__ == "__main__":
     reader = GGUFReader(sys.argv[1], 'r')
     print(f'\n* Dumping {len(reader.fields)} key/value pair(s)')
     for n, field in enumerate(reader.fields.values(), 1):
-        if len(field.types) == 0:
+        if not field.types:
             pretty_type = 'N/A'
         elif field.types[0] == GGUFValueType.ARRAY:
             nest_count = len(field.types) - 1
@@ -19,7 +19,7 @@ from .constants import (
     GGUFEndian,
     GGUFValueType,
     RopeScalingType,
-    TokenType
+    TokenType,
 )


@@ -29,6 +29,7 @@ class WriterState(Enum):
     KV_DATA = auto()
     TI_DATA = auto()


 class GGUFWriter:
     fout: BufferedWriter
     temp_file: tempfile.SpooledTemporaryFile[bytes] | None
@@ -47,16 +48,10 @@ class GGUFWriter:
         GGUFValueType.BOOL: "?",
     }

-    def _pack(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> bytes:
-        pack_prefix = ''
-        if not skip_pack_prefix:
-            pack_prefix = '<' if self.endianess == GGUFEndian.LITTLE else '>'
-        return struct.pack(f'{pack_prefix}{fmt}', value)
-
-    def _write_packed(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> None:
-        self.fout.write(self._pack(fmt, value, skip_pack_prefix))
-
-    def __init__(self, path: os.PathLike[str] | str, arch: str, use_temp_file: bool = True, endianess: GGUFEndian = GGUFEndian.LITTLE) -> None:
+    def __init__(
+        self, path: os.PathLike[str] | str, arch: str, use_temp_file: bool = True,
+        endianess: GGUFEndian = GGUFEndian.LITTLE,
+    ):
         self.fout = open(path, "wb")
         self.arch = arch
         self.endianess = endianess
@@ -69,8 +64,9 @@ class GGUFWriter:
         self.use_temp_file = use_temp_file
         self.temp_file = None
         self.tensors = []
-        print("gguf: This GGUF file is for {0} Endian only"
-              .format("Big" if self.endianess == GGUFEndian.BIG else "Little"))
+        print("gguf: This GGUF file is for {0} Endian only".format(
+            "Big" if self.endianess == GGUFEndian.BIG else "Little",
+        ))
         self.state = WriterState.EMPTY

         self.add_architecture()
@@ -150,7 +146,7 @@ class GGUFWriter:
         self.add_val(val, GGUFValueType.BOOL)

     def add_string(self, key: str, val: str) -> None:
-        if len(val) == 0:
+        if not val:
             return
         self.add_key(key)
         self.add_val(val, GGUFValueType.STRING)
@@ -177,7 +173,7 @@ class GGUFWriter:
             encoded_val = val.encode("utf8") if isinstance(val, str) else val
             self.kv_data += self._pack("Q", len(encoded_val))
             self.kv_data += encoded_val
-        elif vtype == GGUFValueType.ARRAY and isinstance(val, Sequence) and len(val) > 0:
+        elif vtype == GGUFValueType.ARRAY and isinstance(val, Sequence) and val:
             ltype = GGUFValueType.get_type(val[0])
             if not all(GGUFValueType.get_type(i) is ltype for i in val[1:]):
                 raise ValueError("All items in a GGUF array should be of the same type")
@@ -192,7 +188,10 @@ class GGUFWriter:
     def ggml_pad(x: int, n: int) -> int:
         return ((x + n - 1) // n) * n

-    def add_tensor_info(self, name: str, tensor_shape: Sequence[int], tensor_dtype: np.dtype[np.float16] | np.dtype[np.float32], tensor_nbytes: int, raw_dtype: GGMLQuantizationType | None = None) -> None:
+    def add_tensor_info(
+        self, name: str, tensor_shape: Sequence[int], tensor_dtype: np.dtype[np.float16] | np.dtype[np.float32],
+        tensor_nbytes: int, raw_dtype: GGMLQuantizationType | None = None,
+    ) -> None:
         if self.state is not WriterState.EMPTY:
             raise ValueError(f'Expected output file to be empty, got {self.state}')

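The unchanged `ggml_pad` helper in the hunk above rounds a size up to the next multiple of the alignment, which is how the writer pads tensor offsets. A quick self-check:

    def ggml_pad(x: int, n: int) -> int:
        # round x up to the nearest multiple of n
        return ((x + n - 1) // n) * n

    assert ggml_pad(0, 32) == 0
    assert ggml_pad(1, 32) == 32
    assert ggml_pad(32, 32) == 32
    assert ggml_pad(33, 32) == 64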
@@ -215,7 +214,10 @@ class GGUFWriter:
         self.offset_tensor += GGUFWriter.ggml_pad(tensor_nbytes, self.data_alignment)
         self.ti_data_count += 1

-    def add_tensor(self, name: str, tensor: np.ndarray[Any, Any], raw_shape: Sequence[int] | None = None, raw_dtype: GGMLQuantizationType | None = None) -> None:
+    def add_tensor(
+        self, name: str, tensor: np.ndarray[Any, Any], raw_shape: Sequence[int] | None = None,
+        raw_dtype: GGMLQuantizationType | None = None,
+    ) -> None:
         if self.endianess == GGUFEndian.BIG:
             tensor.byteswap(inplace=True)
         if self.use_temp_file and self.temp_file is None:
@@ -242,7 +244,7 @@ class GGUFWriter:
         if self.state is not WriterState.TI_DATA:
             raise ValueError(f'Expected output file to contain tensor info, got {self.state}')

-        if self.endianess==GGUFEndian.BIG:
+        if self.endianess == GGUFEndian.BIG:
             tensor.byteswap(inplace=True)
         self.write_padding(self.fout, self.fout.tell())
         tensor.tofile(self.fout)
@@ -402,3 +404,12 @@ class GGUFWriter:

     def add_pad_token_id(self, id: int) -> None:
         self.add_uint32(KEY.TOKENIZER.PAD_ID, id)
+
+    def _pack(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> bytes:
+        pack_prefix = ''
+        if not skip_pack_prefix:
+            pack_prefix = '<' if self.endianess == GGUFEndian.LITTLE else '>'
+        return struct.pack(f'{pack_prefix}{fmt}', value)
+
+    def _write_packed(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> None:
+        self.fout.write(self._pack(fmt, value, skip_pack_prefix))
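The relocated `_pack` prepends a `struct` byte-order prefix ('<' for little-endian, '>' for big-endian) unless the caller opts out. The same pattern as a free function, for illustration only (`pack_value` is a made-up name, not part of the module):

    import struct
    from typing import Any

    def pack_value(fmt: str, value: Any, big_endian: bool = False, skip_pack_prefix: bool = False) -> bytes:
        # '<' and '>' force a fixed byte order with standard sizes and no padding
        prefix = '' if skip_pack_prefix else ('>' if big_endian else '<')
        return struct.pack(f'{prefix}{fmt}', value)

    print(pack_value('I', 1).hex())                   # 01000000
    print(pack_value('I', 1, big_endian=True).hex())  # 00000001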
@@ -9,14 +9,14 @@ class TensorNameMap:
     mappings_cfg: dict[MODEL_TENSOR, tuple[str, ...]] = {
         # Token embeddings
         MODEL_TENSOR.TOKEN_EMBD: (
             "gpt_neox.embed_in", # gptneox
             "transformer.wte", # gpt2 gpt-j mpt refact
             "transformer.word_embeddings", # falcon
             "word_embeddings", # bloom
             "model.embed_tokens", # llama-hf
             "tok_embeddings", # llama-pth
             "embeddings.word_embeddings", # bert
             "language_model.embedding.word_embeddings", # persimmon
         ),

         # Token type embeddings
@@ -37,59 +37,59 @@ class TensorNameMap:

         # Output
         MODEL_TENSOR.OUTPUT: (
             "embed_out", # gptneox
             "lm_head", # gpt2 mpt falcon llama-hf baichuan
             "output", # llama-pth bloom
             "word_embeddings_for_head", # persimmon
         ),

         # Output norm
         MODEL_TENSOR.OUTPUT_NORM: (
             "gpt_neox.final_layer_norm", # gptneox
             "transformer.ln_f", # gpt2 gpt-j falcon
             "model.norm", # llama-hf baichuan
             "norm", # llama-pth
             "embeddings.LayerNorm", # bert
             "transformer.norm_f", # mpt
             "ln_f", # refact bloom
             "language_model.encoder.final_layernorm", # persimmon
         ),

         # Rope frequencies
         MODEL_TENSOR.ROPE_FREQS: (
             "rope.freqs", # llama-pth
         ),
     }

     block_mappings_cfg: dict[MODEL_TENSOR, tuple[str, ...]] = {
         # Attention norm
         MODEL_TENSOR.ATTN_NORM: (
             "gpt_neox.layers.{bid}.input_layernorm", # gptneox
             "transformer.h.{bid}.ln_1", # gpt2 gpt-j refact
             "transformer.blocks.{bid}.norm_1", # mpt
             "transformer.h.{bid}.input_layernorm", # falcon7b
             "h.{bid}.input_layernorm", # bloom
             "transformer.h.{bid}.ln_mlp", # falcon40b
             "model.layers.{bid}.input_layernorm", # llama-hf
             "layers.{bid}.attention_norm", # llama-pth
             "encoder.layer.{bid}.attention.output.LayerNorm", # bert
             "language_model.encoder.layers.{bid}.input_layernorm", # persimmon
             "model.layers.{bid}.ln1", # yi
         ),

         # Attention norm 2
         MODEL_TENSOR.ATTN_NORM_2: (
             "transformer.h.{bid}.ln_attn", # falcon40b
         ),

         # Attention query-key-value
         MODEL_TENSOR.ATTN_QKV: (
             "gpt_neox.layers.{bid}.attention.query_key_value", # gptneox
             "transformer.h.{bid}.attn.c_attn", # gpt2
             "transformer.blocks.{bid}.attn.Wqkv", # mpt
             "transformer.h.{bid}.self_attention.query_key_value", # falcon
             "h.{bid}.self_attention.query_key_value", # bloom
             "language_model.encoder.layers.{bid}.self_attention.query_key_value", # persimmon
         ),

         # Attention query
@@ -118,69 +118,69 @@ class TensorNameMap:

         # Attention output
         MODEL_TENSOR.ATTN_OUT: (
             "gpt_neox.layers.{bid}.attention.dense", # gptneox
             "transformer.h.{bid}.attn.c_proj", # gpt2 refact
             "transformer.blocks.{bid}.attn.out_proj", # mpt
             "transformer.h.{bid}.self_attention.dense", # falcon
             "h.{bid}.self_attention.dense", # bloom
             "model.layers.{bid}.self_attn.o_proj", # llama-hf
             "layers.{bid}.attention.wo", # llama-pth
             "encoder.layer.{bid}.attention.output.dense", # bert
             "transformer.h.{bid}.attn.out_proj", # gpt-j
-            "language_model.encoder.layers.{bid}.self_attention.dense" # persimmon
+            "language_model.encoder.layers.{bid}.self_attention.dense", # persimmon
         ),

         # Rotary embeddings
         MODEL_TENSOR.ATTN_ROT_EMBD: (
             "model.layers.{bid}.self_attn.rotary_emb.inv_freq", # llama-hf
             "layers.{bid}.attention.inner_attention.rope.freqs", # llama-pth
         ),

         # Feed-forward norm
         MODEL_TENSOR.FFN_NORM: (
             "gpt_neox.layers.{bid}.post_attention_layernorm", # gptneox
             "transformer.h.{bid}.ln_2", # gpt2 refact
             "h.{bid}.post_attention_layernorm", # bloom
             "transformer.blocks.{bid}.norm_2", # mpt
             "model.layers.{bid}.post_attention_layernorm", # llama-hf
             "layers.{bid}.ffn_norm", # llama-pth
             "encoder.layer.{bid}.output.LayerNorm", # bert
             "language_model.encoder.layers.{bid}.post_attention_layernorm", # persimmon
             "model.layers.{bid}.ln2", # yi
         ),

         # Feed-forward up
         MODEL_TENSOR.FFN_UP: (
             "gpt_neox.layers.{bid}.mlp.dense_h_to_4h", # gptneox
             "transformer.h.{bid}.mlp.c_fc", # gpt2
             "transformer.blocks.{bid}.ffn.up_proj", # mpt
             "transformer.h.{bid}.mlp.dense_h_to_4h", # falcon
             "h.{bid}.mlp.dense_h_to_4h", # bloom
             "model.layers.{bid}.mlp.up_proj", # llama-hf refact
             "layers.{bid}.feed_forward.w3", # llama-pth
             "encoder.layer.{bid}.intermediate.dense", # bert
             "transformer.h.{bid}.mlp.fc_in", # gpt-j
             "language_model.encoder.layers.{bid}.mlp.dense_h_to_4h", # persimmon
         ),

         # Feed-forward gate
         MODEL_TENSOR.FFN_GATE: (
             "model.layers.{bid}.mlp.gate_proj", # llama-hf refact
             "layers.{bid}.feed_forward.w1", # llama-pth
         ),

         # Feed-forward down
         MODEL_TENSOR.FFN_DOWN: (
             "gpt_neox.layers.{bid}.mlp.dense_4h_to_h", # gptneox
             "transformer.h.{bid}.mlp.c_proj", # gpt2 refact
             "transformer.blocks.{bid}.ffn.down_proj", # mpt
             "transformer.h.{bid}.mlp.dense_4h_to_h", # falcon
             "h.{bid}.mlp.dense_4h_to_h", # bloom
             "model.layers.{bid}.mlp.down_proj", # llama-hf
             "layers.{bid}.feed_forward.w2", # llama-pth
             "encoder.layer.{bid}.output.dense", # bert
             "transformer.h.{bid}.mlp.fc_out", # gpt-j
             "language_model.encoder.layers.{bid}.mlp.dense_4h_to_h", # persimmon
         ),

         MODEL_TENSOR.ATTN_Q_NORM: (
@@ -192,8 +192,8 @@ class TensorNameMap:
         ),

         MODEL_TENSOR.ROPE_FREQS: (
             "language_model.encoder.layers.{bid}.self_attention.rotary_emb.inv_freq", # persimmon
-        )
+        ),
     }

     mapping: dict[str, tuple[MODEL_TENSOR, str]]
@@ -225,7 +225,7 @@ class TensorNameMap:
             if key.endswith(suffix):
                 result = self.mapping.get(key[:-len(suffix)])
                 if result is not None:
-                    return (result[0], result[1] + suffix)
+                    return result[0], result[1] + suffix
         return None

     def get_name(self, key: str, try_suffixes: Sequence[str] = ()) -> str | None:
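The tuple-return cleanup above sits in the suffix-matching lookup: the method strips a trailing suffix such as '.weight', looks the base name up in `mapping`, and re-attaches the suffix to the mapped name. A toy version of that lookup, with a hypothetical one-entry mapping:

    mapping = {"model.embed_tokens": ("TOKEN_EMBD", "token_embd")}  # hypothetical entry

    def lookup(key: str, try_suffixes=(".weight", ".bias")):
        for suffix in try_suffixes:
            if key.endswith(suffix):
                result = mapping.get(key[:-len(suffix)])
                if result is not None:
                    return result[0], result[1] + suffix
        return None

    print(lookup("model.embed_tokens.weight"))  # ('TOKEN_EMBD', 'token_embd.weight')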
@@ -252,5 +252,6 @@ class TensorNameMap:
     def __repr__(self) -> str:
         return repr(self.mapping)


 def get_tensor_name_map(arch: MODEL_ARCH, n_blocks: int) -> TensorNameMap:
     return TensorNameMap(arch, n_blocks)
@@ -28,6 +28,26 @@ class SpecialVocab:
         self.special_token_types = ('bos', 'eos', 'unk', 'sep', 'pad')
         self._load(Path(path))

+    def __repr__(self) -> str:
+        return f'<SpecialVocab with {len(self.merges)} merges and special tokens {self.special_token_ids or "unset"}>'
+
+    def add_to_gguf(self, gw: GGUFWriter, quiet: bool = False) -> None:
+        if self.merges:
+            if not quiet:
+                print(f'gguf: Adding {len(self.merges)} merge(s).')
+            gw.add_token_merges(self.merges)
+        for typ, tokid in self.special_token_ids.items():
+            handler: Callable[[int], None] | None = getattr(gw, f'add_{typ}_token_id', None)
+            if handler is None:
+                print(
+                    f'gguf: WARNING: No handler for special token type {typ} with id {tokid} - skipping',
+                    file = sys.stderr,
+                )
+                continue
+            if not quiet:
+                print(f'gguf: Setting special token type {typ} to {tokid}')
+            handler(tokid)
+
     def _load(self, path: Path) -> None:
         if not self._try_load_from_tokenizer_json(path):
             self._try_load_from_config_json(path)
@@ -38,9 +58,10 @@ class SpecialVocab:
         if self.n_vocab is None or tid < self.n_vocab:
             self.special_token_ids[typ] = tid
             return
-        print(f'gguf: WARNING: Special token type {typ}, id {tid} out of range, must be under {self.n_vocab} - skipping',
-              file = sys.stderr)
+        print(
+            f'gguf: WARNING: Special token type {typ}, id {tid} out of range, must be under {self.n_vocab} - skipping',
+            file = sys.stderr,
+        )

     def _try_load_from_tokenizer_json(self, path: Path) -> bool:
         tokenizer_file = path / 'tokenizer.json'
@@ -50,7 +71,7 @@ class SpecialVocab:
             tokenizer = json.load(f)
         if self.load_merges:
             merges = tokenizer.get('model', {}).get('merges')
-            if isinstance(merges, list) and len(merges) > 0 and isinstance(merges[0], str):
+            if isinstance(merges, list) and merges and isinstance(merges[0], str):
                 self.merges = merges
         tokenizer_config_file = path / 'tokenizer_config.json'
         added_tokens = tokenizer.get('added_tokens')
@@ -70,9 +91,10 @@ class SpecialVocab:
             else:
                 continue
             # We only need the first match here.
-            maybe_token_id = next((
-                atok.get('id') for atok in added_tokens
-                if atok.get('content') == tc_content), None)
+            maybe_token_id = next(
+                (atok.get('id') for atok in added_tokens if atok.get('content') == tc_content),
+                None,
+            )
             self._set_special_token(typ, maybe_token_id)
         return True

@@ -85,20 +107,3 @@ class SpecialVocab:
         for typ in self.special_token_types:
             self._set_special_token(typ, config.get(f'{typ}_token_id'))
         return True
-
-    def add_to_gguf(self, gw: GGUFWriter, quiet: bool = False) -> None:
-        if len(self.merges) > 0:
-            if not quiet:
-                print(f'gguf: Adding {len(self.merges)} merge(s).')
-            gw.add_token_merges(self.merges)
-        for typ, tokid in self.special_token_ids.items():
-            handler: Callable[[int], None] | None = getattr(gw, f'add_{typ}_token_id', None)
-            if handler is None:
-                print(f'gguf: WARNING: No handler for special token type {typ} with id {tokid} - skipping', file = sys.stderr)
-                continue
-            if not quiet:
-                print(f'gguf: Setting special token type {typ} to {tokid}')
-            handler(tokid)
-
-    def __repr__(self) -> str:
-        return f'<SpecialVocab with {len(self.merges)} merges and special tokens {self.special_token_ids or "unset"}>'
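Taken together, the moved `add_to_gguf` writes any BPE merges and then dispatches each special token id to the matching `GGUFWriter.add_*_token_id` method via `getattr`. A hedged usage sketch; the file path, model directory, and the `load_merges` constructor argument are assumptions, since the `SpecialVocab` constructor signature is not shown in this diff:

    # both classes come from this package's gguf_writer and vocab modules
    writer = GGUFWriter('model.gguf', arch='llama')  # signature as changed above
    special_vocab = SpecialVocab('path/to/model/dir', load_merges=True)
    special_vocab.add_to_gguf(writer)  # merges first, then bos/eos/unk/sep/pad ids if present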