From f364636b2e85dedfb680c2d061eab7c6fbe52c71 Mon Sep 17 00:00:00 2001
From: Jared Van Bortel
Date: Tue, 7 Nov 2023 21:05:41 -0500
Subject: [PATCH] style cleanup with flake8

---
 gguf-py/gguf/constants.py      | 264 +++++++++++++++++----------------
 gguf-py/gguf/gguf_reader.py    |  92 ++++++------
 gguf-py/gguf/gguf_writer.py    |  47 +++---
 gguf-py/gguf/tensor_mapping.py | 171 ++++++++++-----------
 gguf-py/gguf/vocab.py          |  53 ++++---
 5 files changed, 331 insertions(+), 296 deletions(-)

diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index 7a0f5c709..f4bc52674 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -8,7 +8,7 @@ from typing import Any, NamedTuple, Type
 # constants
 #
 
-GGUF_MAGIC             = 0x46554747 # "GGUF"
+GGUF_MAGIC             = 0x46554747  # "GGUF"
 GGUF_VERSION           = 3
 GGUF_DEFAULT_ALIGNMENT = 32
 
@@ -16,64 +16,71 @@ GGUF_DEFAULT_ALIGNMENT = 32
 # metadata keys
 #
 
+
 class GeneralKeys(StrEnum):
-    ARCHITECTURE : str = "general.architecture"
+    ARCHITECTURE: str = "general.architecture"
     QUANTIZATION_VERSION: str = "general.quantization_version"
-    ALIGNMENT : str = "general.alignment"
-    NAME : str = "general.name"
-    AUTHOR : str = "general.author"
-    URL : str = "general.url"
-    DESCRIPTION : str = "general.description"
-    LICENSE : str = "general.license"
-    SOURCE_URL : str = "general.source.url"
-    SOURCE_HF_REPO : str = "general.source.huggingface.repository"
-    FILE_TYPE : str = "general.file_type"
+    ALIGNMENT: str = "general.alignment"
+    NAME: str = "general.name"
+    AUTHOR: str = "general.author"
+    URL: str = "general.url"
+    DESCRIPTION: str = "general.description"
+    LICENSE: str = "general.license"
+    SOURCE_URL: str = "general.source.url"
+    SOURCE_HF_REPO: str = "general.source.huggingface.repository"
+    FILE_TYPE: str = "general.file_type"
+
 
 class AttentionKeys(StrEnum):
-    HEAD_COUNT : str = "{arch}.attention.head_count"
-    HEAD_COUNT_KV : str = "{arch}.attention.head_count_kv"
-    MAX_ALIBI_BIAS : str = "{arch}.attention.max_alibi_bias"
-    CLAMP_KQV : str = "{arch}.attention.clamp_kqv"
-    LAYERNORM_EPS : str = "{arch}.attention.layer_norm_epsilon"
+    HEAD_COUNT: str = "{arch}.attention.head_count"
+    HEAD_COUNT_KV: str = "{arch}.attention.head_count_kv"
+    MAX_ALIBI_BIAS: str = "{arch}.attention.max_alibi_bias"
+    CLAMP_KQV: str = "{arch}.attention.clamp_kqv"
+    LAYERNORM_EPS: str = "{arch}.attention.layer_norm_epsilon"
     LAYERNORM_RMS_EPS: str = "{arch}.attention.layer_norm_rms_epsilon"
+
 
 class RopeKeys(StrEnum):
-    DIMENSION_COUNT : str = "{arch}.rope.dimension_count"
-    FREQ_BASE : str = "{arch}.rope.freq_base"
-    SCALING_TYPE : str = "{arch}.rope.scaling.type"
-    SCALING_FACTOR : str = "{arch}.rope.scaling.factor"
+    DIMENSION_COUNT: str = "{arch}.rope.dimension_count"
+    FREQ_BASE: str = "{arch}.rope.freq_base"
+    SCALING_TYPE: str = "{arch}.rope.scaling.type"
+    SCALING_FACTOR: str = "{arch}.rope.scaling.factor"
     SCALING_ORIG_CTX_LEN: str = "{arch}.rope.scaling.original_context_length"
-    SCALING_FINETUNED : str = "{arch}.rope.scaling.finetuned"
+    SCALING_FINETUNED: str = "{arch}.rope.scaling.finetuned"
+
 
 class TokenizerKeys(StrEnum):
-    MODEL : str = "tokenizer.ggml.model"
-    LIST : str = "tokenizer.ggml.tokens"
+    MODEL: str = "tokenizer.ggml.model"
+    LIST: str = "tokenizer.ggml.tokens"
     TOKEN_TYPE: str = "tokenizer.ggml.token_type"
-    SCORES : str = "tokenizer.ggml.scores"
-    MERGES : str = "tokenizer.ggml.merges"
-    BOS_ID : str = "tokenizer.ggml.bos_token_id"
-    EOS_ID : str = "tokenizer.ggml.eos_token_id"
-    UNK_ID : str = "tokenizer.ggml.unknown_token_id"
-    SEP_ID : str = "tokenizer.ggml.seperator_token_id"
-    PAD_ID : str = "tokenizer.ggml.padding_token_id"
-    HF_JSON : str = "tokenizer.huggingface.json"
-    RWKV : str = "tokenizer.rwkv.world"
+    SCORES: str = "tokenizer.ggml.scores"
+    MERGES: str = "tokenizer.ggml.merges"
+    BOS_ID: str = "tokenizer.ggml.bos_token_id"
+    EOS_ID: str = "tokenizer.ggml.eos_token_id"
+    UNK_ID: str = "tokenizer.ggml.unknown_token_id"
+    SEP_ID: str = "tokenizer.ggml.seperator_token_id"
+    PAD_ID: str = "tokenizer.ggml.padding_token_id"
+    HF_JSON: str = "tokenizer.huggingface.json"
+    RWKV: str = "tokenizer.rwkv.world"
+
 
 class LLMKeys(StrEnum):
-    CONTEXT_LENGTH : str = "{arch}.context_length"
-    EMBEDDING_LENGTH : str = "{arch}.embedding_length"
-    BLOCK_COUNT : str = "{arch}.block_count"
-    FEED_FORWARD_LENGTH : str = "{arch}.feed_forward_length"
+    CONTEXT_LENGTH: str = "{arch}.context_length"
+    EMBEDDING_LENGTH: str = "{arch}.embedding_length"
+    BLOCK_COUNT: str = "{arch}.block_count"
+    FEED_FORWARD_LENGTH: str = "{arch}.feed_forward_length"
     USE_PARALLEL_RESIDUAL: str = "{arch}.use_parallel_residual"
-    TENSOR_DATA_LAYOUT : str = "{arch}.tensor_data_layout"
+    TENSOR_DATA_LAYOUT: str = "{arch}.tensor_data_layout"
+
 
 class Keys(NamedTuple):
-    GENERAL : Type[GeneralKeys ] = GeneralKeys
-    LLM : Type[LLMKeys ] = LLMKeys
+    GENERAL: Type[GeneralKeys] = GeneralKeys
+    LLM: Type[LLMKeys] = LLMKeys
     ATTENTION: Type[AttentionKeys] = AttentionKeys
-    ROPE : Type[RopeKeys ] = RopeKeys
+    ROPE: Type[RopeKeys] = RopeKeys
     TOKENIZER: Type[TokenizerKeys] = TokenizerKeys
 
+
 KEY = Keys()
 
 #
@@ -82,42 +89,42 @@ KEY = Keys()
 
 
 class MODEL_ARCH(IntEnum):
-    LLAMA : int = auto()
-    FALCON : int = auto()
-    BAICHUAN : int = auto()
-    GPT2 : int = auto()
-    GPTJ : int = auto()
-    GPTNEOX : int = auto()
-    MPT : int = auto()
-    STARCODER : int = auto()
-    PERSIMMON : int = auto()
-    REFACT : int = auto()
-    BERT : int = auto()
-    BLOOM : int = auto()
+    LLAMA: int = auto()
+    FALCON: int = auto()
+    BAICHUAN: int = auto()
+    GPT2: int = auto()
+    GPTJ: int = auto()
+    GPTNEOX: int = auto()
+    MPT: int = auto()
+    STARCODER: int = auto()
+    PERSIMMON: int = auto()
+    REFACT: int = auto()
+    BERT: int = auto()
+    BLOOM: int = auto()
 
 
 class MODEL_TENSOR(IntEnum):
-    TOKEN_EMBD : int = auto()
-    TOKEN_EMBD_NORM : int = auto()
-    TOKEN_TYPES : int = auto()
-    POS_EMBD : int = auto()
-    OUTPUT : int = auto()
-    OUTPUT_NORM : int = auto()
-    ROPE_FREQS : int = auto()
-    ATTN_Q : int = auto()
-    ATTN_K : int = auto()
-    ATTN_V : int = auto()
-    ATTN_QKV : int = auto()
-    ATTN_OUT : int = auto()
-    ATTN_NORM : int = auto()
-    ATTN_NORM_2 : int = auto()
-    ATTN_ROT_EMBD : int = auto()
-    FFN_GATE : int = auto()
-    FFN_DOWN : int = auto()
-    FFN_UP : int = auto()
-    FFN_NORM : int = auto()
-    ATTN_Q_NORM : int = auto()
-    ATTN_K_NORM : int = auto()
+    TOKEN_EMBD: int = auto()
+    TOKEN_EMBD_NORM: int = auto()
+    TOKEN_TYPES: int = auto()
+    POS_EMBD: int = auto()
+    OUTPUT: int = auto()
+    OUTPUT_NORM: int = auto()
+    ROPE_FREQS: int = auto()
+    ATTN_Q: int = auto()
+    ATTN_K: int = auto()
+    ATTN_V: int = auto()
+    ATTN_QKV: int = auto()
+    ATTN_OUT: int = auto()
+    ATTN_NORM: int = auto()
+    ATTN_NORM_2: int = auto()
+    ATTN_ROT_EMBD: int = auto()
+    FFN_GATE: int = auto()
+    FFN_DOWN: int = auto()
+    FFN_UP: int = auto()
+    FFN_NORM: int = auto()
+    ATTN_Q_NORM: int = auto()
+    ATTN_K_NORM: int = auto()
 
 
 MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
@@ -321,13 +328,14 @@ MODEL_TENSOR_SKIP: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
     ],
     MODEL_ARCH.PERSIMMON: [
         MODEL_TENSOR.ROPE_FREQS,
-    ]
+    ],
 }
 
 #
 # types
 #
 
+
 class TokenType(IntEnum):
     NORMAL = 1
     UNKNOWN = 2
@@ -336,11 +344,13 @@ class TokenType(IntEnum):
     UNUSED = 5
     BYTE = 6
 
+
 class RopeScalingType(Enum):
     NONE = 'none'
     LINEAR = 'linear'
     YARN = 'yarn'
 
+
 class GGMLQuantizationType(IntEnum):
     F32 = 0
     F16 = 1
@@ -357,6 +367,7 @@ class GGMLQuantizationType(IntEnum):
     Q6_K = 14
     Q8_K = 15
 
+
 class GGUFEndian(IntEnum):
     LITTLE = 0
     BIG = 1
@@ -379,7 +390,7 @@ class GGUFValueType(IntEnum):
 
     @staticmethod
     def get_type(val: Any) -> GGUFValueType:
-        if isinstance(val, str) or isinstance(val, bytes) or isinstance(val, bytearray):
+        if isinstance(val, (str, bytes, bytearray)):
             return GGUFValueType.STRING
         elif isinstance(val, list):
             return GGUFValueType.ARRAY
@@ -391,79 +402,80 @@ class GGUFValueType(IntEnum):
             return GGUFValueType.INT32
         # TODO: need help with 64-bit types in Python
         else:
-            print("Unknown type: "+str(type(val)))
+            print("Unknown type:", type(val))
             sys.exit()
 
+
 # Note: Does not support GGML_QKK_64
 QK_K = 256
 # Items here are (block size, type size)
 GGML_QUANT_SIZES = {
-    GGMLQuantizationType.F32 : (1, 4),
-    GGMLQuantizationType.F16 : (1, 2),
-    GGMLQuantizationType.Q4_0 : (32, 2 + 16),
-    GGMLQuantizationType.Q4_1 : (32, 2 + 2 + 16),
-    GGMLQuantizationType.Q5_0 : (32, 2 + 4 + 16),
-    GGMLQuantizationType.Q5_1 : (32, 2 + 2 + 4 + 16),
-    GGMLQuantizationType.Q8_0 : (32, 2 + 32),
-    GGMLQuantizationType.Q8_1 : (32, 4 + 4 + 32),
-    GGMLQuantizationType.Q2_K : (256, 2 + 2 + QK_K // 16 + QK_K // 4),
-    GGMLQuantizationType.Q3_K : (256, 2 + QK_K // 4 + QK_K // 8 + 12),
-    GGMLQuantizationType.Q4_K : (256, 2 + 2 + QK_K // 2 + 12),
-    GGMLQuantizationType.Q5_K : (256, 2 + 2 + QK_K // 2 + QK_K // 8 + 12),
-    GGMLQuantizationType.Q6_K : (256, 2 + QK_K // 2 + QK_K // 4 + QK_K // 16),
-    GGMLQuantizationType.Q8_K : (256, 4 + QK_K + QK_K // 8),
+    GGMLQuantizationType.F32: (1, 4),
+    GGMLQuantizationType.F16: (1, 2),
+    GGMLQuantizationType.Q4_0: (32, 2 + 16),
+    GGMLQuantizationType.Q4_1: (32, 2 + 2 + 16),
+    GGMLQuantizationType.Q5_0: (32, 2 + 4 + 16),
+    GGMLQuantizationType.Q5_1: (32, 2 + 2 + 4 + 16),
+    GGMLQuantizationType.Q8_0: (32, 2 + 32),
+    GGMLQuantizationType.Q8_1: (32, 4 + 4 + 32),
+    GGMLQuantizationType.Q2_K: (256, 2 + 2 + QK_K // 16 + QK_K // 4),
+    GGMLQuantizationType.Q3_K: (256, 2 + QK_K // 4 + QK_K // 8 + 12),
+    GGMLQuantizationType.Q4_K: (256, 2 + 2 + QK_K // 2 + 12),
+    GGMLQuantizationType.Q5_K: (256, 2 + 2 + QK_K // 2 + QK_K // 8 + 12),
+    GGMLQuantizationType.Q6_K: (256, 2 + QK_K // 2 + QK_K // 4 + QK_K // 16),
+    GGMLQuantizationType.Q8_K: (256, 4 + QK_K + QK_K // 8),
 }
 
+
 # Aliases for backward compatibility.
 
 # general
-KEY_GENERAL_ARCHITECTURE : str = KEY.GENERAL.ARCHITECTURE
+KEY_GENERAL_ARCHITECTURE: str = KEY.GENERAL.ARCHITECTURE
 KEY_GENERAL_QUANTIZATION_VERSION: str = KEY.GENERAL.QUANTIZATION_VERSION
-KEY_GENERAL_ALIGNMENT : str = KEY.GENERAL.ALIGNMENT
-KEY_GENERAL_NAME : str = KEY.GENERAL.NAME
-KEY_GENERAL_AUTHOR : str = KEY.GENERAL.AUTHOR
-KEY_GENERAL_URL : str = KEY.GENERAL.URL
-KEY_GENERAL_DESCRIPTION : str = KEY.GENERAL.DESCRIPTION
-KEY_GENERAL_LICENSE : str = KEY.GENERAL.LICENSE
-KEY_GENERAL_SOURCE_URL : str = KEY.GENERAL.SOURCE_URL
-KEY_GENERAL_SOURCE_HF_REPO : str = KEY.GENERAL.SOURCE_HF_REPO
-KEY_GENERAL_FILE_TYPE : str = KEY.GENERAL.FILE_TYPE
+KEY_GENERAL_ALIGNMENT: str = KEY.GENERAL.ALIGNMENT
+KEY_GENERAL_NAME: str = KEY.GENERAL.NAME
+KEY_GENERAL_AUTHOR: str = KEY.GENERAL.AUTHOR
+KEY_GENERAL_URL: str = KEY.GENERAL.URL
+KEY_GENERAL_DESCRIPTION: str = KEY.GENERAL.DESCRIPTION
+KEY_GENERAL_LICENSE: str = KEY.GENERAL.LICENSE
+KEY_GENERAL_SOURCE_URL: str = KEY.GENERAL.SOURCE_URL
+KEY_GENERAL_SOURCE_HF_REPO: str = KEY.GENERAL.SOURCE_HF_REPO
+KEY_GENERAL_FILE_TYPE: str = KEY.GENERAL.FILE_TYPE
 
 # LLM
-KEY_CONTEXT_LENGTH : str = KEY.LLM.CONTEXT_LENGTH
-KEY_EMBEDDING_LENGTH : str = KEY.LLM.EMBEDDING_LENGTH
-KEY_BLOCK_COUNT : str = KEY.LLM.BLOCK_COUNT
-KEY_FEED_FORWARD_LENGTH : str = KEY.LLM.FEED_FORWARD_LENGTH
+KEY_CONTEXT_LENGTH: str = KEY.LLM.CONTEXT_LENGTH
+KEY_EMBEDDING_LENGTH: str = KEY.LLM.EMBEDDING_LENGTH
+KEY_BLOCK_COUNT: str = KEY.LLM.BLOCK_COUNT
+KEY_FEED_FORWARD_LENGTH: str = KEY.LLM.FEED_FORWARD_LENGTH
 KEY_USE_PARALLEL_RESIDUAL: str = KEY.LLM.USE_PARALLEL_RESIDUAL
-KEY_TENSOR_DATA_LAYOUT : str = KEY.LLM.TENSOR_DATA_LAYOUT
+KEY_TENSOR_DATA_LAYOUT: str = KEY.LLM.TENSOR_DATA_LAYOUT
 
 # attention
-KEY_ATTENTION_HEAD_COUNT : str = KEY.ATTENTION.HEAD_COUNT
-KEY_ATTENTION_HEAD_COUNT_KV : str = KEY.ATTENTION.HEAD_COUNT_KV
-KEY_ATTENTION_MAX_ALIBI_BIAS : str = KEY.ATTENTION.MAX_ALIBI_BIAS
-KEY_ATTENTION_CLAMP_KQV : str = KEY.ATTENTION.CLAMP_KQV
-KEY_ATTENTION_LAYERNORM_EPS : str = KEY.ATTENTION.LAYERNORM_EPS
+KEY_ATTENTION_HEAD_COUNT: str = KEY.ATTENTION.HEAD_COUNT
+KEY_ATTENTION_HEAD_COUNT_KV: str = KEY.ATTENTION.HEAD_COUNT_KV
+KEY_ATTENTION_MAX_ALIBI_BIAS: str = KEY.ATTENTION.MAX_ALIBI_BIAS
+KEY_ATTENTION_CLAMP_KQV: str = KEY.ATTENTION.CLAMP_KQV
+KEY_ATTENTION_LAYERNORM_EPS: str = KEY.ATTENTION.LAYERNORM_EPS
 KEY_ATTENTION_LAYERNORM_RMS_EPS: str = KEY.ATTENTION.LAYERNORM_RMS_EPS
 
 # RoPE
-KEY_ROPE_DIMENSION_COUNT : str = KEY.ROPE.DIMENSION_COUNT
-KEY_ROPE_FREQ_BASE : str = KEY.ROPE.FREQ_BASE
-KEY_ROPE_SCALING_TYPE : str = KEY.ROPE.SCALING_TYPE
-KEY_ROPE_SCALING_FACTOR : str = KEY.ROPE.SCALING_FACTOR
+KEY_ROPE_DIMENSION_COUNT: str = KEY.ROPE.DIMENSION_COUNT
+KEY_ROPE_FREQ_BASE: str = KEY.ROPE.FREQ_BASE
+KEY_ROPE_SCALING_TYPE: str = KEY.ROPE.SCALING_TYPE
+KEY_ROPE_SCALING_FACTOR: str = KEY.ROPE.SCALING_FACTOR
 KEY_ROPE_SCALING_ORIG_CTX_LEN: str = KEY.ROPE.SCALING_ORIG_CTX_LEN
-KEY_ROPE_SCALING_FINETUNED : str = KEY.ROPE.SCALING_FINETUNED
+KEY_ROPE_SCALING_FINETUNED: str = KEY.ROPE.SCALING_FINETUNED
 
 # tokenization
-KEY_TOKENIZER_MODEL : str = KEY.TOKENIZER.MODEL
-KEY_TOKENIZER_LIST : str = KEY.TOKENIZER.LIST
+KEY_TOKENIZER_MODEL: str = KEY.TOKENIZER.MODEL
+KEY_TOKENIZER_LIST: str = KEY.TOKENIZER.LIST
 KEY_TOKENIZER_TOKEN_TYPE: str = KEY.TOKENIZER.TOKEN_TYPE
-KEY_TOKENIZER_SCORES : str = KEY.TOKENIZER.SCORES
-KEY_TOKENIZER_MERGES : str = KEY.TOKENIZER.MERGES
-KEY_TOKENIZER_BOS_ID : str = KEY.TOKENIZER.BOS_ID
-KEY_TOKENIZER_EOS_ID : str = KEY.TOKENIZER.EOS_ID
-KEY_TOKENIZER_UNK_ID : str = KEY.TOKENIZER.UNK_ID
-KEY_TOKENIZER_SEP_ID : str = KEY.TOKENIZER.SEP_ID
-KEY_TOKENIZER_PAD_ID : str = KEY.TOKENIZER.PAD_ID
-KEY_TOKENIZER_HF_JSON : str = KEY.TOKENIZER.HF_JSON
-KEY_TOKENIZER_RWKV : str = KEY.TOKENIZER.RWKV
+KEY_TOKENIZER_SCORES: str = KEY.TOKENIZER.SCORES
+KEY_TOKENIZER_MERGES: str = KEY.TOKENIZER.MERGES
+KEY_TOKENIZER_BOS_ID: str = KEY.TOKENIZER.BOS_ID
+KEY_TOKENIZER_EOS_ID: str = KEY.TOKENIZER.EOS_ID
+KEY_TOKENIZER_UNK_ID: str = KEY.TOKENIZER.UNK_ID
+KEY_TOKENIZER_SEP_ID: str = KEY.TOKENIZER.SEP_ID
+KEY_TOKENIZER_PAD_ID: str = KEY.TOKENIZER.PAD_ID
+KEY_TOKENIZER_HF_JSON: str = KEY.TOKENIZER.HF_JSON
+KEY_TOKENIZER_RWKV: str = KEY.TOKENIZER.RWKV
diff --git a/gguf-py/gguf/gguf_reader.py b/gguf-py/gguf/gguf_reader.py
index 479fae2a3..3326e9517 100644
--- a/gguf-py/gguf/gguf_reader.py
+++ b/gguf-py/gguf/gguf_reader.py
@@ -20,7 +20,7 @@ from gguf.constants import (
     GGUF_MAGIC,
     GGUF_VERSION,
     GGMLQuantizationType,
-    GGUFValueType
+    GGUFValueType,
 )
 
 READER_SUPPORTED_VERSIONS = [2, GGUF_VERSION]
@@ -76,14 +76,49 @@ class GGUFReader:
         GGUFValueType.BOOL: np.bool_,
     }
 
+    def __init__(self, path: os.PathLike[str] | str, mode: Literal['r', 'r+', 'c'] = 'r'):
+        self.data = np.memmap(path, mode = mode)
+        offs = 0
+        if self._get(offs, np.uint32, override_order = '<')[0] != GGUF_MAGIC:
+            raise ValueError('GGUF magic invalid')
+        offs += 4
+        temp_version = self._get(offs, np.uint32)
+        if temp_version[0] > 2000:
+            self.byte_order = 'S'
+            temp_version = temp_version.newbyteorder(self.byte_order)
+        version = temp_version[0]
+        if version not in READER_SUPPORTED_VERSIONS:
+            raise ValueError(f'Sorry, file appears to be version {version} which we cannot handle')
+        offs += self._push_field(ReaderField(offs, 'GGUF.version', [temp_version], [0], [GGUFValueType.UINT32]))
+        temp_counts = self._get(offs, np.uint64, 2)
+        offs += self._push_field(ReaderField(offs, 'GGUF.tensor_count', [temp_counts[:1]], [0], [GGUFValueType.UINT64]))
+        offs += self._push_field(ReaderField(offs, 'GGUF.kv_count', [temp_counts[1:]], [0], [GGUFValueType.UINT64]))
+        tensor_count, kv_count = temp_counts
+        offs = self._build_fields(offs, kv_count)
+        offs, tensors_fields = self._build_tensors_fields(offs, tensor_count)
+        new_align = self.fields.get('general.alignment')
+        if new_align is not None:
+            if new_align.types != [GGUFValueType.UINT64]:
+                raise ValueError('Bad type for general.alignment field')
+            self.alignment = new_align.parts[-1][0]
+        padding = offs % self.alignment
+        if padding != 0:
+            offs += self.alignment - padding
+        self._build_tensors(offs, tensors_fields)
+
     _DT = TypeVar('_DT', bound = npt.DTypeLike)
 
-    def _get(self, offset: int, dtype: npt.DTypeLike, count: int = 1, override_order: None | Literal['I' | 'S' | '<'] = None) -> npt.NDArray[Any]:
+
+    def _get(
+        self, offset: int, dtype: npt.DTypeLike, count: int = 1, override_order: None | Literal['I', 'S', '<'] = None,
+    ) -> npt.NDArray[Any]:
         count = int(count)
         itemsize = int(np.empty([], dtype = dtype).itemsize)
         end_offs = offset + itemsize * count
-        return (self.data[offset:end_offs]
+        return (
+            self.data[offset:end_offs]
             .view(dtype = dtype)[:count]
-            .newbyteorder(override_order or self.byte_order))
+            .newbyteorder(override_order or self.byte_order)
+        )
 
     def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int:
         if field.name in self.fields:
@@ -93,9 +128,11 @@ class GGUFReader:
 
     def _get_str(self, offset: int) -> tuple[npt.NDArray[np.uint64], npt.NDArray[np.uint8]]:
         slen = self._get(offset, np.uint64)
-        return (slen, self._get(offset + 8, np.uint8, slen[0]))
+        return slen, self._get(offset + 8, np.uint8, slen[0])
 
-    def _get_field_parts(self, orig_offs: int, raw_type: int) -> tuple[int, list[npt.NDArray[Any]], list[int], list[GGUFValueType]]:
+    def _get_field_parts(
+        self, orig_offs: int, raw_type: int,
+    ) -> tuple[int, list[npt.NDArray[Any]], list[int], list[GGUFValueType]]:
         offs = orig_offs
         types: list[GGUFValueType] = []
         gtype = GGUFValueType(raw_type)
@@ -104,12 +141,12 @@ class GGUFReader:
         if gtype == GGUFValueType.STRING:
             sparts: list[npt.NDArray[Any]] = list(self._get_str(offs))
             size = sum(int(part.nbytes) for part in sparts)
-            return (size, sparts, [1], types)
+            return size, sparts, [1], types
         # Check if it's a simple scalar type.
         nptype = self._simple_value_map.get(gtype)
         if nptype is not None:
             val = self._get(offs, nptype)
-            return (int(val.nbytes), [val], [0], types)
+            return int(val.nbytes), [val], [0], types
         # Handle arrays.
         if gtype == GGUFValueType.ARRAY:
             raw_itype = self._get(offs, np.uint32)
@@ -126,7 +163,7 @@ class GGUFReader:
                 aparts += curr_parts
                 data_idxs += (idx + idxs_offs for idx in curr_idxs)
                 offs += curr_size
-            return (offs - orig_offs, aparts, data_idxs, types)
+            return offs - orig_offs, aparts, data_idxs, types
         # We can't deal with this one.
         raise ValueError('Unknown/unhandled field type {gtype}')
 
@@ -164,7 +201,7 @@ class GGUFReader:
                 orig_offs,
                 str(bytes(kv_kdata), encoding = 'utf-8'),
                 parts,
-                list(idx + idxs_offs for idx in field_idxs),
+                [idx + idxs_offs for idx in field_idxs],
                 field_types,
             ), skip_sum = True)
             offs += field_size
@@ -176,7 +213,7 @@ class GGUFReader:
             field = self._get_tensor(offs)
             offs += sum(int(part.nbytes) for part in field.parts)
             tensor_fields.append(field)
-        return (offs, tensor_fields)
+        return offs, tensor_fields
 
     def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None:
         tensors = []
@@ -210,37 +247,6 @@ class GGUFReader:
 
         self.tensors = tensors
 
-    def __init__(self, path: os.PathLike[str] | str, mode: Literal['r' | 'r+' | 'c'] = 'r') -> None:
-        self.data = np.memmap(path, mode = mode)
-        offs = 0
-        if self._get(offs, np.uint32, override_order = '<')[0] != GGUF_MAGIC:
-            raise ValueError('GGUF magic invalid')
-        offs += 4
-        temp_version = self._get(offs, np.uint32)
-        if temp_version[0] > 2000:
-            self.byte_order = 'S'
-            temp_version = temp_version.newbyteorder(self.byte_order)
-        version = temp_version[0]
-        if version not in READER_SUPPORTED_VERSIONS:
-            raise ValueError(f'Sorry, file appears to be version {version} which we cannot handle')
-        offs += self._push_field(ReaderField(offs, 'GGUF.version', [temp_version], [0], [GGUFValueType.UINT32]))
-        temp_counts = self._get(offs, np.uint64, 2)
-        offs += self._push_field(ReaderField(offs, 'GGUF.tensor_count', [temp_counts[:1]], [0], [GGUFValueType.UINT64]))
-        offs += self._push_field(ReaderField(offs, 'GGUF.kv_count', [temp_counts[1:]], [0], [GGUFValueType.UINT64]))
-        tensor_count, kv_count = temp_counts
-        offs = self._build_fields(offs, kv_count)
-        offs, tensors_fields = self._build_tensors_fields(offs, tensor_count)
-        new_align = self.fields.get('general.alignment')
-        if new_align is not None:
-            if new_align.types != [GGUFValueType.UINT64]:
-                raise ValueError('Bad type for general.alignment field')
-            self.alignment = new_align.parts[-1][0]
-        padding = offs % self.alignment
-        if padding != 0:
-            offs += self.alignment - padding
-        self._build_tensors(offs, tensors_fields)
-
-
 # Example usage:
 if __name__ == "__main__":
     if len(sys.argv) < 2:
@@ -250,7 +256,7 @@ if __name__ == "__main__":
     reader = GGUFReader(sys.argv[1], 'r')
     print(f'\n* Dumping {len(reader.fields)} key/value pair(s)')
     for n, field in enumerate(reader.fields.values(), 1):
-        if len(field.types) == 0:
+        if not field.types:
             pretty_type = 'N/A'
         elif field.types[0] == GGUFValueType.ARRAY:
             nest_count = len(field.types) - 1
diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py
index 99d4d70a8..2c74cf025 100644
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@@ -19,7 +19,7 @@ from .constants import (
     GGUFEndian,
     GGUFValueType,
     RopeScalingType,
-    TokenType
+    TokenType,
 )
 
 
@@ -29,6 +29,7 @@ class WriterState(Enum):
     KV_DATA = auto()
     TI_DATA = auto()
 
+
 class GGUFWriter:
     fout: BufferedWriter
     temp_file: tempfile.SpooledTemporaryFile[bytes] | None
@@ -47,16 +48,10 @@ class GGUFWriter:
         GGUFValueType.BOOL: "?",
     }
 
-    def _pack(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> bytes:
-        pack_prefix = ''
-        if not skip_pack_prefix:
-            pack_prefix = '<' if self.endianess == GGUFEndian.LITTLE else '>'
-        return struct.pack(f'{pack_prefix}{fmt}', value)
-
-    def _write_packed(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> None:
-        self.fout.write(self._pack(fmt, value, skip_pack_prefix))
-
-    def __init__(self, path: os.PathLike[str] | str, arch: str, use_temp_file: bool = True, endianess: GGUFEndian = GGUFEndian.LITTLE) -> None:
+    def __init__(
+        self, path: os.PathLike[str] | str, arch: str, use_temp_file: bool = True,
+        endianess: GGUFEndian = GGUFEndian.LITTLE,
+    ):
         self.fout = open(path, "wb")
         self.arch = arch
         self.endianess = endianess
@@ -69,8 +64,9 @@ class GGUFWriter:
         self.use_temp_file = use_temp_file
         self.temp_file = None
         self.tensors = []
-        print("gguf: This GGUF file is for {0} Endian only"
-              .format("Big" if self.endianess == GGUFEndian.BIG else "Little"))
+        print("gguf: This GGUF file is for {0} Endian only".format(
+            "Big" if self.endianess == GGUFEndian.BIG else "Little",
+        ))
         self.state = WriterState.EMPTY
 
         self.add_architecture()
@@ -150,7 +146,7 @@ class GGUFWriter:
         self.add_val(val, GGUFValueType.BOOL)
 
     def add_string(self, key: str, val: str) -> None:
-        if len(val) == 0:
+        if not val:
             return
         self.add_key(key)
         self.add_val(val, GGUFValueType.STRING)
@@ -177,7 +173,7 @@ class GGUFWriter:
             encoded_val = val.encode("utf8") if isinstance(val, str) else val
             self.kv_data += self._pack("Q", len(encoded_val))
             self.kv_data += encoded_val
-        elif vtype == GGUFValueType.ARRAY and isinstance(val, Sequence) and len(val) > 0:
+        elif vtype == GGUFValueType.ARRAY and isinstance(val, Sequence) and val:
             ltype = GGUFValueType.get_type(val[0])
             if not all(GGUFValueType.get_type(i) is ltype for i in val[1:]):
                 raise ValueError("All items in a GGUF array should be of the same type")
@@ -192,7 +188,10 @@ class GGUFWriter:
     def ggml_pad(x: int, n: int) -> int:
         return ((x + n - 1) // n) * n
 
-    def add_tensor_info(self, name: str, tensor_shape: Sequence[int], tensor_dtype: np.dtype[np.float16] | np.dtype[np.float32], tensor_nbytes: int, raw_dtype: GGMLQuantizationType | None = None) -> None:
+    def add_tensor_info(
+        self, name: str, tensor_shape: Sequence[int], tensor_dtype: np.dtype[np.float16] | np.dtype[np.float32],
+        tensor_nbytes: int, raw_dtype: GGMLQuantizationType | None = None,
+    ) -> None:
         if self.state is not WriterState.EMPTY:
             raise ValueError(f'Expected output file to be empty, got {self.state}')
 
@@ -215,7 +214,10 @@ class GGUFWriter:
         self.offset_tensor += GGUFWriter.ggml_pad(tensor_nbytes, self.data_alignment)
         self.ti_data_count += 1
 
-    def add_tensor(self, name: str, tensor: np.ndarray[Any, Any], raw_shape: Sequence[int] | None = None, raw_dtype: GGMLQuantizationType | None = None) -> None:
+    def add_tensor(
+        self, name: str, tensor: np.ndarray[Any, Any], raw_shape: Sequence[int] | None = None,
+        raw_dtype: GGMLQuantizationType | None = None,
+    ) -> None:
         if self.endianess == GGUFEndian.BIG:
             tensor.byteswap(inplace=True)
         if self.use_temp_file and self.temp_file is None:
@@ -242,7 +244,7 @@ class GGUFWriter:
         if self.state is not WriterState.TI_DATA:
             raise ValueError(f'Expected output file to contain tensor info, got {self.state}')
 
-        if self.endianess==GGUFEndian.BIG:
+        if self.endianess == GGUFEndian.BIG:
             tensor.byteswap(inplace=True)
         self.write_padding(self.fout, self.fout.tell())
         tensor.tofile(self.fout)
@@ -402,3 +404,12 @@ class GGUFWriter:
 
     def add_pad_token_id(self, id: int) -> None:
         self.add_uint32(KEY.TOKENIZER.PAD_ID, id)
+
+    def _pack(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> bytes:
+        pack_prefix = ''
+        if not skip_pack_prefix:
+            pack_prefix = '<' if self.endianess == GGUFEndian.LITTLE else '>'
+        return struct.pack(f'{pack_prefix}{fmt}', value)
+
+    def _write_packed(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> None:
+        self.fout.write(self._pack(fmt, value, skip_pack_prefix))
diff --git a/gguf-py/gguf/tensor_mapping.py b/gguf-py/gguf/tensor_mapping.py
index 55df1ba0f..22ad8b8fc 100644
--- a/gguf-py/gguf/tensor_mapping.py
+++ b/gguf-py/gguf/tensor_mapping.py
@@ -9,14 +9,14 @@ class TensorNameMap:
     mappings_cfg: dict[MODEL_TENSOR, tuple[str, ...]] = {
         # Token embeddings
        MODEL_TENSOR.TOKEN_EMBD: (
-            "gpt_neox.embed_in", # gptneox
-            "transformer.wte", # gpt2 gpt-j mpt refact
-            "transformer.word_embeddings", # falcon
-            "word_embeddings", # bloom
-            "model.embed_tokens", # llama-hf
-            "tok_embeddings", # llama-pth
-            "embeddings.word_embeddings", # bert
-            "language_model.embedding.word_embeddings", # persimmon
+            "gpt_neox.embed_in",  # gptneox
+            "transformer.wte",  # gpt2 gpt-j mpt refact
+            "transformer.word_embeddings",  # falcon
+            "word_embeddings",  # bloom
+            "model.embed_tokens",  # llama-hf
+            "tok_embeddings",  # llama-pth
+            "embeddings.word_embeddings",  # bert
+            "language_model.embedding.word_embeddings",  # persimmon
         ),
 
         # Token type embeddings
@@ -37,59 +37,59 @@ class TensorNameMap:
 
         # Output
         MODEL_TENSOR.OUTPUT: (
-            "embed_out", # gptneox
-            "lm_head", # gpt2 mpt falcon llama-hf baichuan
-            "output", # llama-pth bloom
-            "word_embeddings_for_head", # persimmon
+            "embed_out",  # gptneox
+            "lm_head",  # gpt2 mpt falcon llama-hf baichuan
+            "output",  # llama-pth bloom
+            "word_embeddings_for_head",  # persimmon
         ),
 
         # Output norm
         MODEL_TENSOR.OUTPUT_NORM: (
-            "gpt_neox.final_layer_norm", # gptneox
-            "transformer.ln_f", # gpt2 gpt-j falcon
-            "model.norm", # llama-hf baichuan
-            "norm", # llama-pth
-            "embeddings.LayerNorm", # bert
-            "transformer.norm_f", # mpt
-            "ln_f", # refact bloom
-            "language_model.encoder.final_layernorm", # persimmon
+            "gpt_neox.final_layer_norm",  # gptneox
+            "transformer.ln_f",  # gpt2 gpt-j falcon
+            "model.norm",  # llama-hf baichuan
+            "norm",  # llama-pth
+            "embeddings.LayerNorm",  # bert
+            "transformer.norm_f",  # mpt
+            "ln_f",  # refact bloom
+            "language_model.encoder.final_layernorm",  # persimmon
         ),
 
         # Rope frequencies
         MODEL_TENSOR.ROPE_FREQS: (
-            "rope.freqs", # llama-pth
+            "rope.freqs",  # llama-pth
         ),
     }
 
     block_mappings_cfg: dict[MODEL_TENSOR, tuple[str, ...]] = {
         # Attention norm
         MODEL_TENSOR.ATTN_NORM: (
-            "gpt_neox.layers.{bid}.input_layernorm", # gptneox
-            "transformer.h.{bid}.ln_1", # gpt2 gpt-j refact
-            "transformer.blocks.{bid}.norm_1", # mpt
-            "transformer.h.{bid}.input_layernorm", # falcon7b
-            "h.{bid}.input_layernorm", # bloom
-            "transformer.h.{bid}.ln_mlp", # falcon40b
-            "model.layers.{bid}.input_layernorm", # llama-hf
-            "layers.{bid}.attention_norm", # llama-pth
-            "encoder.layer.{bid}.attention.output.LayerNorm", # bert
-            "language_model.encoder.layers.{bid}.input_layernorm", # persimmon
-            "model.layers.{bid}.ln1", # yi
+            "gpt_neox.layers.{bid}.input_layernorm",  # gptneox
+            "transformer.h.{bid}.ln_1",  # gpt2 gpt-j refact
+            "transformer.blocks.{bid}.norm_1",  # mpt
+            "transformer.h.{bid}.input_layernorm",  # falcon7b
+            "h.{bid}.input_layernorm",  # bloom
+            "transformer.h.{bid}.ln_mlp",  # falcon40b
+            "model.layers.{bid}.input_layernorm",  # llama-hf
+            "layers.{bid}.attention_norm",  # llama-pth
+            "encoder.layer.{bid}.attention.output.LayerNorm",  # bert
+            "language_model.encoder.layers.{bid}.input_layernorm",  # persimmon
+            "model.layers.{bid}.ln1",  # yi
         ),
 
         # Attention norm 2
         MODEL_TENSOR.ATTN_NORM_2: (
-            "transformer.h.{bid}.ln_attn", # falcon40b
+            "transformer.h.{bid}.ln_attn",  # falcon40b
         ),
 
         # Attention query-key-value
         MODEL_TENSOR.ATTN_QKV: (
-            "gpt_neox.layers.{bid}.attention.query_key_value", # gptneox
-            "transformer.h.{bid}.attn.c_attn", # gpt2
-            "transformer.blocks.{bid}.attn.Wqkv", # mpt
-            "transformer.h.{bid}.self_attention.query_key_value", # falcon
-            "h.{bid}.self_attention.query_key_value", # bloom
-            "language_model.encoder.layers.{bid}.self_attention.query_key_value", # persimmon
+            "gpt_neox.layers.{bid}.attention.query_key_value",  # gptneox
+            "transformer.h.{bid}.attn.c_attn",  # gpt2
+            "transformer.blocks.{bid}.attn.Wqkv",  # mpt
+            "transformer.h.{bid}.self_attention.query_key_value",  # falcon
+            "h.{bid}.self_attention.query_key_value",  # bloom
+            "language_model.encoder.layers.{bid}.self_attention.query_key_value",  # persimmon
         ),
 
         # Attention query
@@ -118,69 +118,69 @@ class TensorNameMap:
 
         # Attention output
         MODEL_TENSOR.ATTN_OUT: (
-            "gpt_neox.layers.{bid}.attention.dense", # gptneox
-            "transformer.h.{bid}.attn.c_proj", # gpt2 refact
-            "transformer.blocks.{bid}.attn.out_proj", # mpt
-            "transformer.h.{bid}.self_attention.dense", # falcon
-            "h.{bid}.self_attention.dense", # bloom
-            "model.layers.{bid}.self_attn.o_proj", # llama-hf
-            "layers.{bid}.attention.wo", # llama-pth
-            "encoder.layer.{bid}.attention.output.dense", # bert
-            "transformer.h.{bid}.attn.out_proj", # gpt-j
-            "language_model.encoder.layers.{bid}.self_attention.dense" # persimmon
+            "gpt_neox.layers.{bid}.attention.dense",  # gptneox
+            "transformer.h.{bid}.attn.c_proj",  # gpt2 refact
+            "transformer.blocks.{bid}.attn.out_proj",  # mpt
+            "transformer.h.{bid}.self_attention.dense",  # falcon
+            "h.{bid}.self_attention.dense",  # bloom
+            "model.layers.{bid}.self_attn.o_proj",  # llama-hf
+            "layers.{bid}.attention.wo",  # llama-pth
+            "encoder.layer.{bid}.attention.output.dense",  # bert
+            "transformer.h.{bid}.attn.out_proj",  # gpt-j
+            "language_model.encoder.layers.{bid}.self_attention.dense",  # persimmon
         ),
 
         # Rotary embeddings
         MODEL_TENSOR.ATTN_ROT_EMBD: (
-            "model.layers.{bid}.self_attn.rotary_emb.inv_freq", # llama-hf
-            "layers.{bid}.attention.inner_attention.rope.freqs", # llama-pth
+            "model.layers.{bid}.self_attn.rotary_emb.inv_freq",  # llama-hf
+            "layers.{bid}.attention.inner_attention.rope.freqs",  # llama-pth
         ),
 
         # Feed-forward norm
         MODEL_TENSOR.FFN_NORM: (
-            "gpt_neox.layers.{bid}.post_attention_layernorm", # gptneox
-            "transformer.h.{bid}.ln_2", # gpt2 refact
-            "h.{bid}.post_attention_layernorm", # bloom
-            "transformer.blocks.{bid}.norm_2", # mpt
-            "model.layers.{bid}.post_attention_layernorm", # llama-hf
-            "layers.{bid}.ffn_norm", # llama-pth
-            "encoder.layer.{bid}.output.LayerNorm", # bert
-            "language_model.encoder.layers.{bid}.post_attention_layernorm", # persimmon
-            "model.layers.{bid}.ln2", # yi
+            "gpt_neox.layers.{bid}.post_attention_layernorm",  # gptneox
+            "transformer.h.{bid}.ln_2",  # gpt2 refact
+            "h.{bid}.post_attention_layernorm",  # bloom
+            "transformer.blocks.{bid}.norm_2",  # mpt
+            "model.layers.{bid}.post_attention_layernorm",  # llama-hf
+            "layers.{bid}.ffn_norm",  # llama-pth
+            "encoder.layer.{bid}.output.LayerNorm",  # bert
+            "language_model.encoder.layers.{bid}.post_attention_layernorm",  # persimmon
+            "model.layers.{bid}.ln2",  # yi
         ),
 
         # Feed-forward up
         MODEL_TENSOR.FFN_UP: (
-            "gpt_neox.layers.{bid}.mlp.dense_h_to_4h", # gptneox
-            "transformer.h.{bid}.mlp.c_fc", # gpt2
-            "transformer.blocks.{bid}.ffn.up_proj", # mpt
-            "transformer.h.{bid}.mlp.dense_h_to_4h", # falcon
-            "h.{bid}.mlp.dense_h_to_4h", # bloom
-            "model.layers.{bid}.mlp.up_proj", # llama-hf refact
-            "layers.{bid}.feed_forward.w3", # llama-pth
-            "encoder.layer.{bid}.intermediate.dense", # bert
-            "transformer.h.{bid}.mlp.fc_in", # gpt-j
-            "language_model.encoder.layers.{bid}.mlp.dense_h_to_4h", # persimmon
+            "gpt_neox.layers.{bid}.mlp.dense_h_to_4h",  # gptneox
+            "transformer.h.{bid}.mlp.c_fc",  # gpt2
+            "transformer.blocks.{bid}.ffn.up_proj",  # mpt
+            "transformer.h.{bid}.mlp.dense_h_to_4h",  # falcon
+            "h.{bid}.mlp.dense_h_to_4h",  # bloom
+            "model.layers.{bid}.mlp.up_proj",  # llama-hf refact
+            "layers.{bid}.feed_forward.w3",  # llama-pth
+            "encoder.layer.{bid}.intermediate.dense",  # bert
+            "transformer.h.{bid}.mlp.fc_in",  # gpt-j
+            "language_model.encoder.layers.{bid}.mlp.dense_h_to_4h",  # persimmon
         ),
 
         # Feed-forward gate
         MODEL_TENSOR.FFN_GATE: (
-            "model.layers.{bid}.mlp.gate_proj", # llama-hf refact
-            "layers.{bid}.feed_forward.w1", # llama-pth
+            "model.layers.{bid}.mlp.gate_proj",  # llama-hf refact
+            "layers.{bid}.feed_forward.w1",  # llama-pth
         ),
 
         # Feed-forward down
         MODEL_TENSOR.FFN_DOWN: (
-            "gpt_neox.layers.{bid}.mlp.dense_4h_to_h", # gptneox
-            "transformer.h.{bid}.mlp.c_proj", # gpt2 refact
-            "transformer.blocks.{bid}.ffn.down_proj", # mpt
-            "transformer.h.{bid}.mlp.dense_4h_to_h", # falcon
-            "h.{bid}.mlp.dense_4h_to_h", # bloom
-            "model.layers.{bid}.mlp.down_proj", # llama-hf
-            "layers.{bid}.feed_forward.w2", # llama-pth
-            "encoder.layer.{bid}.output.dense", # bert
-            "transformer.h.{bid}.mlp.fc_out", # gpt-j
-            "language_model.encoder.layers.{bid}.mlp.dense_4h_to_h", # persimmon
+            "gpt_neox.layers.{bid}.mlp.dense_4h_to_h",  # gptneox
+            "transformer.h.{bid}.mlp.c_proj",  # gpt2 refact
+            "transformer.blocks.{bid}.ffn.down_proj",  # mpt
+            "transformer.h.{bid}.mlp.dense_4h_to_h",  # falcon
+            "h.{bid}.mlp.dense_4h_to_h",  # bloom
+            "model.layers.{bid}.mlp.down_proj",  # llama-hf
+            "layers.{bid}.feed_forward.w2",  # llama-pth
+            "encoder.layer.{bid}.output.dense",  # bert
+            "transformer.h.{bid}.mlp.fc_out",  # gpt-j
+            "language_model.encoder.layers.{bid}.mlp.dense_4h_to_h",  # persimmon
         ),
 
         MODEL_TENSOR.ATTN_Q_NORM: (
@@ -192,8 +192,8 @@ class TensorNameMap:
         ),
 
         MODEL_TENSOR.ROPE_FREQS: (
-            "language_model.encoder.layers.{bid}.self_attention.rotary_emb.inv_freq", # persimmon
-        )
+            "language_model.encoder.layers.{bid}.self_attention.rotary_emb.inv_freq",  # persimmon
+        ),
     }
 
     mapping: dict[str, tuple[MODEL_TENSOR, str]]
@@ -225,7 +225,7 @@ class TensorNameMap:
             if key.endswith(suffix):
                 result = self.mapping.get(key[:-len(suffix)])
                 if result is not None:
-                    return (result[0], result[1] + suffix)
+                    return result[0], result[1] + suffix
         return None
 
     def get_name(self, key: str, try_suffixes: Sequence[str] = ()) -> str | None:
@@ -252,5 +252,6 @@ class TensorNameMap:
     def __repr__(self) -> str:
         return repr(self.mapping)
 
+
 def get_tensor_name_map(arch: MODEL_ARCH, n_blocks: int) -> TensorNameMap:
     return TensorNameMap(arch, n_blocks)
diff --git a/gguf-py/gguf/vocab.py b/gguf-py/gguf/vocab.py
index 92040199d..8dea75e48 100644
--- a/gguf-py/gguf/vocab.py
+++ b/gguf-py/gguf/vocab.py
@@ -28,6 +28,26 @@ class SpecialVocab:
             self.special_token_types = ('bos', 'eos', 'unk', 'sep', 'pad')
         self._load(Path(path))
 
+    def __repr__(self) -> str:
+        return f'<SpecialVocab with {len(self.merges)} merges and special tokens {self.special_token_ids or "unset"}>'
+
+    def add_to_gguf(self, gw: GGUFWriter, quiet: bool = False) -> None:
+        if self.merges:
+            if not quiet:
+                print(f'gguf: Adding {len(self.merges)} merge(s).')
+            gw.add_token_merges(self.merges)
+        for typ, tokid in self.special_token_ids.items():
+            handler: Callable[[int], None] | None = getattr(gw, f'add_{typ}_token_id', None)
+            if handler is None:
+                print(
+                    f'gguf: WARNING: No handler for special token type {typ} with id {tokid} - skipping',
+                    file = sys.stderr,
+                )
+                continue
+            if not quiet:
+                print(f'gguf: Setting special token type {typ} to {tokid}')
+            handler(tokid)
+
     def _load(self, path: Path) -> None:
         if not self._try_load_from_tokenizer_json(path):
             self._try_load_from_config_json(path)
@@ -38,9 +58,10 @@ class SpecialVocab:
             if self.n_vocab is None or tid < self.n_vocab:
                 self.special_token_ids[typ] = tid
                 return
-            print(f'gguf: WARNING: Special token type {typ}, id {tid} out of range, must be under {self.n_vocab} - skipping',
-                  file = sys.stderr)
-
+            print(
+                f'gguf: WARNING: Special token type {typ}, id {tid} out of range, must be under {self.n_vocab} - skipping',
+                file = sys.stderr,
+            )
 
     def _try_load_from_tokenizer_json(self, path: Path) -> bool:
         tokenizer_file = path / 'tokenizer.json'
@@ -50,7 +71,7 @@ class SpecialVocab:
             tokenizer = json.load(f)
         if self.load_merges:
             merges = tokenizer.get('model', {}).get('merges')
-            if isinstance(merges, list) and len(merges) > 0 and isinstance(merges[0], str):
+            if isinstance(merges, list) and merges and isinstance(merges[0], str):
                 self.merges = merges
         tokenizer_config_file = path / 'tokenizer_config.json'
         added_tokens = tokenizer.get('added_tokens')
@@ -70,9 +91,10 @@ class SpecialVocab:
             else:
                 continue
             # We only need the first match here.
-            maybe_token_id = next((
-                atok.get('id') for atok in added_tokens
-                if atok.get('content') == tc_content), None)
+            maybe_token_id = next(
+                (atok.get('id') for atok in added_tokens if atok.get('content') == tc_content),
+                None,
+            )
             self._set_special_token(typ, maybe_token_id)
         return True
 
@@ -85,20 +107,3 @@ class SpecialVocab:
         for typ in self.special_token_types:
             self._set_special_token(typ, config.get(f'{typ}_token_id'))
         return True
-
-    def add_to_gguf(self, gw: GGUFWriter, quiet: bool = False) -> None:
-        if len(self.merges) > 0:
-            if not quiet:
-                print(f'gguf: Adding {len(self.merges)} merge(s).')
-            gw.add_token_merges(self.merges)
-        for typ, tokid in self.special_token_ids.items():
-            handler: Callable[[int], None] | None = getattr(gw, f'add_{typ}_token_id', None)
-            if handler is None:
-                print(f'gguf: WARNING: No handler for special token type {typ} with id {tokid} - skipping', file = sys.stderr)
-                continue
-            if not quiet:
-                print(f'gguf: Setting special token type {typ} to {tokid}')
-            handler(tokid)
-
-    def __repr__(self) -> str:
-        return f'<SpecialVocab with {len(self.merges)} merges and special tokens {self.special_token_ids or "unset"}>'
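
Below is a minimal usage sketch, not part of the commit, showing how the gguf-py modules touched by this patch fit together. The file name example.gguf, the metadata value, and the tensor contents are illustrative assumptions; the classes and helpers used (GGUFWriter, GGUFReader, get_tensor_name_map) are the ones reformatted above.

    import numpy as np

    from gguf.constants import KEY, MODEL_ARCH
    from gguf.gguf_reader import GGUFReader
    from gguf.gguf_writer import GGUFWriter
    from gguf.tensor_mapping import get_tensor_name_map

    # Write a tiny GGUF file with the writer API cleaned up above.
    # 'example.gguf' and the key/tensor values are placeholders.
    writer = GGUFWriter('example.gguf', 'llama')
    writer.add_string(KEY.GENERAL.NAME, 'example-model')
    writer.add_tensor('token_embd.weight', np.zeros((4, 4), dtype=np.float32))
    writer.write_header_to_file()
    writer.write_kv_data_to_file()
    writer.write_tensors_to_file()
    writer.close()

    # Read the file back and dump its metadata keys, mirroring the
    # "Example usage" block in gguf_reader.py's __main__.
    reader = GGUFReader('example.gguf', 'r')
    for name, field in reader.fields.items():
        print(name, field.types)

    # Translate a Hugging Face tensor name to its GGUF equivalent,
    # the way the convert scripts use tensor_mapping.py.
    name_map = get_tensor_name_map(MODEL_ARCH.LLAMA, n_blocks=1)
    print(name_map.get_name('model.embed_tokens.weight', try_suffixes=('.weight',)))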