diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index 8f82ee64c..0c0b4ba8a 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 import sys
-from enum import IntEnum, StrEnum, auto
+from enum import Enum, IntEnum, auto
 from typing import Any
 
 #
@@ -17,68 +17,57 @@ GGUF_DEFAULT_ALIGNMENT = 32
 #
 
 
-class GeneralKeys(StrEnum):
-    ARCHITECTURE = "general.architecture"
-    QUANTIZATION_VERSION = "general.quantization_version"
-    ALIGNMENT = "general.alignment"
-    NAME = "general.name"
-    AUTHOR = "general.author"
-    URL = "general.url"
-    DESCRIPTION = "general.description"
-    LICENSE = "general.license"
-    SOURCE_URL = "general.source.url"
-    SOURCE_HF_REPO = "general.source.huggingface.repository"
-    FILE_TYPE = "general.file_type"
-
-
-class AttentionKeys(StrEnum):
-    HEAD_COUNT = "{arch}.attention.head_count"
-    HEAD_COUNT_KV = "{arch}.attention.head_count_kv"
-    MAX_ALIBI_BIAS = "{arch}.attention.max_alibi_bias"
-    CLAMP_KQV = "{arch}.attention.clamp_kqv"
-    LAYERNORM_EPS = "{arch}.attention.layer_norm_epsilon"
-    LAYERNORM_RMS_EPS = "{arch}.attention.layer_norm_rms_epsilon"
-
-
-class RopeKeys(StrEnum):
-    DIMENSION_COUNT = "{arch}.rope.dimension_count"
-    FREQ_BASE = "{arch}.rope.freq_base"
-    SCALING_TYPE = "{arch}.rope.scaling.type"
-    SCALING_FACTOR = "{arch}.rope.scaling.factor"
-    SCALING_ORIG_CTX_LEN = "{arch}.rope.scaling.original_context_length"
-    SCALING_FINETUNED = "{arch}.rope.scaling.finetuned"
-
-
-class TokenizerKeys(StrEnum):
-    MODEL = "tokenizer.ggml.model"
-    LIST = "tokenizer.ggml.tokens"
-    TOKEN_TYPE = "tokenizer.ggml.token_type"
-    SCORES = "tokenizer.ggml.scores"
-    MERGES = "tokenizer.ggml.merges"
-    BOS_ID = "tokenizer.ggml.bos_token_id"
-    EOS_ID = "tokenizer.ggml.eos_token_id"
-    UNK_ID = "tokenizer.ggml.unknown_token_id"
-    SEP_ID = "tokenizer.ggml.seperator_token_id"
-    PAD_ID = "tokenizer.ggml.padding_token_id"
-    HF_JSON = "tokenizer.huggingface.json"
-    RWKV = "tokenizer.rwkv.world"
-
-
-class LLMKeys(StrEnum):
-    CONTEXT_LENGTH = "{arch}.context_length"
-    EMBEDDING_LENGTH = "{arch}.embedding_length"
-    BLOCK_COUNT = "{arch}.block_count"
-    FEED_FORWARD_LENGTH = "{arch}.feed_forward_length"
-    USE_PARALLEL_RESIDUAL = "{arch}.use_parallel_residual"
-    TENSOR_DATA_LAYOUT = "{arch}.tensor_data_layout"
-
-
 class Keys:
-    GENERAL = GeneralKeys
-    LLM = LLMKeys
-    ATTENTION = AttentionKeys
-    ROPE = RopeKeys
-    TOKENIZER = TokenizerKeys
+    class General:
+        ARCHITECTURE = "general.architecture"
+        QUANTIZATION_VERSION = "general.quantization_version"
+        ALIGNMENT = "general.alignment"
+        NAME = "general.name"
+        AUTHOR = "general.author"
+        URL = "general.url"
+        DESCRIPTION = "general.description"
+        LICENSE = "general.license"
+        SOURCE_URL = "general.source.url"
+        SOURCE_HF_REPO = "general.source.huggingface.repository"
+        FILE_TYPE = "general.file_type"
+
+    class LLM:
+        CONTEXT_LENGTH = "{arch}.context_length"
+        EMBEDDING_LENGTH = "{arch}.embedding_length"
+        BLOCK_COUNT = "{arch}.block_count"
+        FEED_FORWARD_LENGTH = "{arch}.feed_forward_length"
+        USE_PARALLEL_RESIDUAL = "{arch}.use_parallel_residual"
+        TENSOR_DATA_LAYOUT = "{arch}.tensor_data_layout"
+
+    class Attention:
+        HEAD_COUNT = "{arch}.attention.head_count"
+        HEAD_COUNT_KV = "{arch}.attention.head_count_kv"
+        MAX_ALIBI_BIAS = "{arch}.attention.max_alibi_bias"
+        CLAMP_KQV = "{arch}.attention.clamp_kqv"
+        LAYERNORM_EPS = "{arch}.attention.layer_norm_epsilon"
+        LAYERNORM_RMS_EPS = "{arch}.attention.layer_norm_rms_epsilon"
+
+    class Rope:
+        DIMENSION_COUNT = "{arch}.rope.dimension_count"
+        FREQ_BASE = "{arch}.rope.freq_base"
+        SCALING_TYPE = "{arch}.rope.scaling.type"
+        SCALING_FACTOR = "{arch}.rope.scaling.factor"
+        SCALING_ORIG_CTX_LEN = "{arch}.rope.scaling.original_context_length"
+        SCALING_FINETUNED = "{arch}.rope.scaling.finetuned"
+
+    class Tokenizer:
+        MODEL = "tokenizer.ggml.model"
+        LIST = "tokenizer.ggml.tokens"
+        TOKEN_TYPE = "tokenizer.ggml.token_type"
+        SCORES = "tokenizer.ggml.scores"
+        MERGES = "tokenizer.ggml.merges"
+        BOS_ID = "tokenizer.ggml.bos_token_id"
+        EOS_ID = "tokenizer.ggml.eos_token_id"
+        UNK_ID = "tokenizer.ggml.unknown_token_id"
+        SEP_ID = "tokenizer.ggml.seperator_token_id"
+        PAD_ID = "tokenizer.ggml.padding_token_id"
+        HF_JSON = "tokenizer.huggingface.json"
+        RWKV = "tokenizer.rwkv.world"
 
 
 #
@@ -343,7 +332,7 @@ class TokenType(IntEnum):
     BYTE = 6
 
 
-class RopeScalingType(StrEnum):
+class RopeScalingType(Enum):
     NONE = 'none'
     LINEAR = 'linear'
     YARN = 'yarn'
@@ -428,17 +417,17 @@ GGML_QUANT_SIZES = {
 # Aliases for backward compatibility.
 
 # general
-KEY_GENERAL_ARCHITECTURE = Keys.GENERAL.ARCHITECTURE
-KEY_GENERAL_QUANTIZATION_VERSION = Keys.GENERAL.QUANTIZATION_VERSION
-KEY_GENERAL_ALIGNMENT = Keys.GENERAL.ALIGNMENT
-KEY_GENERAL_NAME = Keys.GENERAL.NAME
-KEY_GENERAL_AUTHOR = Keys.GENERAL.AUTHOR
-KEY_GENERAL_URL = Keys.GENERAL.URL
-KEY_GENERAL_DESCRIPTION = Keys.GENERAL.DESCRIPTION
-KEY_GENERAL_LICENSE = Keys.GENERAL.LICENSE
-KEY_GENERAL_SOURCE_URL = Keys.GENERAL.SOURCE_URL
-KEY_GENERAL_SOURCE_HF_REPO = Keys.GENERAL.SOURCE_HF_REPO
-KEY_GENERAL_FILE_TYPE = Keys.GENERAL.FILE_TYPE
+KEY_GENERAL_ARCHITECTURE = Keys.General.ARCHITECTURE
+KEY_GENERAL_QUANTIZATION_VERSION = Keys.General.QUANTIZATION_VERSION
+KEY_GENERAL_ALIGNMENT = Keys.General.ALIGNMENT
+KEY_GENERAL_NAME = Keys.General.NAME
+KEY_GENERAL_AUTHOR = Keys.General.AUTHOR
+KEY_GENERAL_URL = Keys.General.URL
+KEY_GENERAL_DESCRIPTION = Keys.General.DESCRIPTION
+KEY_GENERAL_LICENSE = Keys.General.LICENSE
+KEY_GENERAL_SOURCE_URL = Keys.General.SOURCE_URL
+KEY_GENERAL_SOURCE_HF_REPO = Keys.General.SOURCE_HF_REPO
+KEY_GENERAL_FILE_TYPE = Keys.General.FILE_TYPE
 
 # LLM
 KEY_CONTEXT_LENGTH = Keys.LLM.CONTEXT_LENGTH
@@ -449,31 +438,31 @@ KEY_USE_PARALLEL_RESIDUAL = Keys.LLM.USE_PARALLEL_RESIDUAL
 KEY_TENSOR_DATA_LAYOUT = Keys.LLM.TENSOR_DATA_LAYOUT
 
 # attention
-KEY_ATTENTION_HEAD_COUNT = Keys.ATTENTION.HEAD_COUNT
-KEY_ATTENTION_HEAD_COUNT_KV = Keys.ATTENTION.HEAD_COUNT_KV
-KEY_ATTENTION_MAX_ALIBI_BIAS = Keys.ATTENTION.MAX_ALIBI_BIAS
-KEY_ATTENTION_CLAMP_KQV = Keys.ATTENTION.CLAMP_KQV
-KEY_ATTENTION_LAYERNORM_EPS = Keys.ATTENTION.LAYERNORM_EPS
-KEY_ATTENTION_LAYERNORM_RMS_EPS = Keys.ATTENTION.LAYERNORM_RMS_EPS
+KEY_ATTENTION_HEAD_COUNT = Keys.Attention.HEAD_COUNT
+KEY_ATTENTION_HEAD_COUNT_KV = Keys.Attention.HEAD_COUNT_KV
+KEY_ATTENTION_MAX_ALIBI_BIAS = Keys.Attention.MAX_ALIBI_BIAS
+KEY_ATTENTION_CLAMP_KQV = Keys.Attention.CLAMP_KQV
+KEY_ATTENTION_LAYERNORM_EPS = Keys.Attention.LAYERNORM_EPS
+KEY_ATTENTION_LAYERNORM_RMS_EPS = Keys.Attention.LAYERNORM_RMS_EPS
 
 # RoPE
-KEY_ROPE_DIMENSION_COUNT = Keys.ROPE.DIMENSION_COUNT
-KEY_ROPE_FREQ_BASE = Keys.ROPE.FREQ_BASE
-KEY_ROPE_SCALING_TYPE = Keys.ROPE.SCALING_TYPE
-KEY_ROPE_SCALING_FACTOR = Keys.ROPE.SCALING_FACTOR
-KEY_ROPE_SCALING_ORIG_CTX_LEN = Keys.ROPE.SCALING_ORIG_CTX_LEN
-KEY_ROPE_SCALING_FINETUNED = Keys.ROPE.SCALING_FINETUNED
+KEY_ROPE_DIMENSION_COUNT = Keys.Rope.DIMENSION_COUNT
+KEY_ROPE_FREQ_BASE = Keys.Rope.FREQ_BASE
+KEY_ROPE_SCALING_TYPE = Keys.Rope.SCALING_TYPE
+KEY_ROPE_SCALING_FACTOR = Keys.Rope.SCALING_FACTOR
+KEY_ROPE_SCALING_ORIG_CTX_LEN = Keys.Rope.SCALING_ORIG_CTX_LEN
+KEY_ROPE_SCALING_FINETUNED = Keys.Rope.SCALING_FINETUNED
 
 # tokenization
-KEY_TOKENIZER_MODEL = Keys.TOKENIZER.MODEL
-KEY_TOKENIZER_LIST = Keys.TOKENIZER.LIST
-KEY_TOKENIZER_TOKEN_TYPE = Keys.TOKENIZER.TOKEN_TYPE
-KEY_TOKENIZER_SCORES = Keys.TOKENIZER.SCORES
-KEY_TOKENIZER_MERGES = Keys.TOKENIZER.MERGES
-KEY_TOKENIZER_BOS_ID = Keys.TOKENIZER.BOS_ID
-KEY_TOKENIZER_EOS_ID = Keys.TOKENIZER.EOS_ID
-KEY_TOKENIZER_UNK_ID = Keys.TOKENIZER.UNK_ID
-KEY_TOKENIZER_SEP_ID = Keys.TOKENIZER.SEP_ID
-KEY_TOKENIZER_PAD_ID = Keys.TOKENIZER.PAD_ID
-KEY_TOKENIZER_HF_JSON = Keys.TOKENIZER.HF_JSON
-KEY_TOKENIZER_RWKV = Keys.TOKENIZER.RWKV
+KEY_TOKENIZER_MODEL = Keys.Tokenizer.MODEL
+KEY_TOKENIZER_LIST = Keys.Tokenizer.LIST
+KEY_TOKENIZER_TOKEN_TYPE = Keys.Tokenizer.TOKEN_TYPE
+KEY_TOKENIZER_SCORES = Keys.Tokenizer.SCORES
+KEY_TOKENIZER_MERGES = Keys.Tokenizer.MERGES
+KEY_TOKENIZER_BOS_ID = Keys.Tokenizer.BOS_ID
+KEY_TOKENIZER_EOS_ID = Keys.Tokenizer.EOS_ID
+KEY_TOKENIZER_UNK_ID = Keys.Tokenizer.UNK_ID
+KEY_TOKENIZER_SEP_ID = Keys.Tokenizer.SEP_ID
+KEY_TOKENIZER_PAD_ID = Keys.Tokenizer.PAD_ID
+KEY_TOKENIZER_HF_JSON = Keys.Tokenizer.HF_JSON
+KEY_TOKENIZER_RWKV = Keys.Tokenizer.RWKV
diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py
index a180c7eae..2f437625a 100644
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@@ -278,132 +278,120 @@ class GGUFWriter:
         self.fout.close()
 
     def add_architecture(self) -> None:
-        self.add_string(Keys.GENERAL.ARCHITECTURE, self.arch)
+        self.add_string(Keys.General.ARCHITECTURE, self.arch)
 
     def add_author(self, author: str) -> None:
-        self.add_string(Keys.GENERAL.AUTHOR, author)
+        self.add_string(Keys.General.AUTHOR, author)
 
     def add_tensor_data_layout(self, layout: str) -> None:
-        self.add_string(Keys.LLM.TENSOR_DATA_LAYOUT.value.format(arch=self.arch), layout)
+        self.add_string(Keys.LLM.TENSOR_DATA_LAYOUT.format(arch=self.arch), layout)
 
     def add_url(self, url: str) -> None:
-        self.add_string(Keys.GENERAL.URL, url)
+        self.add_string(Keys.General.URL, url)
 
     def add_description(self, description: str) -> None:
-        self.add_string(Keys.GENERAL.DESCRIPTION, description)
+        self.add_string(Keys.General.DESCRIPTION, description)
 
     def add_source_url(self, url: str) -> None:
-        self.add_string(Keys.GENERAL.SOURCE_URL, url)
+        self.add_string(Keys.General.SOURCE_URL, url)
 
     def add_source_hf_repo(self, repo: str) -> None:
-        self.add_string(Keys.GENERAL.SOURCE_HF_REPO, repo)
+        self.add_string(Keys.General.SOURCE_HF_REPO, repo)
 
     def add_file_type(self, ftype: int) -> None:
-        self.add_uint32(Keys.GENERAL.FILE_TYPE, ftype)
+        self.add_uint32(Keys.General.FILE_TYPE, ftype)
 
     def add_name(self, name: str) -> None:
-        self.add_string(Keys.GENERAL.NAME, name)
+        self.add_string(Keys.General.NAME, name)
 
     def add_quantization_version(self, quantization_version: GGMLQuantizationType) -> None:
         self.add_uint32(
-            Keys.GENERAL.QUANTIZATION_VERSION, quantization_version)
+            Keys.General.QUANTIZATION_VERSION, quantization_version)
 
     def add_custom_alignment(self, alignment: int) -> None:
         self.data_alignment = alignment
-        self.add_uint32(Keys.GENERAL.ALIGNMENT, alignment)
+        self.add_uint32(Keys.General.ALIGNMENT, alignment)
 
     def add_context_length(self, length: int) -> None:
-        self.add_uint32(
-            Keys.LLM.CONTEXT_LENGTH.value.format(arch=self.arch), length)
+        self.add_uint32(Keys.LLM.CONTEXT_LENGTH.format(arch=self.arch), length)
 
     def add_embedding_length(self, length: int) -> None:
-        self.add_uint32(
-            Keys.LLM.EMBEDDING_LENGTH.value.format(arch=self.arch), length)
+        self.add_uint32(Keys.LLM.EMBEDDING_LENGTH.format(arch=self.arch), length)
 
     def add_block_count(self, length: int) -> None:
-        self.add_uint32(
-            Keys.LLM.BLOCK_COUNT.value.format(arch=self.arch), length)
+        self.add_uint32(Keys.LLM.BLOCK_COUNT.format(arch=self.arch), length)
 
     def add_feed_forward_length(self, length: int) -> None:
-        self.add_uint32(
-            Keys.LLM.FEED_FORWARD_LENGTH.value.format(arch=self.arch), length)
+        self.add_uint32(Keys.LLM.FEED_FORWARD_LENGTH.format(arch=self.arch), length)
 
     def add_parallel_residual(self, use: bool) -> None:
-        self.add_bool(
-            Keys.LLM.USE_PARALLEL_RESIDUAL.value.format(arch=self.arch), use)
+        self.add_bool(Keys.LLM.USE_PARALLEL_RESIDUAL.format(arch=self.arch), use)
 
     def add_head_count(self, count: int) -> None:
-        self.add_uint32(
-            Keys.ATTENTION.HEAD_COUNT.value.format(arch=self.arch), count)
+        self.add_uint32(Keys.Attention.HEAD_COUNT.format(arch=self.arch), count)
 
     def add_head_count_kv(self, count: int) -> None:
-        self.add_uint32(
-            Keys.ATTENTION.HEAD_COUNT_KV.value.format(arch=self.arch), count)
+        self.add_uint32(Keys.Attention.HEAD_COUNT_KV.format(arch=self.arch), count)
 
     def add_max_alibi_bias(self, bias: float) -> None:
-        self.add_float32(
-            Keys.ATTENTION.MAX_ALIBI_BIAS.value.format(arch=self.arch), bias)
+        self.add_float32(Keys.Attention.MAX_ALIBI_BIAS.format(arch=self.arch), bias)
 
     def add_clamp_kqv(self, value: float) -> None:
-        self.add_float32(
-            Keys.ATTENTION.CLAMP_KQV.value.format(arch=self.arch), value)
+        self.add_float32(Keys.Attention.CLAMP_KQV.format(arch=self.arch), value)
 
     def add_layer_norm_eps(self, value: float) -> None:
-        self.add_float32(
-            Keys.ATTENTION.LAYERNORM_EPS.value.format(arch=self.arch), value)
+        self.add_float32(Keys.Attention.LAYERNORM_EPS.format(arch=self.arch), value)
 
    def add_layer_norm_rms_eps(self, value: float) -> None:
-        self.add_float32(
-            Keys.ATTENTION.LAYERNORM_RMS_EPS.value.format(arch=self.arch), value)
+        self.add_float32(Keys.Attention.LAYERNORM_RMS_EPS.format(arch=self.arch), value)
 
     def add_rope_dimension_count(self, count: int) -> None:
-        self.add_uint32(
-            Keys.ROPE.DIMENSION_COUNT.value.format(arch=self.arch), count)
+        self.add_uint32(Keys.Rope.DIMENSION_COUNT.format(arch=self.arch), count)
 
     def add_rope_freq_base(self, value: float) -> None:
-        self.add_float32(Keys.ROPE.FREQ_BASE.value.format(arch=self.arch), value)
+        self.add_float32(Keys.Rope.FREQ_BASE.format(arch=self.arch), value)
 
     def add_rope_scaling_type(self, value: RopeScalingType) -> None:
-        self.add_string(Keys.ROPE.SCALING_TYPE.value.format(arch=self.arch), value)
+        self.add_string(Keys.Rope.SCALING_TYPE.format(arch=self.arch), value.value)
 
     def add_rope_scaling_factor(self, value: float) -> None:
-        self.add_float32(Keys.ROPE.SCALING_FACTOR.value.format(arch=self.arch), value)
+        self.add_float32(Keys.Rope.SCALING_FACTOR.format(arch=self.arch), value)
 
     def add_rope_scaling_orig_ctx_len(self, value: int) -> None:
-        self.add_uint32(Keys.ROPE.SCALING_ORIG_CTX_LEN.value.format(arch=self.arch), value)
+        self.add_uint32(Keys.Rope.SCALING_ORIG_CTX_LEN.format(arch=self.arch), value)
 
     def add_rope_scaling_finetuned(self, value: bool) -> None:
-        self.add_bool(Keys.ROPE.SCALING_FINETUNED.value.format(arch=self.arch), value)
+        self.add_bool(Keys.Rope.SCALING_FINETUNED.format(arch=self.arch), value)
 
     def add_tokenizer_model(self, model: str) -> None:
-        self.add_string(Keys.TOKENIZER.MODEL, model)
+        self.add_string(Keys.Tokenizer.MODEL, model)
 
     def add_token_list(self, tokens: Sequence[str] | Sequence[bytes] | Sequence[bytearray]) -> None:
-        self.add_array(Keys.TOKENIZER.LIST, tokens)
+        self.add_array(Keys.Tokenizer.LIST, tokens)
 
     def add_token_merges(self, merges: Sequence[str] | Sequence[bytes] | Sequence[bytearray]) -> None:
-        self.add_array(Keys.TOKENIZER.MERGES, merges)
+        self.add_array(Keys.Tokenizer.MERGES, merges)
 
     def add_token_types(self, types: Sequence[TokenType] | Sequence[int]) -> None:
-        self.add_array(Keys.TOKENIZER.TOKEN_TYPE, types)
+        self.add_array(Keys.Tokenizer.TOKEN_TYPE, types)
 
     def add_token_scores(self, scores: Sequence[float]) -> None:
-        self.add_array(Keys.TOKENIZER.SCORES, scores)
+        self.add_array(Keys.Tokenizer.SCORES, scores)
 
     def add_bos_token_id(self, id: int) -> None:
-        self.add_uint32(Keys.TOKENIZER.BOS_ID, id)
+        self.add_uint32(Keys.Tokenizer.BOS_ID, id)
 
     def add_eos_token_id(self, id: int) -> None:
-        self.add_uint32(Keys.TOKENIZER.EOS_ID, id)
+        self.add_uint32(Keys.Tokenizer.EOS_ID, id)
 
     def add_unk_token_id(self, id: int) -> None:
-        self.add_uint32(Keys.TOKENIZER.UNK_ID, id)
+        self.add_uint32(Keys.Tokenizer.UNK_ID, id)
 
     def add_sep_token_id(self, id: int) -> None:
-        self.add_uint32(Keys.TOKENIZER.SEP_ID, id)
+        self.add_uint32(Keys.Tokenizer.SEP_ID, id)
 
     def add_pad_token_id(self, id: int) -> None:
-        self.add_uint32(Keys.TOKENIZER.PAD_ID, id)
+        self.add_uint32(Keys.Tokenizer.PAD_ID, id)
 
     def _pack(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> bytes:
         pack_prefix = ''
diff --git a/gguf-py/scripts/gguf-dump.py b/gguf-py/scripts/gguf-dump.py
index d578f9041..5141873de 100755
--- a/gguf-py/scripts/gguf-dump.py
+++ b/gguf-py/scripts/gguf-dump.py
@@ -1,4 +1,6 @@
 #!/usr/bin/env python3
+from __future__ import annotations
+
 import argparse
 import os
 import sys
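
Note (illustrative, not part of the patch): a minimal usage sketch, assuming the refactored gguf package is importable. With the nested plain classes the key constants are ordinary strings, so architecture-specific keys are formatted directly and the `.value` indirection the old StrEnum members required goes away; only `add_rope_scaling_type()` still needs an explicit `value.value`, because `RopeScalingType` is now a plain `Enum` rather than a `StrEnum`.

    from gguf.constants import Keys, KEY_CONTEXT_LENGTH

    arch = "llama"  # hypothetical architecture name, for illustration only

    # Nested plain-str constants: format the {arch} placeholder directly.
    print(Keys.General.ARCHITECTURE)                  # "general.architecture"
    print(Keys.LLM.CONTEXT_LENGTH.format(arch=arch))  # "llama.context_length"

    # The old KEY_* aliases still resolve to the same strings.
    assert KEY_CONTEXT_LENGTH == Keys.LLM.CONTEXT_LENGTH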