From f364636b2e85dedfb680c2d061eab7c6fbe52c71 Mon Sep 17 00:00:00 2001
From: Jared Van Bortel
Date: Tue, 7 Nov 2023 21:05:41 -0500
Subject: [PATCH] style cleanup with flake8

---
 gguf-py/gguf/constants.py      | 264 +++++++++++++++++----------------
 gguf-py/gguf/gguf_reader.py    |  92 ++++++------
 gguf-py/gguf/gguf_writer.py    |  47 +++---
 gguf-py/gguf/tensor_mapping.py | 171 ++++++++++-----------
 gguf-py/gguf/vocab.py          |  53 ++++---
 5 files changed, 331 insertions(+), 296 deletions(-)

diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index 7a0f5c709..f4bc52674 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -8,7 +8,7 @@ from typing import Any, NamedTuple, Type
 # constants
 #
 
-GGUF_MAGIC             = 0x46554747 # "GGUF"
+GGUF_MAGIC             = 0x46554747  # "GGUF"
 GGUF_VERSION           = 3
 GGUF_DEFAULT_ALIGNMENT = 32
 
@@ -16,64 +16,71 @@ GGUF_DEFAULT_ALIGNMENT = 32
 # metadata keys
 #
 
+
 class GeneralKeys(StrEnum):
-    ARCHITECTURE : str = "general.architecture"
+    ARCHITECTURE: str = "general.architecture"
     QUANTIZATION_VERSION: str = "general.quantization_version"
-    ALIGNMENT : str = "general.alignment"
-    NAME : str = "general.name"
-    AUTHOR : str = "general.author"
-    URL : str = "general.url"
-    DESCRIPTION : str = "general.description"
-    LICENSE : str = "general.license"
-    SOURCE_URL : str = "general.source.url"
-    SOURCE_HF_REPO : str = "general.source.huggingface.repository"
-    FILE_TYPE : str = "general.file_type"
+    ALIGNMENT: str = "general.alignment"
+    NAME: str = "general.name"
+    AUTHOR: str = "general.author"
+    URL: str = "general.url"
+    DESCRIPTION: str = "general.description"
+    LICENSE: str = "general.license"
+    SOURCE_URL: str = "general.source.url"
+    SOURCE_HF_REPO: str = "general.source.huggingface.repository"
+    FILE_TYPE: str = "general.file_type"
+
 
 class AttentionKeys(StrEnum):
-    HEAD_COUNT : str = "{arch}.attention.head_count"
-    HEAD_COUNT_KV : str = "{arch}.attention.head_count_kv"
-    MAX_ALIBI_BIAS : str = "{arch}.attention.max_alibi_bias"
-    CLAMP_KQV : str = "{arch}.attention.clamp_kqv"
-    LAYERNORM_EPS : str = "{arch}.attention.layer_norm_epsilon"
+    HEAD_COUNT: str = "{arch}.attention.head_count"
+    HEAD_COUNT_KV: str = "{arch}.attention.head_count_kv"
+    MAX_ALIBI_BIAS: str = "{arch}.attention.max_alibi_bias"
+    CLAMP_KQV: str = "{arch}.attention.clamp_kqv"
+    LAYERNORM_EPS: str = "{arch}.attention.layer_norm_epsilon"
     LAYERNORM_RMS_EPS: str = "{arch}.attention.layer_norm_rms_epsilon"
+
 
 class RopeKeys(StrEnum):
-    DIMENSION_COUNT : str = "{arch}.rope.dimension_count"
-    FREQ_BASE : str = "{arch}.rope.freq_base"
-    SCALING_TYPE : str = "{arch}.rope.scaling.type"
-    SCALING_FACTOR : str = "{arch}.rope.scaling.factor"
+    DIMENSION_COUNT: str = "{arch}.rope.dimension_count"
+    FREQ_BASE: str = "{arch}.rope.freq_base"
+    SCALING_TYPE: str = "{arch}.rope.scaling.type"
+    SCALING_FACTOR: str = "{arch}.rope.scaling.factor"
     SCALING_ORIG_CTX_LEN: str = "{arch}.rope.scaling.original_context_length"
-    SCALING_FINETUNED : str = "{arch}.rope.scaling.finetuned"
+    SCALING_FINETUNED: str = "{arch}.rope.scaling.finetuned"
+
 
 class TokenizerKeys(StrEnum):
-    MODEL : str = "tokenizer.ggml.model"
-    LIST : str = "tokenizer.ggml.tokens"
+    MODEL: str = "tokenizer.ggml.model"
+    LIST: str = "tokenizer.ggml.tokens"
     TOKEN_TYPE: str = "tokenizer.ggml.token_type"
-    SCORES : str = "tokenizer.ggml.scores"
-    MERGES : str = "tokenizer.ggml.merges"
-    BOS_ID : str = "tokenizer.ggml.bos_token_id"
-    EOS_ID : str = "tokenizer.ggml.eos_token_id"
-    UNK_ID : str = "tokenizer.ggml.unknown_token_id"
-    SEP_ID : str = "tokenizer.ggml.seperator_token_id"
-    PAD_ID : str = "tokenizer.ggml.padding_token_id"
-    HF_JSON : str = "tokenizer.huggingface.json"
-    RWKV : str = "tokenizer.rwkv.world"
+    SCORES: str = "tokenizer.ggml.scores"
+    MERGES: str = "tokenizer.ggml.merges"
+    BOS_ID: str = "tokenizer.ggml.bos_token_id"
+    EOS_ID: str = "tokenizer.ggml.eos_token_id"
+    UNK_ID: str = "tokenizer.ggml.unknown_token_id"
+    SEP_ID: str = "tokenizer.ggml.seperator_token_id"
+    PAD_ID: str = "tokenizer.ggml.padding_token_id"
+    HF_JSON: str = "tokenizer.huggingface.json"
+    RWKV: str = "tokenizer.rwkv.world"
+
 
 class LLMKeys(StrEnum):
-    CONTEXT_LENGTH : str = "{arch}.context_length"
-    EMBEDDING_LENGTH : str = "{arch}.embedding_length"
-    BLOCK_COUNT : str = "{arch}.block_count"
-    FEED_FORWARD_LENGTH : str = "{arch}.feed_forward_length"
+    CONTEXT_LENGTH: str = "{arch}.context_length"
+    EMBEDDING_LENGTH: str = "{arch}.embedding_length"
+    BLOCK_COUNT: str = "{arch}.block_count"
+    FEED_FORWARD_LENGTH: str = "{arch}.feed_forward_length"
     USE_PARALLEL_RESIDUAL: str = "{arch}.use_parallel_residual"
-    TENSOR_DATA_LAYOUT : str = "{arch}.tensor_data_layout"
+    TENSOR_DATA_LAYOUT: str = "{arch}.tensor_data_layout"
+
 
 class Keys(NamedTuple):
-    GENERAL : Type[GeneralKeys ] = GeneralKeys
-    LLM : Type[LLMKeys ] = LLMKeys
+    GENERAL: Type[GeneralKeys] = GeneralKeys
+    LLM: Type[LLMKeys] = LLMKeys
     ATTENTION: Type[AttentionKeys] = AttentionKeys
-    ROPE : Type[RopeKeys ] = RopeKeys
+    ROPE: Type[RopeKeys] = RopeKeys
     TOKENIZER: Type[TokenizerKeys] = TokenizerKeys
 
+
 KEY = Keys()
 
 #
@@ -82,42 +89,42 @@ KEY = Keys()
 
 
 class MODEL_ARCH(IntEnum):
-    LLAMA : int = auto()
-    FALCON : int = auto()
-    BAICHUAN : int = auto()
-    GPT2 : int = auto()
-    GPTJ : int = auto()
-    GPTNEOX : int = auto()
-    MPT : int = auto()
-    STARCODER : int = auto()
-    PERSIMMON : int = auto()
-    REFACT : int = auto()
-    BERT : int = auto()
-    BLOOM : int = auto()
+    LLAMA: int = auto()
+    FALCON: int = auto()
+    BAICHUAN: int = auto()
+    GPT2: int = auto()
+    GPTJ: int = auto()
+    GPTNEOX: int = auto()
+    MPT: int = auto()
+    STARCODER: int = auto()
+    PERSIMMON: int = auto()
+    REFACT: int = auto()
+    BERT: int = auto()
+    BLOOM: int = auto()
 
 
 class MODEL_TENSOR(IntEnum):
-    TOKEN_EMBD : int = auto()
-    TOKEN_EMBD_NORM : int = auto()
-    TOKEN_TYPES : int = auto()
-    POS_EMBD : int = auto()
-    OUTPUT : int = auto()
-    OUTPUT_NORM : int = auto()
-    ROPE_FREQS : int = auto()
-    ATTN_Q : int = auto()
-    ATTN_K : int = auto()
-    ATTN_V : int = auto()
-    ATTN_QKV : int = auto()
-    ATTN_OUT : int = auto()
-    ATTN_NORM : int = auto()
-    ATTN_NORM_2 : int = auto()
-    ATTN_ROT_EMBD : int = auto()
-    FFN_GATE : int = auto()
-    FFN_DOWN : int = auto()
-    FFN_UP : int = auto()
-    FFN_NORM : int = auto()
-    ATTN_Q_NORM : int = auto()
-    ATTN_K_NORM : int = auto()
+    TOKEN_EMBD: int = auto()
+    TOKEN_EMBD_NORM: int = auto()
+    TOKEN_TYPES: int = auto()
+    POS_EMBD: int = auto()
+    OUTPUT: int = auto()
+    OUTPUT_NORM: int = auto()
+    ROPE_FREQS: int = auto()
+    ATTN_Q: int = auto()
+    ATTN_K: int = auto()
+    ATTN_V: int = auto()
+    ATTN_QKV: int = auto()
+    ATTN_OUT: int = auto()
+    ATTN_NORM: int = auto()
+    ATTN_NORM_2: int = auto()
+    ATTN_ROT_EMBD: int = auto()
+    FFN_GATE: int = auto()
+    FFN_DOWN: int = auto()
+    FFN_UP: int = auto()
+    FFN_NORM: int = auto()
+    ATTN_Q_NORM: int = auto()
+    ATTN_K_NORM: int = auto()
 
 
 MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
@@ -321,13 +328,14 @@ MODEL_TENSOR_SKIP: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
     ],
     MODEL_ARCH.PERSIMMON: [
         MODEL_TENSOR.ROPE_FREQS,
-    ]
+    ],
 }
 
 #
 # types
 #
 
+
 class TokenType(IntEnum):
     NORMAL = 1
     UNKNOWN = 2
@@ -336,11 +344,13 @@ class TokenType(IntEnum):
     UNUSED = 5
     BYTE = 6
 
+
 class RopeScalingType(Enum):
     NONE = 'none'
     LINEAR = 'linear'
     YARN = 'yarn'
 
+
 class GGMLQuantizationType(IntEnum):
     F32 = 0
     F16 = 1
@@ -357,6 +367,7 @@ class GGMLQuantizationType(IntEnum):
     Q6_K = 14
     Q8_K = 15
 
+
 class GGUFEndian(IntEnum):
     LITTLE = 0
     BIG = 1
@@ -379,7 +390,7 @@ class GGUFValueType(IntEnum):
 
     @staticmethod
     def get_type(val: Any) -> GGUFValueType:
-        if isinstance(val, str) or isinstance(val, bytes) or isinstance(val, bytearray):
+        if isinstance(val, (str, bytes, bytearray)):
             return GGUFValueType.STRING
         elif isinstance(val, list):
             return GGUFValueType.ARRAY
@@ -391,79 +402,80 @@ class GGUFValueType(IntEnum):
             return GGUFValueType.INT32
         # TODO: need help with 64-bit types in Python
         else:
-            print("Unknown type: "+str(type(val)))
+            print("Unknown type:", type(val))
             sys.exit()
 
+
 # Note: Does not support GGML_QKK_64
 QK_K = 256
 # Items here are (block size, type size)
 GGML_QUANT_SIZES = {
-    GGMLQuantizationType.F32 : (1, 4),
-    GGMLQuantizationType.F16 : (1, 2),
-    GGMLQuantizationType.Q4_0 : (32, 2 + 16),
-    GGMLQuantizationType.Q4_1 : (32, 2 + 2 + 16),
-    GGMLQuantizationType.Q5_0 : (32, 2 + 4 + 16),
-    GGMLQuantizationType.Q5_1 : (32, 2 + 2 + 4 + 16),
-    GGMLQuantizationType.Q8_0 : (32, 2 + 32),
-    GGMLQuantizationType.Q8_1 : (32, 4 + 4 + 32),
-    GGMLQuantizationType.Q2_K : (256, 2 + 2 + QK_K // 16 + QK_K // 4),
-    GGMLQuantizationType.Q3_K : (256, 2 + QK_K // 4 + QK_K // 8 + 12),
-    GGMLQuantizationType.Q4_K : (256, 2 + 2 + QK_K // 2 + 12),
-    GGMLQuantizationType.Q5_K : (256, 2 + 2 + QK_K // 2 + QK_K // 8 + 12),
-    GGMLQuantizationType.Q6_K : (256, 2 + QK_K // 2 + QK_K // 4 + QK_K // 16),
-    GGMLQuantizationType.Q8_K : (256, 4 + QK_K + QK_K // 8),
+    GGMLQuantizationType.F32: (1, 4),
+    GGMLQuantizationType.F16: (1, 2),
+    GGMLQuantizationType.Q4_0: (32, 2 + 16),
+    GGMLQuantizationType.Q4_1: (32, 2 + 2 + 16),
+    GGMLQuantizationType.Q5_0: (32, 2 + 4 + 16),
+    GGMLQuantizationType.Q5_1: (32, 2 + 2 + 4 + 16),
+    GGMLQuantizationType.Q8_0: (32, 2 + 32),
+    GGMLQuantizationType.Q8_1: (32, 4 + 4 + 32),
+    GGMLQuantizationType.Q2_K: (256, 2 + 2 + QK_K // 16 + QK_K // 4),
+    GGMLQuantizationType.Q3_K: (256, 2 + QK_K // 4 + QK_K // 8 + 12),
+    GGMLQuantizationType.Q4_K: (256, 2 + 2 + QK_K // 2 + 12),
+    GGMLQuantizationType.Q5_K: (256, 2 + 2 + QK_K // 2 + QK_K // 8 + 12),
+    GGMLQuantizationType.Q6_K: (256, 2 + QK_K // 2 + QK_K // 4 + QK_K // 16),
+    GGMLQuantizationType.Q8_K: (256, 4 + QK_K + QK_K // 8),
 }
 
+
 # Aliases for backward compatibility.
 
 # general
-KEY_GENERAL_ARCHITECTURE : str = KEY.GENERAL.ARCHITECTURE
+KEY_GENERAL_ARCHITECTURE: str = KEY.GENERAL.ARCHITECTURE
 KEY_GENERAL_QUANTIZATION_VERSION: str = KEY.GENERAL.QUANTIZATION_VERSION
-KEY_GENERAL_ALIGNMENT : str = KEY.GENERAL.ALIGNMENT
-KEY_GENERAL_NAME : str = KEY.GENERAL.NAME
-KEY_GENERAL_AUTHOR : str = KEY.GENERAL.AUTHOR
-KEY_GENERAL_URL : str = KEY.GENERAL.URL
-KEY_GENERAL_DESCRIPTION : str = KEY.GENERAL.DESCRIPTION
-KEY_GENERAL_LICENSE : str = KEY.GENERAL.LICENSE
-KEY_GENERAL_SOURCE_URL : str = KEY.GENERAL.SOURCE_URL
-KEY_GENERAL_SOURCE_HF_REPO : str = KEY.GENERAL.SOURCE_HF_REPO
-KEY_GENERAL_FILE_TYPE : str = KEY.GENERAL.FILE_TYPE
+KEY_GENERAL_ALIGNMENT: str = KEY.GENERAL.ALIGNMENT
+KEY_GENERAL_NAME: str = KEY.GENERAL.NAME
+KEY_GENERAL_AUTHOR: str = KEY.GENERAL.AUTHOR
+KEY_GENERAL_URL: str = KEY.GENERAL.URL
+KEY_GENERAL_DESCRIPTION: str = KEY.GENERAL.DESCRIPTION
+KEY_GENERAL_LICENSE: str = KEY.GENERAL.LICENSE
+KEY_GENERAL_SOURCE_URL: str = KEY.GENERAL.SOURCE_URL
+KEY_GENERAL_SOURCE_HF_REPO: str = KEY.GENERAL.SOURCE_HF_REPO
+KEY_GENERAL_FILE_TYPE: str = KEY.GENERAL.FILE_TYPE
 
 # LLM
-KEY_CONTEXT_LENGTH : str = KEY.LLM.CONTEXT_LENGTH
-KEY_EMBEDDING_LENGTH : str = KEY.LLM.EMBEDDING_LENGTH
-KEY_BLOCK_COUNT : str = KEY.LLM.BLOCK_COUNT
-KEY_FEED_FORWARD_LENGTH : str = KEY.LLM.FEED_FORWARD_LENGTH
+KEY_CONTEXT_LENGTH: str = KEY.LLM.CONTEXT_LENGTH
+KEY_EMBEDDING_LENGTH: str = KEY.LLM.EMBEDDING_LENGTH
+KEY_BLOCK_COUNT: str = KEY.LLM.BLOCK_COUNT
+KEY_FEED_FORWARD_LENGTH: str = KEY.LLM.FEED_FORWARD_LENGTH
 KEY_USE_PARALLEL_RESIDUAL: str = KEY.LLM.USE_PARALLEL_RESIDUAL
-KEY_TENSOR_DATA_LAYOUT : str = KEY.LLM.TENSOR_DATA_LAYOUT
+KEY_TENSOR_DATA_LAYOUT: str = KEY.LLM.TENSOR_DATA_LAYOUT
 
 # attention
-KEY_ATTENTION_HEAD_COUNT : str = KEY.ATTENTION.HEAD_COUNT
-KEY_ATTENTION_HEAD_COUNT_KV : str = KEY.ATTENTION.HEAD_COUNT_KV
-KEY_ATTENTION_MAX_ALIBI_BIAS : str = KEY.ATTENTION.MAX_ALIBI_BIAS
-KEY_ATTENTION_CLAMP_KQV : str = KEY.ATTENTION.CLAMP_KQV
-KEY_ATTENTION_LAYERNORM_EPS : str = KEY.ATTENTION.LAYERNORM_EPS
+KEY_ATTENTION_HEAD_COUNT: str = KEY.ATTENTION.HEAD_COUNT
+KEY_ATTENTION_HEAD_COUNT_KV: str = KEY.ATTENTION.HEAD_COUNT_KV
+KEY_ATTENTION_MAX_ALIBI_BIAS: str = KEY.ATTENTION.MAX_ALIBI_BIAS
+KEY_ATTENTION_CLAMP_KQV: str = KEY.ATTENTION.CLAMP_KQV
+KEY_ATTENTION_LAYERNORM_EPS: str = KEY.ATTENTION.LAYERNORM_EPS
 KEY_ATTENTION_LAYERNORM_RMS_EPS: str = KEY.ATTENTION.LAYERNORM_RMS_EPS
 
 # RoPE
-KEY_ROPE_DIMENSION_COUNT : str = KEY.ROPE.DIMENSION_COUNT
-KEY_ROPE_FREQ_BASE : str = KEY.ROPE.FREQ_BASE
-KEY_ROPE_SCALING_TYPE : str = KEY.ROPE.SCALING_TYPE
-KEY_ROPE_SCALING_FACTOR : str = KEY.ROPE.SCALING_FACTOR
+KEY_ROPE_DIMENSION_COUNT: str = KEY.ROPE.DIMENSION_COUNT
+KEY_ROPE_FREQ_BASE: str = KEY.ROPE.FREQ_BASE
+KEY_ROPE_SCALING_TYPE: str = KEY.ROPE.SCALING_TYPE
+KEY_ROPE_SCALING_FACTOR: str = KEY.ROPE.SCALING_FACTOR
 KEY_ROPE_SCALING_ORIG_CTX_LEN: str = KEY.ROPE.SCALING_ORIG_CTX_LEN
-KEY_ROPE_SCALING_FINETUNED : str = KEY.ROPE.SCALING_FINETUNED
+KEY_ROPE_SCALING_FINETUNED: str = KEY.ROPE.SCALING_FINETUNED
 
 # tokenization
-KEY_TOKENIZER_MODEL : str = KEY.TOKENIZER.MODEL
-KEY_TOKENIZER_LIST : str = KEY.TOKENIZER.LIST
+KEY_TOKENIZER_MODEL: str = KEY.TOKENIZER.MODEL
+KEY_TOKENIZER_LIST: str = KEY.TOKENIZER.LIST
 KEY_TOKENIZER_TOKEN_TYPE: str = KEY.TOKENIZER.TOKEN_TYPE
-KEY_TOKENIZER_SCORES : str = KEY.TOKENIZER.SCORES
-KEY_TOKENIZER_MERGES : str = KEY.TOKENIZER.MERGES
-KEY_TOKENIZER_BOS_ID : str = KEY.TOKENIZER.BOS_ID
-KEY_TOKENIZER_EOS_ID : str = KEY.TOKENIZER.EOS_ID
-KEY_TOKENIZER_UNK_ID : str = KEY.TOKENIZER.UNK_ID
-KEY_TOKENIZER_SEP_ID : str = KEY.TOKENIZER.SEP_ID
-KEY_TOKENIZER_PAD_ID : str = KEY.TOKENIZER.PAD_ID
-KEY_TOKENIZER_HF_JSON : str = KEY.TOKENIZER.HF_JSON
-KEY_TOKENIZER_RWKV : str = KEY.TOKENIZER.RWKV
+KEY_TOKENIZER_SCORES: str = KEY.TOKENIZER.SCORES
+KEY_TOKENIZER_MERGES: str = KEY.TOKENIZER.MERGES
+KEY_TOKENIZER_BOS_ID: str = KEY.TOKENIZER.BOS_ID
+KEY_TOKENIZER_EOS_ID: str = KEY.TOKENIZER.EOS_ID
+KEY_TOKENIZER_UNK_ID: str = KEY.TOKENIZER.UNK_ID
+KEY_TOKENIZER_SEP_ID: str = KEY.TOKENIZER.SEP_ID
+KEY_TOKENIZER_PAD_ID: str = KEY.TOKENIZER.PAD_ID
+KEY_TOKENIZER_HF_JSON: str = KEY.TOKENIZER.HF_JSON
+KEY_TOKENIZER_RWKV: str = KEY.TOKENIZER.RWKV
diff --git a/gguf-py/gguf/gguf_reader.py b/gguf-py/gguf/gguf_reader.py
index 479fae2a3..3326e9517 100644
--- a/gguf-py/gguf/gguf_reader.py
+++ b/gguf-py/gguf/gguf_reader.py
@@ -20,7 +20,7 @@ from gguf.constants import (
     GGUF_MAGIC,
     GGUF_VERSION,
     GGMLQuantizationType,
-    GGUFValueType
+    GGUFValueType,
 )
 
 READER_SUPPORTED_VERSIONS = [2, GGUF_VERSION]
@@ -76,14 +76,49 @@ class GGUFReader:
         GGUFValueType.BOOL: np.bool_,
     }
 
+    def __init__(self, path: os.PathLike[str] | str, mode: Literal['r', 'r+', 'c'] = 'r'):
+        self.data = np.memmap(path, mode = mode)
+        offs = 0
+        if self._get(offs, np.uint32, override_order = '<')[0] != GGUF_MAGIC:
+            raise ValueError('GGUF magic invalid')
+        offs += 4
+        temp_version = self._get(offs, np.uint32)
+        if temp_version[0] > 2000:
+            self.byte_order = 'S'
+            temp_version = temp_version.newbyteorder(self.byte_order)
+        version = temp_version[0]
+        if version not in READER_SUPPORTED_VERSIONS:
+            raise ValueError(f'Sorry, file appears to be version {version} which we cannot handle')
+        offs += self._push_field(ReaderField(offs, 'GGUF.version', [temp_version], [0], [GGUFValueType.UINT32]))
+        temp_counts = self._get(offs, np.uint64, 2)
+        offs += self._push_field(ReaderField(offs, 'GGUF.tensor_count', [temp_counts[:1]], [0], [GGUFValueType.UINT64]))
+        offs += self._push_field(ReaderField(offs, 'GGUF.kv_count', [temp_counts[1:]], [0], [GGUFValueType.UINT64]))
+        tensor_count, kv_count = temp_counts
+        offs = self._build_fields(offs, kv_count)
+        offs, tensors_fields = self._build_tensors_fields(offs, tensor_count)
+        new_align = self.fields.get('general.alignment')
+        if new_align is not None:
+            if new_align.types != [GGUFValueType.UINT64]:
+                raise ValueError('Bad type for general.alignment field')
+            self.alignment = new_align.parts[-1][0]
+        padding = offs % self.alignment
+        if padding != 0:
+            offs += self.alignment - padding
+        self._build_tensors(offs, tensors_fields)
+
     _DT = TypeVar('_DT', bound = npt.DTypeLike)
 
-    def _get(self, offset: int, dtype: npt.DTypeLike, count: int = 1, override_order: None | Literal['I' | 'S' | '<'] = None) -> npt.NDArray[Any]:
+
+    def _get(
+        self, offset: int, dtype: npt.DTypeLike, count: int = 1, override_order: None | Literal['I', 'S', '<'] = None,
+    ) -> npt.NDArray[Any]:
         count = int(count)
         itemsize = int(np.empty([], dtype = dtype).itemsize)
         end_offs = offset + itemsize * count
-        return (self.data[offset:end_offs]
+        return (
+            self.data[offset:end_offs]
             .view(dtype = dtype)[:count]
-            .newbyteorder(override_order or self.byte_order))
+            .newbyteorder(override_order or self.byte_order)
+        )
 
     def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int:
         if field.name in self.fields:
@@ -93,9 +128,11 @@ class GGUFReader:
 
     def _get_str(self, offset: int) -> tuple[npt.NDArray[np.uint64], npt.NDArray[np.uint8]]:
         slen = self._get(offset, np.uint64)
-        return (slen, self._get(offset + 8, np.uint8, slen[0]))
+        return slen, self._get(offset + 8, np.uint8, slen[0])
 
-    def _get_field_parts(self, orig_offs: int, raw_type: int) -> tuple[int, list[npt.NDArray[Any]], list[int], list[GGUFValueType]]:
+    def _get_field_parts(
+        self, orig_offs: int, raw_type: int,
+    ) -> tuple[int, list[npt.NDArray[Any]], list[int], list[GGUFValueType]]:
         offs = orig_offs
         types: list[GGUFValueType] = []
         gtype = GGUFValueType(raw_type)
@@ -104,12 +141,12 @@ class GGUFReader:
         if gtype == GGUFValueType.STRING:
             sparts: list[npt.NDArray[Any]] = list(self._get_str(offs))
             size = sum(int(part.nbytes) for part in sparts)
-            return (size, sparts, [1], types)
+            return size, sparts, [1], types
         # Check if it's a simple scalar type.
         nptype = self._simple_value_map.get(gtype)
         if nptype is not None:
             val = self._get(offs, nptype)
-            return (int(val.nbytes), [val], [0], types)
+            return int(val.nbytes), [val], [0], types
         # Handle arrays.
         if gtype == GGUFValueType.ARRAY:
             raw_itype = self._get(offs, np.uint32)
@@ -126,7 +163,7 @@ class GGUFReader:
                 aparts += curr_parts
                 data_idxs += (idx + idxs_offs for idx in curr_idxs)
                 offs += curr_size
-            return (offs - orig_offs, aparts, data_idxs, types)
+            return offs - orig_offs, aparts, data_idxs, types
         # We can't deal with this one.
         raise ValueError('Unknown/unhandled field type {gtype}')
 
@@ -164,7 +201,7 @@ class GGUFReader:
                 orig_offs,
                 str(bytes(kv_kdata), encoding = 'utf-8'),
                 parts,
-                list(idx + idxs_offs for idx in field_idxs),
+                [idx + idxs_offs for idx in field_idxs],
                 field_types,
             ), skip_sum = True)
             offs += field_size
@@ -176,7 +213,7 @@ class GGUFReader:
             field = self._get_tensor(offs)
             offs += sum(int(part.nbytes) for part in field.parts)
             tensor_fields.append(field)
-        return (offs, tensor_fields)
+        return offs, tensor_fields
 
     def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None:
         tensors = []
@@ -210,37 +247,6 @@ class GGUFReader:
 
         self.tensors = tensors
 
-    def __init__(self, path: os.PathLike[str] | str, mode: Literal['r' | 'r+' | 'c'] = 'r') -> None:
-        self.data = np.memmap(path, mode = mode)
-        offs = 0
-        if self._get(offs, np.uint32, override_order = '<')[0] != GGUF_MAGIC:
-            raise ValueError('GGUF magic invalid')
-        offs += 4
-        temp_version = self._get(offs, np.uint32)
-        if temp_version[0] > 2000:
-            self.byte_order = 'S'
-            temp_version = temp_version.newbyteorder(self.byte_order)
-        version = temp_version[0]
-        if version not in READER_SUPPORTED_VERSIONS:
-            raise ValueError(f'Sorry, file appears to be version {version} which we cannot handle')
-        offs += self._push_field(ReaderField(offs, 'GGUF.version', [temp_version], [0], [GGUFValueType.UINT32]))
-        temp_counts = self._get(offs, np.uint64, 2)
-        offs += self._push_field(ReaderField(offs, 'GGUF.tensor_count', [temp_counts[:1]], [0], [GGUFValueType.UINT64]))
-        offs += self._push_field(ReaderField(offs, 'GGUF.kv_count', [temp_counts[1:]], [0], [GGUFValueType.UINT64]))
-        tensor_count, kv_count = temp_counts
-        offs = self._build_fields(offs, kv_count)
-        offs, tensors_fields = self._build_tensors_fields(offs, tensor_count)
-        new_align = self.fields.get('general.alignment')
-        if new_align is not None:
-            if new_align.types != [GGUFValueType.UINT64]:
-                raise ValueError('Bad type for general.alignment field')
-            self.alignment = new_align.parts[-1][0]
-        padding = offs % self.alignment
-        if padding != 0:
-            offs += self.alignment - padding
-        self._build_tensors(offs, tensors_fields)
-
-
 # Example usage:
 if __name__ == "__main__":
     if len(sys.argv) < 2:
@@ -250,7 +256,7 @@ if __name__ == "__main__":
     reader = GGUFReader(sys.argv[1], 'r')
     print(f'\n* Dumping {len(reader.fields)} key/value pair(s)')
     for n, field in enumerate(reader.fields.values(), 1):
-        if len(field.types) == 0:
+        if not field.types:
             pretty_type = 'N/A'
         elif field.types[0] == GGUFValueType.ARRAY:
             nest_count = len(field.types) - 1
diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py
index 99d4d70a8..2c74cf025 100644
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@@ -19,7 +19,7 @@ from .constants import (
     GGUFEndian,
     GGUFValueType,
     RopeScalingType,
-    TokenType
+    TokenType,
 )
 
 
@@ -29,6 +29,7 @@ class WriterState(Enum):
     KV_DATA = auto()
     TI_DATA = auto()
 
+
 class GGUFWriter:
     fout: BufferedWriter
     temp_file: tempfile.SpooledTemporaryFile[bytes] | None
@@ -47,16 +48,10 @@ class GGUFWriter:
         GGUFValueType.BOOL: "?",
     }
 
-    def _pack(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> bytes:
-        pack_prefix = ''
-        if not skip_pack_prefix:
-            pack_prefix = '<' if self.endianess == GGUFEndian.LITTLE else '>'
-        return struct.pack(f'{pack_prefix}{fmt}', value)
-
-    def _write_packed(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> None:
-        self.fout.write(self._pack(fmt, value, skip_pack_prefix))
-
-    def __init__(self, path: os.PathLike[str] | str, arch: str, use_temp_file: bool = True, endianess: GGUFEndian = GGUFEndian.LITTLE) -> None:
+    def __init__(
+        self, path: os.PathLike[str] | str, arch: str, use_temp_file: bool = True,
+        endianess: GGUFEndian = GGUFEndian.LITTLE,
+    ):
         self.fout = open(path, "wb")
         self.arch = arch
         self.endianess = endianess
@@ -69,8 +64,9 @@ class GGUFWriter:
         self.use_temp_file = use_temp_file
         self.temp_file = None
         self.tensors = []
-        print("gguf: This GGUF file is for {0} Endian only"
-              .format("Big" if self.endianess == GGUFEndian.BIG else "Little"))
+        print("gguf: This GGUF file is for {0} Endian only".format(
+            "Big" if self.endianess == GGUFEndian.BIG else "Little",
+        ))
         self.state = WriterState.EMPTY
 
         self.add_architecture()
@@ -150,7 +146,7 @@ class GGUFWriter:
         self.add_val(val, GGUFValueType.BOOL)
 
     def add_string(self, key: str, val: str) -> None:
-        if len(val) == 0:
+        if not val:
             return
         self.add_key(key)
         self.add_val(val, GGUFValueType.STRING)
@@ -177,7 +173,7 @@ class GGUFWriter:
             encoded_val = val.encode("utf8") if isinstance(val, str) else val
             self.kv_data += self._pack("Q", len(encoded_val))
             self.kv_data += encoded_val
-        elif vtype == GGUFValueType.ARRAY and isinstance(val, Sequence) and len(val) > 0:
+        elif vtype == GGUFValueType.ARRAY and isinstance(val, Sequence) and val:
             ltype = GGUFValueType.get_type(val[0])
             if not all(GGUFValueType.get_type(i) is ltype for i in val[1:]):
                 raise ValueError("All items in a GGUF array should be of the same type")
@@ -192,7 +188,10 @@ class GGUFWriter:
     def ggml_pad(x: int, n: int) -> int:
         return ((x + n - 1) // n) * n
 
-    def add_tensor_info(self, name: str, tensor_shape: Sequence[int], tensor_dtype: np.dtype[np.float16] | np.dtype[np.float32], tensor_nbytes: int, raw_dtype: GGMLQuantizationType | None = None) -> None:
+    def add_tensor_info(
+        self, name: str, tensor_shape: Sequence[int], tensor_dtype: np.dtype[np.float16] | np.dtype[np.float32],
+        tensor_nbytes: int, raw_dtype: GGMLQuantizationType | None = None,
+    ) -> None:
         if self.state is not WriterState.EMPTY:
             raise ValueError(f'Expected output file to be empty, got {self.state}')
 
@@ -215,7 +214,10 @@ class GGUFWriter:
         self.offset_tensor += GGUFWriter.ggml_pad(tensor_nbytes, self.data_alignment)
         self.ti_data_count += 1
 
-    def add_tensor(self, name: str, tensor: np.ndarray[Any, Any], raw_shape: Sequence[int] | None = None, raw_dtype: GGMLQuantizationType | None = None) -> None:
+    def add_tensor(
+        self, name: str, tensor: np.ndarray[Any, Any], raw_shape: Sequence[int] | None = None,
+        raw_dtype: GGMLQuantizationType | None = None,
+    ) -> None:
         if self.endianess == GGUFEndian.BIG:
             tensor.byteswap(inplace=True)
         if self.use_temp_file and self.temp_file is None:
@@ -242,7 +244,7 @@ class GGUFWriter:
         if self.state is not WriterState.TI_DATA:
             raise ValueError(f'Expected output file to contain tensor info, got {self.state}')
 
-        if self.endianess==GGUFEndian.BIG:
+        if self.endianess == GGUFEndian.BIG:
             tensor.byteswap(inplace=True)
         self.write_padding(self.fout, self.fout.tell())
         tensor.tofile(self.fout)
@@ -402,3 +404,12 @@ class GGUFWriter:
 
     def add_pad_token_id(self, id: int) -> None:
         self.add_uint32(KEY.TOKENIZER.PAD_ID, id)
+
+    def _pack(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> bytes:
+        pack_prefix = ''
+        if not skip_pack_prefix:
+            pack_prefix = '<' if self.endianess == GGUFEndian.LITTLE else '>'
+        return struct.pack(f'{pack_prefix}{fmt}', value)
+
+    def _write_packed(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> None:
+        self.fout.write(self._pack(fmt, value, skip_pack_prefix))
diff --git a/gguf-py/gguf/tensor_mapping.py b/gguf-py/gguf/tensor_mapping.py
index 55df1ba0f..22ad8b8fc 100644
--- a/gguf-py/gguf/tensor_mapping.py
+++ b/gguf-py/gguf/tensor_mapping.py
@@ -9,14 +9,14 @@ class TensorNameMap:
     mappings_cfg: dict[MODEL_TENSOR, tuple[str, ...]] = {
         # Token embeddings
        MODEL_TENSOR.TOKEN_EMBD: (
-            "gpt_neox.embed_in", # gptneox
-            "transformer.wte", # gpt2 gpt-j mpt refact
-            "transformer.word_embeddings", # falcon
-            "word_embeddings", # bloom
-            "model.embed_tokens", # llama-hf
-            "tok_embeddings", # llama-pth
-            "embeddings.word_embeddings", # bert
-            "language_model.embedding.word_embeddings", # persimmon
+            "gpt_neox.embed_in",  # gptneox
+            "transformer.wte",  # gpt2 gpt-j mpt refact
+            "transformer.word_embeddings",  # falcon
+            "word_embeddings",  # bloom
+            "model.embed_tokens",  # llama-hf
+            "tok_embeddings",  # llama-pth
+            "embeddings.word_embeddings",  # bert
+            "language_model.embedding.word_embeddings",  # persimmon
         ),
 
         # Token type embeddings
@@ -37,59 +37,59 @@ class TensorNameMap:
 
         # Output
         MODEL_TENSOR.OUTPUT: (
-            "embed_out", # gptneox
-            "lm_head", # gpt2 mpt falcon llama-hf baichuan
-            "output", # llama-pth bloom
-            "word_embeddings_for_head", # persimmon
+            "embed_out",  # gptneox
+            "lm_head",  # gpt2 mpt falcon llama-hf baichuan
+            "output",  # llama-pth bloom
+            "word_embeddings_for_head",  # persimmon
         ),
 
         # Output norm
         MODEL_TENSOR.OUTPUT_NORM: (
-            "gpt_neox.final_layer_norm", # gptneox
-            "transformer.ln_f", # gpt2 gpt-j falcon
-            "model.norm", # llama-hf baichuan
-            "norm", # llama-pth
-            "embeddings.LayerNorm", # bert
-            "transformer.norm_f", # mpt
-            "ln_f", # refact bloom
-            "language_model.encoder.final_layernorm", # persimmon
+            "gpt_neox.final_layer_norm",  # gptneox
+            "transformer.ln_f",  # gpt2 gpt-j falcon
+            "model.norm",  # llama-hf baichuan
+            "norm",  # llama-pth
+            "embeddings.LayerNorm",  # bert
+            "transformer.norm_f",  # mpt
+            "ln_f",  # refact bloom
+            "language_model.encoder.final_layernorm",  # persimmon
         ),
 
         # Rope frequencies
         MODEL_TENSOR.ROPE_FREQS: (
-            "rope.freqs", # llama-pth
+            "rope.freqs",  # llama-pth
         ),
     }
 
     block_mappings_cfg: dict[MODEL_TENSOR, tuple[str, ...]] = {
         # Attention norm
         MODEL_TENSOR.ATTN_NORM: (
-            "gpt_neox.layers.{bid}.input_layernorm", # gptneox
-            "transformer.h.{bid}.ln_1", # gpt2 gpt-j refact
-            "transformer.blocks.{bid}.norm_1", # mpt
-            "transformer.h.{bid}.input_layernorm", # falcon7b
-            "h.{bid}.input_layernorm", # bloom
-            "transformer.h.{bid}.ln_mlp", # falcon40b
-            "model.layers.{bid}.input_layernorm", # llama-hf
-            "layers.{bid}.attention_norm", # llama-pth
-            "encoder.layer.{bid}.attention.output.LayerNorm", # bert
-            "language_model.encoder.layers.{bid}.input_layernorm", # persimmon
-            "model.layers.{bid}.ln1", # yi
+            "gpt_neox.layers.{bid}.input_layernorm",  # gptneox
+            "transformer.h.{bid}.ln_1",  # gpt2 gpt-j refact
+            "transformer.blocks.{bid}.norm_1",  # mpt
+            "transformer.h.{bid}.input_layernorm",  # falcon7b
+            "h.{bid}.input_layernorm",  # bloom
+            "transformer.h.{bid}.ln_mlp",  # falcon40b
+            "model.layers.{bid}.input_layernorm",  # llama-hf
+            "layers.{bid}.attention_norm",  # llama-pth
+            "encoder.layer.{bid}.attention.output.LayerNorm",  # bert
+            "language_model.encoder.layers.{bid}.input_layernorm",  # persimmon
+            "model.layers.{bid}.ln1",  # yi
         ),
 
         # Attention norm 2
         MODEL_TENSOR.ATTN_NORM_2: (
-            "transformer.h.{bid}.ln_attn", # falcon40b
+            "transformer.h.{bid}.ln_attn",  # falcon40b
         ),
 
         # Attention query-key-value
         MODEL_TENSOR.ATTN_QKV: (
-            "gpt_neox.layers.{bid}.attention.query_key_value", # gptneox
-            "transformer.h.{bid}.attn.c_attn", # gpt2
-            "transformer.blocks.{bid}.attn.Wqkv", # mpt
-            "transformer.h.{bid}.self_attention.query_key_value", # falcon
-            "h.{bid}.self_attention.query_key_value", # bloom
-            "language_model.encoder.layers.{bid}.self_attention.query_key_value", # persimmon
+            "gpt_neox.layers.{bid}.attention.query_key_value",  # gptneox
+            "transformer.h.{bid}.attn.c_attn",  # gpt2
+            "transformer.blocks.{bid}.attn.Wqkv",  # mpt
+            "transformer.h.{bid}.self_attention.query_key_value",  # falcon
+            "h.{bid}.self_attention.query_key_value",  # bloom
+            "language_model.encoder.layers.{bid}.self_attention.query_key_value",  # persimmon
         ),
 
         # Attention query
@@ -118,69 +118,69 @@ class TensorNameMap:
 
         # Attention output
         MODEL_TENSOR.ATTN_OUT: (
-            "gpt_neox.layers.{bid}.attention.dense", # gptneox
-            "transformer.h.{bid}.attn.c_proj", # gpt2 refact
-            "transformer.blocks.{bid}.attn.out_proj", # mpt
-            "transformer.h.{bid}.self_attention.dense", # falcon
-            "h.{bid}.self_attention.dense", # bloom
-            "model.layers.{bid}.self_attn.o_proj", # llama-hf
-            "layers.{bid}.attention.wo", # llama-pth
-            "encoder.layer.{bid}.attention.output.dense", # bert
-            "transformer.h.{bid}.attn.out_proj", # gpt-j
-            "language_model.encoder.layers.{bid}.self_attention.dense" # persimmon
+            "gpt_neox.layers.{bid}.attention.dense",  # gptneox
+            "transformer.h.{bid}.attn.c_proj",  # gpt2 refact
+            "transformer.blocks.{bid}.attn.out_proj",  # mpt
+            "transformer.h.{bid}.self_attention.dense",  # falcon
+            "h.{bid}.self_attention.dense",  # bloom
+            "model.layers.{bid}.self_attn.o_proj",  # llama-hf
+            "layers.{bid}.attention.wo",  # llama-pth
+            "encoder.layer.{bid}.attention.output.dense",  # bert
+            "transformer.h.{bid}.attn.out_proj",  # gpt-j
+            "language_model.encoder.layers.{bid}.self_attention.dense",  # persimmon
         ),
 
         # Rotary embeddings
         MODEL_TENSOR.ATTN_ROT_EMBD: (
-            "model.layers.{bid}.self_attn.rotary_emb.inv_freq", # llama-hf
-            "layers.{bid}.attention.inner_attention.rope.freqs", # llama-pth
+            "model.layers.{bid}.self_attn.rotary_emb.inv_freq",  # llama-hf
+            "layers.{bid}.attention.inner_attention.rope.freqs",  # llama-pth
         ),
 
         # Feed-forward norm
         MODEL_TENSOR.FFN_NORM: (
-            "gpt_neox.layers.{bid}.post_attention_layernorm", # gptneox
-            "transformer.h.{bid}.ln_2", # gpt2 refact
-            "h.{bid}.post_attention_layernorm", # bloom
-            "transformer.blocks.{bid}.norm_2", # mpt
-            "model.layers.{bid}.post_attention_layernorm", # llama-hf
-            "layers.{bid}.ffn_norm", # llama-pth
-            "encoder.layer.{bid}.output.LayerNorm", # bert
-            "language_model.encoder.layers.{bid}.post_attention_layernorm", # persimmon
-            "model.layers.{bid}.ln2", # yi
+            "gpt_neox.layers.{bid}.post_attention_layernorm",  # gptneox
+            "transformer.h.{bid}.ln_2",  # gpt2 refact
+            "h.{bid}.post_attention_layernorm",  # bloom
+            "transformer.blocks.{bid}.norm_2",  # mpt
+            "model.layers.{bid}.post_attention_layernorm",  # llama-hf
+            "layers.{bid}.ffn_norm",  # llama-pth
+            "encoder.layer.{bid}.output.LayerNorm",  # bert
+            "language_model.encoder.layers.{bid}.post_attention_layernorm",  # persimmon
+            "model.layers.{bid}.ln2",  # yi
         ),
 
         # Feed-forward up
         MODEL_TENSOR.FFN_UP: (
-            "gpt_neox.layers.{bid}.mlp.dense_h_to_4h", # gptneox
-            "transformer.h.{bid}.mlp.c_fc", # gpt2
-            "transformer.blocks.{bid}.ffn.up_proj", # mpt
-            "transformer.h.{bid}.mlp.dense_h_to_4h", # falcon
-            "h.{bid}.mlp.dense_h_to_4h", # bloom
-            "model.layers.{bid}.mlp.up_proj", # llama-hf refact
-            "layers.{bid}.feed_forward.w3", # llama-pth
-            "encoder.layer.{bid}.intermediate.dense", # bert
-            "transformer.h.{bid}.mlp.fc_in", # gpt-j
-            "language_model.encoder.layers.{bid}.mlp.dense_h_to_4h", # persimmon
+            "gpt_neox.layers.{bid}.mlp.dense_h_to_4h",  # gptneox
+            "transformer.h.{bid}.mlp.c_fc",  # gpt2
+            "transformer.blocks.{bid}.ffn.up_proj",  # mpt
+            "transformer.h.{bid}.mlp.dense_h_to_4h",  # falcon
+            "h.{bid}.mlp.dense_h_to_4h",  # bloom
+            "model.layers.{bid}.mlp.up_proj",  # llama-hf refact
+            "layers.{bid}.feed_forward.w3",  # llama-pth
+            "encoder.layer.{bid}.intermediate.dense",  # bert
+            "transformer.h.{bid}.mlp.fc_in",  # gpt-j
+            "language_model.encoder.layers.{bid}.mlp.dense_h_to_4h",  # persimmon
         ),
 
         # Feed-forward gate
         MODEL_TENSOR.FFN_GATE: (
-            "model.layers.{bid}.mlp.gate_proj", # llama-hf refact
-            "layers.{bid}.feed_forward.w1", # llama-pth
+            "model.layers.{bid}.mlp.gate_proj",  # llama-hf refact
+            "layers.{bid}.feed_forward.w1",  # llama-pth
         ),
 
         # Feed-forward down
         MODEL_TENSOR.FFN_DOWN: (
-            "gpt_neox.layers.{bid}.mlp.dense_4h_to_h", # gptneox
-            "transformer.h.{bid}.mlp.c_proj", # gpt2 refact
-            "transformer.blocks.{bid}.ffn.down_proj", # mpt
-            "transformer.h.{bid}.mlp.dense_4h_to_h", # falcon
-            "h.{bid}.mlp.dense_4h_to_h", # bloom
-            "model.layers.{bid}.mlp.down_proj", # llama-hf
-            "layers.{bid}.feed_forward.w2", # llama-pth
-            "encoder.layer.{bid}.output.dense", # bert
-            "transformer.h.{bid}.mlp.fc_out", # gpt-j
-            "language_model.encoder.layers.{bid}.mlp.dense_4h_to_h", # persimmon
+            "gpt_neox.layers.{bid}.mlp.dense_4h_to_h",  # gptneox
+            "transformer.h.{bid}.mlp.c_proj",  # gpt2 refact
+            "transformer.blocks.{bid}.ffn.down_proj",  # mpt
+            "transformer.h.{bid}.mlp.dense_4h_to_h",  # falcon
+            "h.{bid}.mlp.dense_4h_to_h",  # bloom
+            "model.layers.{bid}.mlp.down_proj",  # llama-hf
+            "layers.{bid}.feed_forward.w2",  # llama-pth
+            "encoder.layer.{bid}.output.dense",  # bert
+            "transformer.h.{bid}.mlp.fc_out",  # gpt-j
+            "language_model.encoder.layers.{bid}.mlp.dense_4h_to_h",  # persimmon
         ),
 
         MODEL_TENSOR.ATTN_Q_NORM: (
@@ -192,8 +192,8 @@ class TensorNameMap:
         ),
 
         MODEL_TENSOR.ROPE_FREQS: (
-            "language_model.encoder.layers.{bid}.self_attention.rotary_emb.inv_freq", # persimmon
-        )
+            "language_model.encoder.layers.{bid}.self_attention.rotary_emb.inv_freq",  # persimmon
+        ),
     }
 
     mapping: dict[str, tuple[MODEL_TENSOR, str]]
@@ -225,7 +225,7 @@ class TensorNameMap:
             if key.endswith(suffix):
                 result = self.mapping.get(key[:-len(suffix)])
                 if result is not None:
-                    return (result[0], result[1] + suffix)
+                    return result[0], result[1] + suffix
         return None
 
     def get_name(self, key: str, try_suffixes: Sequence[str] = ()) -> str | None:
@@ -252,5 +252,6 @@ class TensorNameMap:
     def __repr__(self) -> str:
         return repr(self.mapping)
 
+
 def get_tensor_name_map(arch: MODEL_ARCH, n_blocks: int) -> TensorNameMap:
     return TensorNameMap(arch, n_blocks)
diff --git a/gguf-py/gguf/vocab.py b/gguf-py/gguf/vocab.py
index 92040199d..8dea75e48 100644
--- a/gguf-py/gguf/vocab.py
+++ b/gguf-py/gguf/vocab.py
@@ -28,6 +28,26 @@ class SpecialVocab:
             self.special_token_types = ('bos', 'eos', 'unk', 'sep', 'pad')
         self._load(Path(path))
 
+    def __repr__(self) -> str:
+        return f'<SpecialVocab with {len(self.merges)} merges and special tokens {self.special_token_ids or "unset"}>'
+
+    def add_to_gguf(self, gw: GGUFWriter, quiet: bool = False) -> None:
+        if self.merges:
+            if not quiet:
+                print(f'gguf: Adding {len(self.merges)} merge(s).')
+            gw.add_token_merges(self.merges)
+        for typ, tokid in self.special_token_ids.items():
+            handler: Callable[[int], None] | None = getattr(gw, f'add_{typ}_token_id', None)
+            if handler is None:
+                print(
+                    f'gguf: WARNING: No handler for special token type {typ} with id {tokid} - skipping',
+                    file = sys.stderr,
+                )
+                continue
+            if not quiet:
+                print(f'gguf: Setting special token type {typ} to {tokid}')
+            handler(tokid)
+
     def _load(self, path: Path) -> None:
         if not self._try_load_from_tokenizer_json(path):
             self._try_load_from_config_json(path)
@@ -38,9 +58,10 @@ class SpecialVocab:
             if self.n_vocab is None or tid < self.n_vocab:
                 self.special_token_ids[typ] = tid
                 return
-            print(f'gguf: WARNING: Special token type {typ}, id {tid} out of range, must be under {self.n_vocab} - skipping',
-                  file = sys.stderr)
-
+            print(
+                f'gguf: WARNING: Special token type {typ}, id {tid} out of range, must be under {self.n_vocab} - skipping',
+                file = sys.stderr,
+            )
 
     def _try_load_from_tokenizer_json(self, path: Path) -> bool:
         tokenizer_file = path / 'tokenizer.json'
@@ -50,7 +71,7 @@ class SpecialVocab:
             tokenizer = json.load(f)
         if self.load_merges:
             merges = tokenizer.get('model', {}).get('merges')
-            if isinstance(merges, list) and len(merges) > 0 and isinstance(merges[0], str):
+            if isinstance(merges, list) and merges and isinstance(merges[0], str):
                 self.merges = merges
         tokenizer_config_file = path / 'tokenizer_config.json'
         added_tokens = tokenizer.get('added_tokens')
@@ -70,9 +91,10 @@ class SpecialVocab:
             else:
                 continue
             # We only need the first match here.
-            maybe_token_id = next((
-                atok.get('id') for atok in added_tokens
-                if atok.get('content') == tc_content), None)
+            maybe_token_id = next(
+                (atok.get('id') for atok in added_tokens if atok.get('content') == tc_content),
+                None,
+            )
             self._set_special_token(typ, maybe_token_id)
         return True
 
@@ -85,20 +107,3 @@ class SpecialVocab:
         for typ in self.special_token_types:
             self._set_special_token(typ, config.get(f'{typ}_token_id'))
         return True
-
-    def add_to_gguf(self, gw: GGUFWriter, quiet: bool = False) -> None:
-        if len(self.merges) > 0:
-            if not quiet:
-                print(f'gguf: Adding {len(self.merges)} merge(s).')
-            gw.add_token_merges(self.merges)
-        for typ, tokid in self.special_token_ids.items():
-            handler: Callable[[int], None] | None = getattr(gw, f'add_{typ}_token_id', None)
-            if handler is None:
-                print(f'gguf: WARNING: No handler for special token type {typ} with id {tokid} - skipping', file = sys.stderr)
-                continue
-            if not quiet:
-                print(f'gguf: Setting special token type {typ} to {tokid}')
-            handler(tokid)
-
-    def __repr__(self) -> str:
-        return f'<SpecialVocab with {len(self.merges)} merges and special tokens {self.special_token_ids or "unset"}>'
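
Below is a minimal usage sketch, not part of the commit, showing how the gguf-py modules touched by this patch fit together. The file name example.gguf, the metadata value, and the tensor contents are illustrative assumptions; the classes and helpers used (GGUFWriter, GGUFReader, get_tensor_name_map) are the ones reformatted above.

    import numpy as np

    from gguf.constants import KEY, MODEL_ARCH
    from gguf.gguf_reader import GGUFReader
    from gguf.gguf_writer import GGUFWriter
    from gguf.tensor_mapping import get_tensor_name_map

    # Write a tiny GGUF file with the writer API cleaned up above.
    # 'example.gguf' and the key/tensor values are placeholders.
    writer = GGUFWriter('example.gguf', 'llama')
    writer.add_string(KEY.GENERAL.NAME, 'example-model')
    writer.add_tensor('token_embd.weight', np.zeros((4, 4), dtype=np.float32))
    writer.write_header_to_file()
    writer.write_kv_data_to_file()
    writer.write_tensors_to_file()
    writer.close()

    # Read the file back and dump its metadata keys, mirroring the
    # "Example usage" block in gguf_reader.py's __main__.
    reader = GGUFReader('example.gguf', 'r')
    for name, field in reader.fields.items():
        print(name, field.types)

    # Translate a Hugging Face tensor name to its GGUF equivalent,
    # the way the convert scripts use tensor_mapping.py.
    name_map = get_tensor_name_map(MODEL_ARCH.LLAMA, n_blocks=1)
    print(name_map.get_name('model.embed_tokens.weight', try_suffixes=('.weight',)))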