clean code 2

2024-06-09 21:15:02 +08:00 · 2024-06-09 21:15:02 +08:00 · 3a0f8b0697
commit 3a0f8b0697
parent 1c5a8b7fec
5 changed files with 100 additions and 51 deletions
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@ -925,7 +925,6 @@ class GGMLQuantizationType(IntEnum):
    F64     = 28
    IQ1_M   = 29
    BF16    = 30
-    I2      = 31


 # TODO: add GGMLFileType from ggml_ftype in ggml.h
@ -967,7 +966,6 @@ class LlamaFileType(IntEnum):
    MOSTLY_IQ4_XS        = 30  # except 1d tensors
    MOSTLY_IQ1_M         = 31  # except 1d tensors
    MOSTLY_BF16          = 32  # except 1d tensors
-    MOSTLY_I2            = 33  # except 1d tensors

    GUESSED              = 1024  # not specified in the model file

@ -1034,7 +1032,6 @@ GGML_QUANT_SIZES: dict[GGMLQuantizationType, tuple[int, int]] = {
    GGMLQuantizationType.IQ3_S:   (256, 2 + QK_K // 4 + QK_K // 8 + QK_K // 32 + 4),
    GGMLQuantizationType.IQ2_S:   (256, 2 + QK_K // 4 + QK_K // 16),
    GGMLQuantizationType.IQ4_XS:  (256, 2 + 2 + QK_K // 2 + QK_K // 64),
-    GGMLQuantizationType.I2:      (1, 1),
    GGMLQuantizationType.I8:      (1, 1),
    GGMLQuantizationType.I16:     (1, 2),
    GGMLQuantizationType.I32:     (1, 4),
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@ -225,10 +225,8 @@ class GGUFWriter:
                dtype = GGMLQuantizationType.I32
            elif tensor_dtype == np.int64:
                dtype = GGMLQuantizationType.I64
-            elif tensor_dtype == np.uint8:
-                dtype = GGMLQuantizationType.I2
            else:
-                raise ValueError("Only F16, F32, F64, I8, I16, I32, I64, I2 tensors are supported for now")
+                raise ValueError("Only F16, F32, F64, I8, I16, I32, I64 tensors are supported for now")
        else:
            dtype = raw_dtype
            if tensor_dtype == np.uint8:
@ -239,10 +237,7 @@ class GGUFWriter:
            self.ti_data += self._pack("Q", tensor_shape[n_dims - 1 - i])
        self.ti_data += self._pack("I", dtype)
        self.ti_data += self._pack("Q", self.offset_tensor)
-        if dtype == GGMLQuantizationType.I2:
-            self.offset_tensor += GGUFWriter.ggml_pad(tensor_nbytes, self.data_alignment) + self.data_alignment
-        else:
-            self.offset_tensor += GGUFWriter.ggml_pad(tensor_nbytes, self.data_alignment)
+        self.offset_tensor += GGUFWriter.ggml_pad(tensor_nbytes, self.data_alignment)
        self.ti_data_count += 1

    def add_tensor(
@ -257,9 +252,7 @@ class GGUFWriter:
            self.temp_file = fp

        shape: Sequence[int] = raw_shape if raw_shape is not None else tensor.shape
-
-        if (raw_dtype != GGMLQuantizationType.F32 or not name.endswith("scale")):
-            self.add_tensor_info(name, shape, tensor.dtype, tensor.nbytes, raw_dtype = raw_dtype)
+        self.add_tensor_info(name, shape, tensor.dtype, tensor.nbytes, raw_dtype = raw_dtype)

        if self.temp_file is None:
            self.tensors.append(tensor)