finish f16 hf bitnet e2e

2024-06-07 14:42:52 +08:00 · 2024-06-07 14:42:52 +08:00 · 5e59660173
commit 5e59660173
parent 1f2e0ee012
10 changed files with 440 additions and 11 deletions
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -925,6 +925,7 @@ class GGMLQuantizationType(IntEnum):
    F64     = 28
    IQ1_M   = 29
    BF16    = 30
+    I2      = 31


 # TODO: add GGMLFileType from ggml_ftype in ggml.h
@@ -966,6 +967,7 @@ class LlamaFileType(IntEnum):
    MOSTLY_IQ4_XS        = 30  # except 1d tensors
    MOSTLY_IQ1_M         = 31  # except 1d tensors
    MOSTLY_BF16          = 32  # except 1d tensors
+    MOSTLY_I2            = 33  # except 1d tensors

    GUESSED              = 1024  # not specified in the model file

@@ -1032,6 +1034,7 @@ GGML_QUANT_SIZES: dict[GGMLQuantizationType, tuple[int, int]] = {
    GGMLQuantizationType.IQ3_S:   (256, 2 + QK_K // 4 + QK_K // 8 + QK_K // 32 + 4),
    GGMLQuantizationType.IQ2_S:   (256, 2 + QK_K // 4 + QK_K // 16),
    GGMLQuantizationType.IQ4_XS:  (256, 2 + 2 + QK_K // 2 + QK_K // 64),
+    GGMLQuantizationType.I2:      (1, 1),
    GGMLQuantizationType.I8:      (1, 1),
    GGMLQuantizationType.I16:     (1, 2),
    GGMLQuantizationType.I32:     (1, 4),