ggml-quants : 1.625 bpw ternary packing for BitNet 1.58b

2024-06-19 12:21:08 -04:00 · 2024-06-19 12:21:08 -04:00 · bd807499f7
commit bd807499f7
parent ac146628e4
11 changed files with 594 additions and 4 deletions
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -1023,6 +1023,8 @@ class GGMLQuantizationType(IntEnum):
    F64     = 28
    IQ1_M   = 29
    BF16    = 30
+    Q2_2    = 31
+    Q1_3    = 32


 # TODO: add GGMLFileType from ggml_ftype in ggml.h
@@ -1064,6 +1066,8 @@ class LlamaFileType(IntEnum):
    MOSTLY_IQ4_XS        = 30  # except 1d tensors
    MOSTLY_IQ1_M         = 31  # except 1d tensors
    MOSTLY_BF16          = 32  # except 1d tensors
+    MOSTLY_Q2_2          = 33  # except 1d tensors
+    MOSTLY_Q1_3          = 34  # except 1d tensors

    GUESSED              = 1024  # not specified in the model file

@@ -1137,6 +1141,8 @@ GGML_QUANT_SIZES: dict[GGMLQuantizationType, tuple[int, int]] = {
    GGMLQuantizationType.F64:     (1, 8),
    GGMLQuantizationType.IQ1_M:   (256, QK_K // 8 + QK_K // 16  + QK_K // 32),
    GGMLQuantizationType.BF16:    (1, 2),
+    GGMLQuantizationType.Q2_2:    (32, 8),
+    GGMLQuantizationType.Q1_3:    (64, 12 + 1),
 }