ggml : add TQ1_0 and TQ2_0 ternary quantization types

This commit is contained in:
Francis Couture-Harpin 2024-07-30 17:55:54 -04:00
parent 79a278e922
commit 77b8f84ae7
10 changed files with 563 additions and 16 deletions

View file

@@ -1145,8 +1145,13 @@ class GGMLQuantizationType(IntEnum):
F64 = 28
IQ1_M = 29
BF16 = 30
-Q2_2 = 31
-Q1_3 = 32
+Q4_0_4_4 = 31
+Q4_0_4_8 = 32
+Q4_0_8_8 = 33
+TQ1_0 = 34
+TQ2_0 = 35
+Q1_3 = 36
+Q2_2 = 37
# TODO: add GGMLFileType from ggml_ftype in ggml.h