finish f16 hf bitnet e2e

This commit is contained in:
Eddie-Wang1120 2024-06-07 14:42:52 +08:00
parent 1f2e0ee012
commit 5e59660173
10 changed files with 440 additions and 11 deletions

View file

@@ -925,6 +925,7 @@ class GGMLQuantizationType(IntEnum):
F64 = 28
IQ1_M = 29
BF16 = 30
I2 = 31
# TODO: add GGMLFileType from ggml_ftype in ggml.h
@@ -966,6 +967,7 @@ class LlamaFileType(IntEnum):
MOSTLY_IQ4_XS = 30 # except 1d tensors
MOSTLY_IQ1_M = 31 # except 1d tensors
MOSTLY_BF16 = 32 # except 1d tensors
MOSTLY_I2 = 33 # except 1d tensors
GUESSED = 1024 # not specified in the model file
@@ -1032,6 +1034,7 @@ GGML_QUANT_SIZES: dict[GGMLQuantizationType, tuple[int, int]] = {
GGMLQuantizationType.IQ3_S: (256, 2 + QK_K // 4 + QK_K // 8 + QK_K // 32 + 4),
GGMLQuantizationType.IQ2_S: (256, 2 + QK_K // 4 + QK_K // 16),
GGMLQuantizationType.IQ4_XS: (256, 2 + 2 + QK_K // 2 + QK_K // 64),
GGMLQuantizationType.I2: (1, 1),
GGMLQuantizationType.I8: (1, 1),
GGMLQuantizationType.I16: (1, 2),
GGMLQuantizationType.I32: (1, 4),