convert-hf : begin refactoring write_tensor

This commit is contained in:
Francis Couture-Harpin 2024-04-30 14:07:28 -04:00
parent b8a7a5a90f
commit 47e02eb7bc
10 changed files with 386 additions and 852 deletions

View file

@@ -861,7 +861,7 @@ class GGUFValueType(IntEnum):
# Note: Does not support GGML_QKK_64
QK_K = 256
# Items here are (block size, type size)
-GGML_QUANT_SIZES = {
+GGML_QUANT_SIZES: dict[GGMLQuantizationType, tuple[int, int]] = {
GGMLQuantizationType.F32: (1, 4),
GGMLQuantizationType.F16: (1, 2),
GGMLQuantizationType.Q4_0: (32, 2 + 16),