ggml : drop support for QK_K=64 (#7473)

* ggml : drop support for QK_K=64

ggml-ci

* opencl : restore QK_K=256 define
This commit is contained in:
Georgi Gerganov 2024-05-23 10:00:21 +03:00 committed by GitHub
parent 1b1e27cb49
commit e84b71c2c6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
16 changed files with 26 additions and 4049 deletions

View file

@@ -905,9 +905,8 @@ class GGUFValueType(IntEnum):
raise ValueError(f"Unknown type: {type(val)}")
-# Note: Does not support GGML_QKK_64
-QK_K = 256
# Items here are (block size, type size)
+QK_K = 256
GGML_QUANT_SIZES: dict[GGMLQuantizationType, tuple[int, int]] = {
GGMLQuantizationType.F32: (1, 4),
GGMLQuantizationType.F16: (1, 2),