Merge branch 'master' into compilade/lazy-convert-hf
This commit is contained in:
commit
bffdaf4010
43 changed files with 1646 additions and 179 deletions
|
@@ -817,6 +817,7 @@ class GGMLQuantizationType(IntEnum):
|
|||
I64 = 27
|
||||
F64 = 28
|
||||
IQ1_M = 29
|
||||
BF16 = 30
|
||||
|
||||
|
||||
class GGUFEndian(IntEnum):
|
||||
|
@@ -888,6 +889,7 @@ GGML_QUANT_SIZES: dict[GGMLQuantizationType, tuple[int, int]] = {
|
|||
GGMLQuantizationType.I64: (1, 8),
|
||||
GGMLQuantizationType.F64: (1, 8),
|
||||
GGMLQuantizationType.IQ1_M: (256, QK_K // 8 + QK_K // 16 + QK_K // 32),
|
||||
GGMLQuantizationType.BF16: (1, 2),
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue