gguf-py : remove LlamaFileTypeMap

Too specific to 'llama.cpp', and would be a maintenance burden
to keep up to date.

* gguf-py : add generic quantize and dequantize functions

The quant classes no longer need to be known by callers;
only the target type (for 'quantize') or the source type
(for 'dequantize') is required.
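
For context, the new generic entry points take a plain tensor and a
GGMLQuantizationType. A minimal usage sketch follows; gguf.quants.quantize
appears in the diff below, gguf.quants.dequantize is its counterpart named
above, and the array shape is illustrative only:

import numpy as np
import gguf

# Round-trip an F32 tensor through Q8_0 using only the type enum;
# no quant class such as gguf.quants.Q8_0 is referenced directly.
data = np.random.rand(4, 256).astype(np.float32)
packed = gguf.quants.quantize(data, gguf.GGMLQuantizationType.Q8_0)
restored = gguf.quants.dequantize(packed, gguf.GGMLQuantizationType.Q8_0)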
Francis Couture-Harpin 2024-08-03 21:22:37 -04:00
parent e82ff5a346
commit 229c35cb59
4 changed files with 54 additions and 58 deletions

@@ -301,20 +301,25 @@ class Model:
                 ):
                     data_qtype = gguf.GGMLQuantizationType.F32
 
                 # No override (data_qtype is False), or wants to be quantized (data_qtype is True)
                 if isinstance(data_qtype, bool):
-                    data_qtype = gguf.LlamaFileTypeMap.get(self.ftype, gguf.GGMLQuantizationType.F32)
-
-                if data_qtype == gguf.GGMLQuantizationType.Q8_0:
-                    if gguf.quants.Q8_0.can_quantize(data):
-                        data = gguf.quants.Q8_0.quantize(data)
-                    else:  # fallback to f16
-                        data_qtype = gguf.GGMLQuantizationType.F16
-                if data_qtype == gguf.GGMLQuantizationType.BF16:
-                    data = gguf.quants.BF16.quantize(data)
-                if data_qtype == gguf.GGMLQuantizationType.F16:
-                    data = data.astype(np.float16, copy=False)
-                if data_qtype == gguf.GGMLQuantizationType.F32:
-                    data = data.astype(np.float32, copy=False)
+                    if self.ftype == gguf.LlamaFileType.ALL_F32:
+                        data_qtype = gguf.GGMLQuantizationType.F32
+                    elif self.ftype == gguf.LlamaFileType.MOSTLY_F16:
+                        data_qtype = gguf.GGMLQuantizationType.F16
+                    elif self.ftype == gguf.LlamaFileType.MOSTLY_BF16:
+                        data_qtype = gguf.GGMLQuantizationType.BF16
+                    elif self.ftype == gguf.LlamaFileType.MOSTLY_Q8_0:
+                        data_qtype = gguf.GGMLQuantizationType.Q8_0
+                    else:
+                        raise ValueError(f"Unknown file type: {self.ftype.name}")
+
+                try:
+                    data = gguf.quants.quantize(data, data_qtype)
+                except gguf.QuantError as e:
+                    logger.warning("%s, %s", e, "falling back to F16")
+                    data_qtype = gguf.GGMLQuantizationType.F16
+                    data = gguf.quants.quantize(data, data_qtype)
 
                 shape = gguf.quant_shape_from_byte_shape(data.shape, data_qtype) if data.dtype == np.uint8 else data.shape
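
The generic 'quantize' used above implies dispatch keyed by
GGMLQuantizationType rather than call sites naming quant classes. The
sketch below illustrates that pattern only; the registry name and class
layout are assumptions, not the actual gguf-py internals (gguf.QuantError
and the can_quantize/quantize methods do appear in this diff):

import numpy as np
from gguf import GGMLQuantizationType, QuantError

# Hypothetical type-keyed registry; concrete quant classes would
# register themselves here instead of being named by callers.
_type_registry: dict[GGMLQuantizationType, type] = {}

def quantize(data: np.ndarray, qtype: GGMLQuantizationType) -> np.ndarray:
    # F32/F16 are plain casts, not block formats.
    if qtype == GGMLQuantizationType.F32:
        return data.astype(np.float32, copy=False)
    if qtype == GGMLQuantizationType.F16:
        return data.astype(np.float16, copy=False)
    quant = _type_registry.get(qtype)
    if quant is None or not quant.can_quantize(data):
        # Callers (as in the diff above) catch this and fall back to F16.
        raise QuantError(f"cannot quantize tensor to {qtype.name}")
    return quant.quantize(data)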