gguf-py : fix and simplify quantized shape round-trip (#7483)

* gguf-py : fix and simplify quantized shape round-trip

* gguf-py : remove unused import
This commit is contained in:
compilade 2024-05-24 21:11:48 -04:00 committed by GitHub
parent d041d2ceaa
commit b83bab15a5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 27 additions and 14 deletions

View file

@ -1,5 +1,5 @@
from __future__ import annotations
from typing import Callable
from typing import Callable, Sequence
from numpy.typing import DTypeLike
@ -9,6 +9,20 @@ from .lazy import LazyNumpyTensor
import numpy as np
def quant_shape_to_byte_shape(shape: Sequence[int], quant_type: GGMLQuantizationType):
block_size, type_size = GGML_QUANT_SIZES[quant_type]
if shape[-1] % block_size != 0:
raise ValueError(f"Quantized tensor row size ({shape[-1]}) is not a multiple of {quant_type.name} block size ({block_size})")
return (*shape[:-1], shape[-1] // block_size * type_size)
def quant_shape_from_byte_shape(shape: Sequence[int], quant_type: GGMLQuantizationType):
block_size, type_size = GGML_QUANT_SIZES[quant_type]
if shape[-1] % type_size != 0:
raise ValueError(f"Quantized tensor bytes per row ({shape[-1]}) is not a multiple of {quant_type.name} type size ({type_size})")
return (*shape[:-1], shape[-1] // type_size * block_size)
# same as ggml_compute_fp32_to_bf16 in ggml-impl.h
def __compute_fp32_to_bf16(n: np.ndarray) -> np.ndarray:
n = n.astype(np.float32, copy=False).view(np.int32)