gguf-py : fix BF16 numpy view type

This commit is contained in:
Francis Couture-Harpin 2024-08-02 17:42:46 -04:00
parent 861265b91e
commit e82ff5a346

View file

@@ -145,7 +145,7 @@ class BF16(__Quant, qtype=GGMLQuantizationType.BF16):
@classmethod
# same as ggml_compute_fp32_to_bf16 in ggml-impl.h
def quantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
- n = blocks.view(np.int32)
+ n = blocks.view(np.uint32)
# force nan to quiet
n = np.where((n & 0x7fffffff) > 0x7f800000, (n & np.uint32(0xffff0000)) | np.uint32(64 << 16), n)
# round to nearest even