gguf-py : fix BF16 numpy view type

2024-08-02 17:42:46 -04:00 · 2024-08-02 17:42:46 -04:00 · e82ff5a346
commit e82ff5a346
parent 861265b91e
1 changed file with 1 addition and 1 deletion
--- a/gguf-py/gguf/quants.py
+++ b/gguf-py/gguf/quants.py
@@ -145,7 +145,7 @@ class BF16(__Quant, qtype=GGMLQuantizationType.BF16):
    @classmethod
    # same as ggml_compute_fp32_to_bf16 in ggml-impl.h
    def quantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
-        n = blocks.view(np.int32)
+        n = blocks.view(np.uint32)
        # force nan to quiet
        n = np.where((n & 0x7fffffff) > 0x7f800000, (n & np.uint32(0xffff0000)) | np.uint32(64 << 16), n)
        # round to nearest even