diff --git a/gguf-py/gguf/quants.py b/gguf-py/gguf/quants.py index abc621163..a5a78a7bc 100644 --- a/gguf-py/gguf/quants.py +++ b/gguf-py/gguf/quants.py @@ -145,7 +145,7 @@ class BF16(__Quant, qtype=GGMLQuantizationType.BF16): @classmethod # same as ggml_compute_fp32_to_bf16 in ggml-impl.h def quantize_blocks(cls, blocks: np.ndarray) -> np.ndarray: - n = blocks.view(np.int32) + n = blocks.view(np.uint32) # force nan to quiet n = np.where((n & 0x7fffffff) > 0x7f800000, (n & np.uint32(0xffff0000)) | np.uint32(64 << 16), n) # round to nearest even