gguf-py : fix BF16 numpy view type
This commit is contained in:
parent
861265b91e
commit
e82ff5a346
1 changed file with 1 addition and 1 deletion
|
@@ -145,7 +145,7 @@ class BF16(__Quant, qtype=GGMLQuantizationType.BF16):
|
||||||
@classmethod
|
@classmethod
|
||||||
# same as ggml_compute_fp32_to_bf16 in ggml-impl.h
|
# same as ggml_compute_fp32_to_bf16 in ggml-impl.h
|
||||||
def quantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
|
def quantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
|
||||||
n = blocks.view(np.int32)
|
n = blocks.view(np.uint32)
|
||||||
# force nan to quiet
|
# force nan to quiet
|
||||||
n = np.where((n & 0x7fffffff) > 0x7f800000, (n & np.uint32(0xffff0000)) | np.uint32(64 << 16), n)
|
n = np.where((n & 0x7fffffff) > 0x7f800000, (n & np.uint32(0xffff0000)) | np.uint32(64 << 16), n)
|
||||||
# round to nearest even
|
# round to nearest even
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue