From 46054d1aab479bd2b90516572fa895dc9703b055 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?=
Date: Mon, 10 Jun 2024 04:30:47 +0200
Subject: [PATCH] truncate intermediate fp32 if converting bf16 to bf16

---
 convert-hf-to-gguf.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index 025405a2c..ac6363a48 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -295,8 +295,8 @@ class Model:
 
                 if self.ftype != gguf.LlamaFileType.ALL_F32 and extra_f16 and not extra_f32:
                     if self.ftype == gguf.LlamaFileType.MOSTLY_BF16:
-                        data = gguf.quantize_bf16(data)
-                        assert data.dtype == np.int16
+                        data = gguf.truncate_bf16(data) if old_dtype == torch.bfloat16 else gguf.quantize_bf16(data)
+                        assert data.dtype in (np.int16, np.uint16)
                         data_qtype = gguf.GGMLQuantizationType.BF16
 
                     elif self.ftype == gguf.LlamaFileType.MOSTLY_Q8_0 and gguf.can_quantize_to_q8_0(data):
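
A note on what the patch exploits (an illustrative sketch only, not the gguf-py implementation; the helper names below are hypothetical and NaN special-casing is omitted): bf16 is the upper half of an IEEE 754 fp32, so a tensor that started as bf16 and was upcast to fp32 for intermediate processing has all-zero low mantissa bits, and truncating the upcast back to 16 bits recovers the original bf16 bits exactly. Genuine fp32 data, by contrast, needs round-to-nearest-even, which is what the existing quantize_bf16 path provides.

    import numpy as np

    def truncate_bf16_sketch(a: np.ndarray) -> np.ndarray:
        # Keep only the high 16 bits of each fp32 value; bit-exact for
        # values that were upcast from bf16.
        return (a.astype(np.float32).view(np.uint32) >> 16).astype(np.uint16)

    def round_bf16_sketch(a: np.ndarray) -> np.ndarray:
        # Round-to-nearest-even fp32 -> bf16; widen to uint64 so adding
        # the rounding bias cannot overflow (NaNs not special-cased here).
        n = a.astype(np.float32).view(np.uint32).astype(np.uint64)
        n = (n + (0x7FFF + ((n >> 16) & 1))) >> 16
        return n.astype(np.uint16)

    # A value already representable in bf16 survives the fp32 round trip
    # under truncation bit-exactly:
    bits = np.array([0x3FC0], dtype=np.uint16)             # 1.5 as bf16
    f32 = (bits.astype(np.uint32) << 16).view(np.float32)  # upcast to fp32
    assert truncate_bf16_sketch(f32).tolist() == [0x3FC0]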