From f84d25dd8fcf706e357b79ceda1437273d9b76ee Mon Sep 17 00:00:00 2001
From: Qingtao Li <qingtaoli@microsoft.com>
Date: Wed, 30 Oct 2024 16:00:38 +0800
Subject: [PATCH] Limit enable_t_mac to take effect on INT_N only.

---
 convert_hf_to_gguf.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index f44c4ca6d..7027948b9 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -1798,8 +1798,8 @@ class BitnetModel(Model):
         ]):
             # transform weight into 1/0/-1 (in fp32)
             data_torch = self.weight_quant(data_torch)
-            if self.enable_t_mac:
-                # transform weight into T-MAC I2 format
+            if self.enable_t_mac and self.ftype == gguf.LlamaFileType.MOSTLY_INT_N:
+                # transform weight into T-MAC INT_N format
                 from t_mac.model_utils import preprocess_for_t_mac
                 data = LazyTorchTensor.to_eager(data_torch).numpy()
                 scale = np.max(np.abs(data))