Limit enable_t_mac so that it only takes effect when the file type is INT_N.

This commit is contained in:
Qingtao Li 2024-10-30 16:00:38 +08:00
parent 080d2ecc56
commit f84d25dd8f

View file

@@ -1798,8 +1798,8 @@ class BitnetModel(Model):
]): ]):
# transform weight into 1/0/-1 (in fp32) # transform weight into 1/0/-1 (in fp32)
data_torch = self.weight_quant(data_torch) data_torch = self.weight_quant(data_torch)
if self.enable_t_mac: if self.enable_t_mac and self.ftype == gguf.LlamaFileType.MOSTLY_INT_N:
# transform weight into T-MAC I2 format # transform weight into T-MAC INT_N format
from t_mac.model_utils import preprocess_for_t_mac from t_mac.model_utils import preprocess_for_t_mac
data = LazyTorchTensor.to_eager(data_torch).numpy() data = LazyTorchTensor.to_eager(data_torch).numpy()
scale = np.max(np.abs(data)) scale = np.max(np.abs(data))