From f64c7680550ebf4dd0453013524cc1054b311d17 Mon Sep 17 00:00:00 2001
From: Qingtao Li
Date: Sat, 12 Oct 2024 12:48:45 +0800
Subject: [PATCH] Restore n_tensors check.

---
 convert_hf_to_gguf.py | 6 ++++++
 src/llama.cpp         | 4 +---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index a3d35c2db..45a3cb707 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -367,6 +367,12 @@ class Model:
                     break
 
             for new_name, data_torch in (self._modify_tensors(data_torch, name, bid)):
+                # Some GPTQ models have empty bias tensors which are not part of the model architecture.
+                # These tensors will cause the n_tensors check to fail, so we have to skip them.
+                if new_name.endswith(".bias") and np.all(LazyTorchTensor.to_eager(data_torch).numpy() == 0):
+                    logger.info(f"Skipping empty bias tensor: {new_name}")
+                    continue
+
                 data = data_torch.squeeze().numpy()
 
                 # if data ends up empty, it means data_torch was a scalar tensor -> restore
diff --git a/src/llama.cpp b/src/llama.cpp
index 712d1d795..96699f260 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -4783,9 +4783,7 @@ struct llama_model_loader {
 
     void done_getting_tensors() const {
         if (n_created != n_tensors) {
-            // Zero bias in some HuggingFace models will cause n_tensors mismatch
-            // Consider removing zero bias in convert_hf_to_gguf.py?
-            // throw std::runtime_error(format("%s: wrong number of tensors; expected %d, got %d", __func__, n_tensors, n_created));
+            throw std::runtime_error(format("%s: wrong number of tensors; expected %d, got %d", __func__, n_tensors, n_created));
         }
     }
 
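
Note (illustration, not part of the patch): a minimal standalone sketch of the zero-bias skip above, assuming numpy and torch are available. is_empty_bias is a hypothetical helper name, and an eager tensor stands in for LazyTorchTensor.to_eager(data_torch).

    import numpy as np
    import torch

    # A bias tensor is treated as "empty" when every element is exactly zero;
    # some GPTQ exports emit such all-zero biases even though the model
    # architecture defines no bias there.
    def is_empty_bias(name: str, data_torch: torch.Tensor) -> bool:
        return name.endswith(".bias") and bool(np.all(data_torch.numpy() == 0))

    # An all-zero GPTQ-style bias is skipped; a real bias is kept.
    assert is_empty_bias("model.layers.0.self_attn.q_proj.bias", torch.zeros(4096))
    assert not is_empty_bias("model.layers.0.self_attn.q_proj.bias", torch.randn(4096))

Skipping such tensors on the conversion side keeps the GGUF tensor count consistent with the model architecture, which is what allows the strict n_tensors check in llama_model_loader to be restored.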