Restore n_tensors check.
This commit is contained in:
parent
dfac0c4b3e
commit
f64c768055
2 changed files with 7 additions and 3 deletions
|
@ -367,6 +367,12 @@ class Model:
|
|||
break
|
||||
|
||||
for new_name, data_torch in (self._modify_tensors(data_torch, name, bid)):
|
||||
# Some GPTQ models have empty bias tensors which are not in the model architecture.
|
||||
# These tensors will cause tensor number check to fail, so we have to skip them.
|
||||
if new_name.endswith(".bias") and np.all(LazyTorchTensor.to_eager(data_torch).numpy() == 0):
|
||||
logger.info(f"Skipping empty bias tensor: {new_name}")
|
||||
continue
|
||||
|
||||
data = data_torch.squeeze().numpy()
|
||||
|
||||
# if data ends up empty, it means data_torch was a scalar tensor -> restore
|
||||
|
|
|
@ -4783,9 +4783,7 @@ struct llama_model_loader {
|
|||
|
||||
void done_getting_tensors() const {
|
||||
if (n_created != n_tensors) {
|
||||
// Zero bias in some HuggingFace models will cause n_tensors mismatch
|
||||
// Consider removing zero bias in convert_hf_to_gguf.py?
|
||||
// throw std::runtime_error(format("%s: wrong number of tensors; expected %d, got %d", __func__, n_tensors, n_created));
|
||||
throw std::runtime_error(format("%s: wrong number of tensors; expected %d, got %d", __func__, n_tensors, n_created));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue