From f64c7680550ebf4dd0453013524cc1054b311d17 Mon Sep 17 00:00:00 2001
From: Qingtao Li
Date: Sat, 12 Oct 2024 12:48:45 +0800
Subject: [PATCH] Restore n_tensors check.

---
 convert_hf_to_gguf.py | 6 ++++++
 src/llama.cpp         | 4 +---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index a3d35c2db..45a3cb707 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -367,6 +367,12 @@ class Model:
                     break
 
             for new_name, data_torch in (self._modify_tensors(data_torch, name, bid)):
+                # Some GPTQ models have empty bias tensors which are not part of the model architecture.
+                # These tensors will cause the n_tensors check to fail, so we have to skip them.
+                if new_name.endswith(".bias") and np.all(LazyTorchTensor.to_eager(data_torch).numpy() == 0):
+                    logger.info(f"Skipping empty bias tensor: {new_name}")
+                    continue
+
                 data = data_torch.squeeze().numpy()
 
                 # if data ends up empty, it means data_torch was a scalar tensor -> restore
diff --git a/src/llama.cpp b/src/llama.cpp
index 712d1d795..96699f260 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -4783,9 +4783,7 @@ struct llama_model_loader {
 
     void done_getting_tensors() const {
         if (n_created != n_tensors) {
-            // Zero bias in some HuggingFace models will cause n_tensors mismatch
-            // Consider removing zero bias in convert_hf_to_gguf.py?
-            // throw std::runtime_error(format("%s: wrong number of tensors; expected %d, got %d", __func__, n_tensors, n_created));
+            throw std::runtime_error(format("%s: wrong number of tensors; expected %d, got %d", __func__, n_tensors, n_created));
         }
     }
 
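
Note (illustration, not part of the patch): a minimal standalone sketch of the zero-bias skip above, assuming numpy and torch are available. is_empty_bias is a hypothetical helper name, and an eager tensor stands in for LazyTorchTensor.to_eager(data_torch).

    import numpy as np
    import torch

    # A bias tensor is treated as "empty" when every element is exactly zero;
    # some GPTQ exports emit such all-zero biases even though the model
    # architecture defines no bias there.
    def is_empty_bias(name: str, data_torch: torch.Tensor) -> bool:
        return name.endswith(".bias") and bool(np.all(data_torch.numpy() == 0))

    # An all-zero GPTQ-style bias is skipped; a real bias is kept.
    assert is_empty_bias("model.layers.0.self_attn.q_proj.bias", torch.zeros(4096))
    assert not is_empty_bias("model.layers.0.self_attn.q_proj.bias", torch.randn(4096))

Skipping such tensors on the conversion side keeps the GGUF tensor count consistent with the model architecture, which is what allows the strict n_tensors check in llama_model_loader to be restored.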