From eaf9571d9b85712d9e37843e76abaa9ad85bc3a8 Mon Sep 17 00:00:00 2001
From: Nexesenex <124105151+Nexesenex@users.noreply.github.com>
Date: Tue, 26 Mar 2024 10:11:46 +0100
Subject: [PATCH] Update llama.cpp - exception for the IQ2_S token embedding
 error

---
 llama.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama.cpp b/llama.cpp
index 68f18453b..b940ea3d2 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -12984,8 +12984,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
             }
             if ((new_type == GGML_TYPE_IQ2_XXS ||
                  new_type == GGML_TYPE_IQ2_XS  ||
-                 new_type == GGML_TYPE_IQ2_S   ||
                  new_type == GGML_TYPE_IQ1_S   ||
+                (new_type == GGML_TYPE_IQ2_S && strcmp(tensor->name, "token_embd.weight"))  ||
                 (new_type == GGML_TYPE_Q2_K && params->ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S && strcmp(tensor->name, "token_embd.weight") != 0)) && !imatrix) {
                 LLAMA_LOG_ERROR("\n\n============================================================\n");
                 LLAMA_LOG_ERROR("Missing importance matrix for tensor %s in a very low-bit quantization\n", tensor->name);