diff --git a/llama.cpp b/llama.cpp index 68f18453b..b940ea3d2 100644 --- a/llama.cpp +++ b/llama.cpp @@ -12984,8 +12984,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s } if ((new_type == GGML_TYPE_IQ2_XXS || new_type == GGML_TYPE_IQ2_XS || - new_type == GGML_TYPE_IQ2_S || new_type == GGML_TYPE_IQ1_S || + (new_type == GGML_TYPE_IQ2_S && strcmp(tensor->name, "token_embd.weight")) || (new_type == GGML_TYPE_Q2_K && params->ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S && strcmp(tensor->name, "token_embd.weight") != 0)) && !imatrix) { LLAMA_LOG_ERROR("\n\n============================================================\n"); LLAMA_LOG_ERROR("Missing importance matrix for tensor %s in a very low-bit quantization\n", tensor->name);