From 59c5d479de38a95b206727a1050d619be6357298 Mon Sep 17 00:00:00 2001 From: Nexes the Old <124105151+Nexesenex@users.noreply.github.com> Date: Sun, 4 Aug 2024 12:06:06 +0200 Subject: [PATCH] attn_qkv.weight in IQ4_XS for FTYPE IQ3_M If FTYPE IQ4_XS has attn_qkv.weight in IQ4_XS, then FTYPE IQ3_M should not have it in Q4_K (4.5BPW), but in IQ4_XS (4.25BPW) also. --- src/llama.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/llama.cpp b/src/llama.cpp index 931a36a86..87657737a 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -15493,9 +15493,10 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n } } else if (name.find("attn_qkv.weight") != std::string::npos) { - if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M || ftype == LLAMA_FTYPE_MOSTLY_Q3_K_L || ftype == LLAMA_FTYPE_MOSTLY_IQ3_M) { + if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M || ftype == LLAMA_FTYPE_MOSTLY_Q3_K_L) { new_type = GGML_TYPE_Q4_K; } + else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_M) new_type = GGML_TYPE_IQ4_XS; else if (ftype == LLAMA_FTYPE_MOSTLY_Q4_K_M) new_type = GGML_TYPE_Q5_K; else if (ftype == LLAMA_FTYPE_MOSTLY_Q5_K_M) new_type = GGML_TYPE_Q6_K; }