attn_qkv.weight in IQ4_XS for FTYPE IQ3_M

If FTYPE IQ4_XS keeps attn_qkv.weight in IQ4_XS, then FTYPE IQ3_M, a smaller quant mix, should use IQ4_XS (4.25 bpw) for it as well, not the larger Q4_K (4.5 bpw).
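For scale, a minimal sketch, not part of the commit: the tensor shape below is an assumption (a llama-style fused QKV projection of 4096 x 3*4096 weights), chosen only to show what the 4.5 -> 4.25 bpw change saves on one attn_qkv.weight tensor.

// Illustrative only: shape and names are assumptions, not from the commit.
#include <cstdio>

int main() {
    const double n_elements = 4096.0 * 3 * 4096; // assumed attn_qkv.weight element count
    const double bpw_q4_k   = 4.5;               // Q4_K bits per weight
    const double bpw_iq4_xs = 4.25;              // IQ4_XS bits per weight
    const double mib        = 1024.0 * 1024.0;
    std::printf("Q4_K   : %.2f MiB\n", n_elements * bpw_q4_k   / 8 / mib);
    std::printf("IQ4_XS : %.2f MiB\n", n_elements * bpw_iq4_xs / 8 / mib);
    std::printf("saved  : %.2f MiB (%.1f%%)\n",
                n_elements * (bpw_q4_k - bpw_iq4_xs) / 8 / mib,
                100.0 * (bpw_q4_k - bpw_iq4_xs) / bpw_q4_k);
    return 0;
}

For this assumed tensor that is 27.00 MiB vs 25.50 MiB, about 5.6% saved on the tensor, which is the point of aligning IQ3_M with the IQ4_XS choice.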
Nexes the Old, 2024-08-04 12:06:06 +02:00, committed by GitHub
parent 93c35f86a9
commit 59c5d479de


@@ -15493,9 +15493,10 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
         }
     }
     else if (name.find("attn_qkv.weight") != std::string::npos) {
-        if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M || ftype == LLAMA_FTYPE_MOSTLY_Q3_K_L || ftype == LLAMA_FTYPE_MOSTLY_IQ3_M) {
+        if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M || ftype == LLAMA_FTYPE_MOSTLY_Q3_K_L) {
             new_type = GGML_TYPE_Q4_K;
         }
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_M) new_type = GGML_TYPE_IQ4_XS;
         else if (ftype == LLAMA_FTYPE_MOSTLY_Q4_K_M) new_type = GGML_TYPE_Q5_K;
         else if (ftype == LLAMA_FTYPE_MOSTLY_Q5_K_M) new_type = GGML_TYPE_Q6_K;
     }
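With this change, quantizing with the IQ3_M preset routes attn_qkv.weight to GGML_TYPE_IQ4_XS on architectures that use a fused QKV projection. A hedged usage sketch with illustrative file names (the binary is named llama-quantize in mid-2024 upstream llama.cpp; older builds call it quantize):

./llama-quantize model-f16.gguf model-IQ3_M.gguf IQ3_M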