Correct mistake in conditional logic for attn.k: require both n_gqa() < 2 and n_expert < 2 (&& instead of ||) before demoting the quantization type

This commit is contained in:
Nexes the Old 2024-08-08 18:56:20 +02:00 committed by GitHub
parent 8006b15fd1
commit 1118c046df
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -15414,10 +15414,10 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
// TODO: explore better strategies
new_type = GGML_TYPE_Q8_0;
}
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS && (qs.model.hparams.n_gqa() < 2 || qs.model.hparams.n_expert < 2)) {
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS && qs.model.hparams.n_gqa() < 2 && qs.model.hparams.n_expert < 2) {
new_type = GGML_TYPE_IQ3_XXS;
}
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS && (qs.model.hparams.n_gqa() < 2 || qs.model.hparams.n_expert < 2)) {
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS && qs.model.hparams.n_gqa() < 2 && qs.model.hparams.n_expert < 2) {
new_type = GGML_TYPE_IQ2_S;
}
} else if (name.find("attn_q.weight") != std::string::npos) {