Apply the GQA2/Expert2 conditionality to the IQ3 quants
In coherence with the proposed modifications to the IQ2 quant strategies, which make even more sense for the IQ3 quant strategies.
This commit is contained in:
parent
b77cdd83ff
commit
63986631a3
1 changed files with 4 additions and 4 deletions
|
@ -15371,15 +15371,15 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
|
|||
}
|
||||
} else if (name.find("attn_v.weight") != std::string::npos) {
|
||||
if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K) {
|
||||
new_type = qs.model.hparams.n_gqa() >= 4 ? GGML_TYPE_Q4_K : GGML_TYPE_Q3_K;
|
||||
new_type = (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) ? GGML_TYPE_Q4_K : GGML_TYPE_Q3_K;
|
||||
}
|
||||
else if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S && qs.model.hparams.n_gqa() >= 4) {
|
||||
else if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S && (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)) {
|
||||
new_type = GGML_TYPE_Q4_K;
|
||||
}
|
||||
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) {
|
||||
new_type = qs.model.hparams.n_gqa() >= 4 ? GGML_TYPE_Q4_K : !qs.has_imatrix ? GGML_TYPE_IQ3_S : GGML_TYPE_IQ3_XXS;
|
||||
new_type = (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) ? GGML_TYPE_Q4_K : !qs.has_imatrix ? GGML_TYPE_IQ3_S : GGML_TYPE_IQ3_XXS;
|
||||
}
|
||||
else if ((ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_S) && qs.model.hparams.n_gqa() >= 4) {
|
||||
else if ((ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_S) && (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)) {
|
||||
new_type = GGML_TYPE_Q4_K;
|
||||
}
|
||||
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_M) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue