Revamp the output weight a bit
for more granularity in low quants.
This commit is contained in:
parent
f796954872
commit
6b5cebfb2b
1 changed file with 17 additions and 5 deletions
|
@@ -16426,23 +16426,35 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
|
||||||
if (arch == LLM_ARCH_FALCON || nx % QK_K != 0) {
|
if (arch == LLM_ARCH_FALCON || nx % QK_K != 0) {
|
||||||
new_type = GGML_TYPE_Q8_0;
|
new_type = GGML_TYPE_Q8_0;
|
||||||
}
|
}
|
||||||
|
else if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q2_K || ftype == LLAMA_FTYPE_MOSTLY_Q2_K_L) {
|
||||||
|
if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
|
||||||
|
else new_type = GGML_TYPE_Q4_K;
|
||||||
|
}
|
||||||
|
else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M) {
|
||||||
|
if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
|
||||||
|
else if (qs.model.hparams.n_vocab >= 127999) new_type = GGML_TYPE_Q5_K;
|
||||||
|
else new_type = GGML_TYPE_Q6_K;
|
||||||
|
}
|
||||||
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S || ftype == LLAMA_FTYPE_MOSTLY_IQ1_M) {
|
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S || ftype == LLAMA_FTYPE_MOSTLY_IQ1_M) {
|
||||||
if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
|
if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
|
||||||
else if (qs.model.hparams.n_vocab >= 127999) new_type = GGML_TYPE_IQ4_XS;
|
else if (qs.model.hparams.n_vocab >= 127999) new_type = GGML_TYPE_IQ4_XS;
|
||||||
|
else if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_Q4_K;
|
||||||
|
else new_type = GGML_TYPE_IQ4_XS;
|
||||||
|
}
|
||||||
|
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS) {
|
||||||
|
if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
|
||||||
else new_type = GGML_TYPE_Q4_K;
|
else new_type = GGML_TYPE_Q4_K;
|
||||||
}
|
}
|
||||||
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS ||
|
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL) {
|
||||||
ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL ||
|
|
||||||
ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q2_K || ftype == LLAMA_FTYPE_MOSTLY_Q2_K_L) {
|
|
||||||
if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
|
if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
|
||||||
else if (qs.model.hparams.n_vocab >= 127999) new_type = GGML_TYPE_Q4_K;
|
else if (qs.model.hparams.n_vocab >= 127999) new_type = GGML_TYPE_Q4_K;
|
||||||
else new_type = GGML_TYPE_Q5_K;
|
else new_type = GGML_TYPE_Q5_K;
|
||||||
}
|
}
|
||||||
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) {
|
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
|
||||||
if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
|
if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
|
||||||
else new_type = GGML_TYPE_Q5_K;
|
else new_type = GGML_TYPE_Q5_K;
|
||||||
}
|
}
|
||||||
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_S || ftype == LLAMA_FTYPE_MOSTLY_IQ3_M) {
|
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S || ftype == LLAMA_FTYPE_MOSTLY_IQ3_M) {
|
||||||
if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
|
if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
|
||||||
else if (qs.model.hparams.n_vocab >= 127999) new_type = GGML_TYPE_Q5_K;
|
else if (qs.model.hparams.n_vocab >= 127999) new_type = GGML_TYPE_Q5_K;
|
||||||
else new_type = GGML_TYPE_Q6_K;
|
else new_type = GGML_TYPE_Q6_K;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue