quantize: fix F16/F32 downcast to q6_K
This commit is contained in:
parent
b838b53ad6
commit
13a39058d3
1 changed file with 1 addition and 1 deletion
|
@ -11675,7 +11675,7 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty
|
||||||
ftype == LLAMA_FTYPE_MOSTLY_IQ1_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M) {
|
ftype == LLAMA_FTYPE_MOSTLY_IQ1_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M) {
|
||||||
new_type = GGML_TYPE_Q5_K;
|
new_type = GGML_TYPE_Q5_K;
|
||||||
}
|
}
|
||||||
else if (new_type != GGML_TYPE_Q8_0) {
|
else if (new_type != GGML_TYPE_Q8_0 && new_type != GGML_TYPE_F16 && new_type != GGML_TYPE_F32) {
|
||||||
new_type = GGML_TYPE_Q6_K;
|
new_type = GGML_TYPE_Q6_K;
|
||||||
}
|
}
|
||||||
} else if (name == "token_embd.weight") {
|
} else if (name == "token_embd.weight") {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue