diff --git a/src/llama.cpp b/src/llama.cpp index 6827cb0dc..54732db2a 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -15441,7 +15441,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n if (ftype == LLAMA_FTYPE_CQS && qs.params->attn_q_type < GGML_TYPE_COUNT) { new_type = qs.params->attn_q_type; } - if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) { + else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) { new_type = GGML_TYPE_IQ3_XXS; } else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) { @@ -15453,7 +15453,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n if (ftype == LLAMA_FTYPE_CQS && qs.params->ffn_down_type < GGML_TYPE_COUNT) { new_type = qs.params->ffn_down_type; } - else if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K) new_type = GGML_TYPE_Q3_K; + else if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K) new_type = GGML_TYPE_Q3_K; else if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S) { if (i_layer < n_layer/8) new_type = GGML_TYPE_Q4_K; } @@ -15522,7 +15522,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n if (ftype == LLAMA_FTYPE_CQS && qs.params->attn_qkv_type < GGML_TYPE_COUNT) { new_type = qs.params->attn_qkv_type; } - if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M || ftype == LLAMA_FTYPE_MOSTLY_Q3_K_L || ftype == LLAMA_FTYPE_MOSTLY_IQ3_M) { + else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M || ftype == LLAMA_FTYPE_MOSTLY_Q3_K_L || ftype == LLAMA_FTYPE_MOSTLY_IQ3_M) { new_type = GGML_TYPE_Q4_K; } else if (ftype == LLAMA_FTYPE_MOSTLY_Q4_K_M) new_type = GGML_TYPE_Q5_K;