From dccaec76abf35f6920b82d4e2793005352cb983f Mon Sep 17 00:00:00 2001 From: Iwan Kawrakow Date: Mon, 15 Jan 2024 07:55:55 +0200 Subject: [PATCH] The check for 256 divisibility was missing for IQ2_XS, IQ2_XXS --- llama.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/llama.cpp b/llama.cpp index 7af38718c..f9718060d 100644 --- a/llama.cpp +++ b/llama.cpp @@ -8559,7 +8559,8 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty //} bool convert_incompatible_tensor = false; if (new_type == GGML_TYPE_Q2_K || new_type == GGML_TYPE_Q3_K || new_type == GGML_TYPE_Q4_K || - new_type == GGML_TYPE_Q5_K || new_type == GGML_TYPE_Q6_K) { + new_type == GGML_TYPE_Q5_K || new_type == GGML_TYPE_Q6_K || + new_type == GGML_TYPE_IQ2_XS || new_type == GGML_TYPE_IQ2_XXS) { int nx = tensor->ne[0]; int ny = tensor->ne[1]; if (nx % QK_K != 0) { @@ -8571,6 +8572,8 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty } if (convert_incompatible_tensor) { switch (new_type) { + case GGML_TYPE_IQ2_XXS: + case GGML_TYPE_IQ2_XS: case GGML_TYPE_Q2_K: new_type = GGML_TYPE_Q4_0; break; case GGML_TYPE_Q3_K: new_type = GGML_TYPE_Q4_1; break; case GGML_TYPE_Q4_K: new_type = GGML_TYPE_Q5_0; break;