diff --git a/examples/quantize/quantize.cpp b/examples/quantize/quantize.cpp index 59ef1405c..521012889 100644 --- a/examples/quantize/quantize.cpp +++ b/examples/quantize/quantize.cpp @@ -26,7 +26,7 @@ static const std::vector QUANT_OPTIONS = { { "IQ2_S", LLAMA_FTYPE_MOSTLY_IQ2_S, " 2.5 bpw quantization", }, { "IQ2_M", LLAMA_FTYPE_MOSTLY_IQ2_M, " 2.7 bpw quantization", }, { "IQ1_S", LLAMA_FTYPE_MOSTLY_IQ1_S, " 1.56 bpw quantization", }, - { "IQ1_M", LLAMA_FTYPE_MOSTLY_IQ1_M, " 1.81 bpw quantization", }, + { "IQ1_M", LLAMA_FTYPE_MOSTLY_IQ1_M, " 1.75 bpw quantization", }, { "Q2_K", LLAMA_FTYPE_MOSTLY_Q2_K, " 2.63G, +0.6717 ppl @ LLaMA-v1-7B", }, { "Q2_K_S", LLAMA_FTYPE_MOSTLY_Q2_K_S, " 2.16G, +9.0634 ppl @ LLaMA-v1-7B", }, { "IQ3_XXS",LLAMA_FTYPE_MOSTLY_IQ3_XXS," 3.06 bpw quantization", }, diff --git a/ggml-quants.c b/ggml-quants.c index a51013ae4..e801e0c28 100644 --- a/ggml-quants.c +++ b/ggml-quants.c @@ -12177,16 +12177,6 @@ static void quantize_row_iq1_m_impl(const float * restrict x, void * restrict vy sc[1] |= ((s.u16 & 0x00f0) << 8); sc[2] |= ((s.u16 & 0x0f00) << 4); sc[3] |= ((s.u16 & 0xf000) << 0); - //y[ibl].d = GGML_FP32_TO_FP16(d*1.125f); // 1.125f is another fudge factor. Don't ask me why it is needed. - //for (int ib = 0; ib < QK_K/block_size; ib += 2) { - // int l1 = nearest_int(0.5f*(id*scales[ib+0]-1)); - // l1 = MAX(0, MIN(7, l1)); - // int l2 = nearest_int(0.5f*(id*scales[ib+1]-1)); - // l2 = MAX(0, MIN(7, l2)); - // y[ibl].scales[ib/2] = l1 | (l2 << 4); - // y[ibl].qh[ib+0] |= masks[shifts[ib+0]]; - // y[ibl].qh[ib+1] |= masks[shifts[ib+1]]; - //} } } diff --git a/llama.cpp b/llama.cpp index e852793ad..0c1c5645a 100644 --- a/llama.cpp +++ b/llama.cpp @@ -3414,7 +3414,7 @@ static std::string llama_model_ftype_name(llama_ftype ftype) { case LLAMA_FTYPE_MOSTLY_IQ3_XS: return "IQ3_XS - 3.3 bpw"; case LLAMA_FTYPE_MOSTLY_IQ3_XXS:return "IQ3_XXS - 3.0625 bpw"; case LLAMA_FTYPE_MOSTLY_IQ1_S :return "IQ1_S - 1.5625 bpw"; - case LLAMA_FTYPE_MOSTLY_IQ1_M :return "IQ1_M - 1.8125 bpw"; + case LLAMA_FTYPE_MOSTLY_IQ1_M :return "IQ1_M - 1.75 bpw"; case LLAMA_FTYPE_MOSTLY_IQ4_NL: return "IQ4_NL - 4.5 bpw"; case LLAMA_FTYPE_MOSTLY_IQ4_XS: return "IQ4_XS - 4.25 bpw"; case LLAMA_FTYPE_MOSTLY_IQ3_S: return "IQ3_S - 3.4375 bpw";