iq1_m: minor
parent 19fb974d77
commit abc1d4f951
3 changed files with 2 additions and 12 deletions
@@ -26,7 +26,7 @@ static const std::vector<struct quant_option> QUANT_OPTIONS = {
     { "IQ2_S", LLAMA_FTYPE_MOSTLY_IQ2_S, " 2.5 bpw quantization", },
     { "IQ2_M", LLAMA_FTYPE_MOSTLY_IQ2_M, " 2.7 bpw quantization", },
     { "IQ1_S", LLAMA_FTYPE_MOSTLY_IQ1_S, " 1.56 bpw quantization", },
-    { "IQ1_M", LLAMA_FTYPE_MOSTLY_IQ1_M, " 1.81 bpw quantization", },
+    { "IQ1_M", LLAMA_FTYPE_MOSTLY_IQ1_M, " 1.75 bpw quantization", },
     { "Q2_K", LLAMA_FTYPE_MOSTLY_Q2_K, " 2.63G, +0.6717 ppl @ LLaMA-v1-7B", },
     { "Q2_K_S", LLAMA_FTYPE_MOSTLY_Q2_K_S, " 2.16G, +9.0634 ppl @ LLaMA-v1-7B", },
     { "IQ3_XXS",LLAMA_FTYPE_MOSTLY_IQ3_XXS," 3.06 bpw quantization", },
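Note: the help-text change above (1.81 -> 1.75 bpw) is consistent with an IQ1_M super-block shrinking from 58 to 56 bytes once the fp16 super-block scale is packed into the scale words instead of living in its own d field. A minimal sketch of that arithmetic, assuming a block layout of qs[QK_K/8] + qh[QK_K/16] + scales[QK_K/32] bytes with QK_K = 256 (these field names and sizes are assumptions taken from ggml's headers, not part of this diff):

#include <stdio.h>

enum { QK_K = 256 };  // super-block size assumed here

int main(void) {
    // Assumed IQ1_M layout per 256 weights: qs = 32 bytes, qh = 16 bytes, scales = 8 bytes.
    const int bytes_packed   = QK_K/8 + QK_K/16 + QK_K/32; // 56: fp16 scale folded into the scale words
    const int bytes_separate = bytes_packed + 2;           // 58: with a separate fp16 d field
    printf("IQ1_M: %.4f bpw (packed), %.4f bpw (separate d)\n",
           8.0 * bytes_packed / QK_K,      // 1.7500
           8.0 * bytes_separate / QK_K);   // 1.8125, rounded to 1.81 in the old help text
    return 0;
}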
@@ -12177,16 +12177,6 @@ static void quantize_row_iq1_m_impl(const float * restrict x, void * restrict vy
         sc[1] |= ((s.u16 & 0x00f0) << 8);
         sc[2] |= ((s.u16 & 0x0f00) << 4);
         sc[3] |= ((s.u16 & 0xf000) << 0);
-        //y[ibl].d = GGML_FP32_TO_FP16(d*1.125f); // 1.125f is another fudge factor. Don't ask me why it is needed.
-        //for (int ib = 0; ib < QK_K/block_size; ib += 2) {
-        //    int l1 = nearest_int(0.5f*(id*scales[ib+0]-1));
-        //    l1 = MAX(0, MIN(7, l1));
-        //    int l2 = nearest_int(0.5f*(id*scales[ib+1]-1));
-        //    l2 = MAX(0, MIN(7, l2));
-        //    y[ibl].scales[ib/2] = l1 | (l2 << 4);
-        //    y[ibl].qh[ib+0] |= masks[shifts[ib+0]];
-        //    y[ibl].qh[ib+1] |= masks[shifts[ib+1]];
-        //}
     }
 }
 
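Note: the surviving sc[1]..sc[3] lines in this hunk spread the 16 bits of the fp16 super-block scale across the top nibble of each 16-bit scale word, which is why the commented-out y[ibl].d assignment goes away with the rest of the dead code. A standalone sketch of that packing and the matching unpack, assuming the low 12 bits of each word hold the per-block scales (pack_iq1_m_d / unpack_iq1_m_d are hypothetical names for illustration, not functions from this codebase):

#include <assert.h>
#include <stdint.h>

// Pack the 16 bits of the fp16 super-block scale into the top nibble of each
// of the four 16-bit scale words (their low 12 bits are assumed to hold the
// per-block scales).
static void pack_iq1_m_d(uint16_t fp16_bits, uint16_t sc[4]) {
    sc[0] |= (uint16_t)((fp16_bits & 0x000f) << 12); // nibble 0 -> top of sc[0] (assumed)
    sc[1] |= (uint16_t)((fp16_bits & 0x00f0) <<  8); // same shift as in the diff above
    sc[2] |= (uint16_t)((fp16_bits & 0x0f00) <<  4);
    sc[3] |= (uint16_t)((fp16_bits & 0xf000) <<  0);
}

// Dequantization side: reassemble the fp16 bits from the four top nibbles.
static uint16_t unpack_iq1_m_d(const uint16_t sc[4]) {
    return (uint16_t)( ((sc[0] >> 12) & 0xf)
                     | (((sc[1] >> 12) & 0xf) << 4)
                     | (((sc[2] >> 12) & 0xf) << 8)
                     | (((sc[3] >> 12) & 0xf) << 12));
}

int main(void) {
    uint16_t sc[4] = {0x0123, 0x0456, 0x0789, 0x0abc}; // low 12 bits: block scales
    pack_iq1_m_d(0xd2f1, sc);                          // arbitrary fp16 bit pattern
    assert(unpack_iq1_m_d(sc) == 0xd2f1);              // round-trips exactly
    return 0;
}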
@@ -3414,7 +3414,7 @@ static std::string llama_model_ftype_name(llama_ftype ftype) {
         case LLAMA_FTYPE_MOSTLY_IQ3_XS: return "IQ3_XS - 3.3 bpw";
         case LLAMA_FTYPE_MOSTLY_IQ3_XXS:return "IQ3_XXS - 3.0625 bpw";
         case LLAMA_FTYPE_MOSTLY_IQ1_S :return "IQ1_S - 1.5625 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ1_M :return "IQ1_M - 1.8125 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ1_M :return "IQ1_M - 1.75 bpw";
         case LLAMA_FTYPE_MOSTLY_IQ4_NL: return "IQ4_NL - 4.5 bpw";
         case LLAMA_FTYPE_MOSTLY_IQ4_XS: return "IQ4_XS - 4.25 bpw";
         case LLAMA_FTYPE_MOSTLY_IQ3_S: return "IQ3_S - 3.4375 bpw";