diff --git a/ggml-quants.c b/ggml-quants.c index ec695cf98..32f42bbcd 100644 --- a/ggml-quants.c +++ b/ggml-quants.c @@ -11928,9 +11928,9 @@ size_t quantize_iq1_m(const float * restrict src, void * restrict dst, int nrow, for (int row = 0; row < nrow; ++row) { quantize_row_iq1_impl(GGML_TYPE_IQ1_M, src, qrow, n_per_row, quant_weights, scales, weight, sumx, sumw, pairs, L, index, shifts); src += n_per_row; - qrow += nblock*sizeof(block_iq1_s); + qrow += nblock*sizeof(block_iq1_m); } - return nrow * nblock * sizeof(block_iq1_s); + return nrow * nblock * sizeof(block_iq1_m); } // ============================ 4-bit non-linear quants diff --git a/ggml.c b/ggml.c index 715f38ddf..be8691349 100644 --- a/ggml.c +++ b/ggml.c @@ -783,6 +783,18 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = { .nrows = 1, }, [GGML_TYPE_IQ1_S] = { + .type_name = "iq1_s", + .blck_size = QK_K, + .type_size = sizeof(block_iq1_s), + .is_quantized = true, + .to_float = (ggml_to_float_t) dequantize_row_iq1_s, + .from_float = NULL, + .from_float_reference = NULL, + .vec_dot = ggml_vec_dot_iq1_s_q8_K, + .vec_dot_type = GGML_TYPE_Q8_K, + .nrows = 1, + }, + [GGML_TYPE_IQ1_M] = { .type_name = "iq1_m", .blck_size = QK_K, .type_size = sizeof(block_iq1_m), @@ -794,18 +806,6 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = { .vec_dot_type = GGML_TYPE_Q8_K, .nrows = 1, }, - [GGML_TYPE_IQ1_M] = { - .type_name = "iq1_m", - .blck_size = QK_K, - .type_size = sizeof(block_iq1_s), - .is_quantized = true, - .to_float = (ggml_to_float_t) dequantize_row_iq1_s, - .from_float = NULL, - .from_float_reference = NULL, - .vec_dot = ggml_vec_dot_iq1_s_q8_K, - .vec_dot_type = GGML_TYPE_Q8_K, - .nrows = 1, - }, [GGML_TYPE_IQ4_NL] = { .type_name = "iq4_nl", .blck_size = QK4_NL,