Make the iq3_s scale count and the iq3 block counts depend on QK_K.

  * block_iq3_s (ggml-metal.metal, ggml-quants.h): the scales array length
    becomes IQ3S_N_SCALE, which is 2 when QK_K == 64 and QK_K/64 otherwise.
  * quantize_row_iq3_xxs_impl / quantize_row_iq3_s_impl (ggml-quants.c):
    the block count is n/QK_K instead of the hard-coded n/256.
  * quantize_iq3_s (ggml-quants.c): passes IQ3S_BLOCK_SIZE to
    quantize_row_iq3_s_impl instead of the literal 32.
  * ggml-quants.h: the block-size static_assert and the bpw comment are
    updated for the new layout.

The replacement list of IQ3S_N_SCALE is parenthesized so the macro expands
safely inside larger expressions.

NOTE(review): the indentation of context lines was reconstructed from a
whitespace-collapsed copy of this patch; if a hunk fails to match, apply with
whitespace-insensitive context matching and confirm against the target tree.

diff --git a/ggml-metal.metal b/ggml-metal.metal
index cfcc32257..aa954926b 100644
--- a/ggml-metal.metal
+++ b/ggml-metal.metal
@@ -2526,12 +2526,17 @@ typedef struct {
 // 98 bytes / block for QK_K = 256, so 3.0625 bpw
 
 // 3.4375 bpw
+#if QK_K == 64
+#define IQ3S_N_SCALE 2
+#else
+#define IQ3S_N_SCALE (QK_K/64)
+#endif
 typedef struct {
     half d;
     uint8_t qs[QK_K/4];
     uint8_t qh[QK_K/32];
     uint8_t signs[QK_K/8];
-    uint8_t scales[QK_K/64];
+    uint8_t scales[IQ3S_N_SCALE];
 } block_iq3_s;
 
 typedef struct {
diff --git a/ggml-quants.c b/ggml-quants.c
index c1a168328..5c5f2ce1b 100644
--- a/ggml-quants.c
+++ b/ggml-quants.c
@@ -10772,7 +10772,7 @@ static void quantize_row_iq3_xxs_impl(int grid_size, const float * restrict x, v
 
     const int kMaxQ = 8;
 
-    const int nbl = n/256;
+    const int nbl = n/QK_K;
 
     ggml_fp16_t * dh;
     uint8_t * qs;
@@ -11018,7 +11018,7 @@ static void quantize_row_iq3_s_impl(int block_size, const float * restrict x, vo
 
     const int kMaxQ = 8;
 
-    const int nbl = n/256;
+    const int nbl = n/QK_K;
 
     block_iq3_s * y = vy;
 
@@ -11189,7 +11189,7 @@ size_t quantize_iq3_s(const float * src, void * dst, int nrow, int n_per_row, in
     uint8_t block_signs[IQ3S_BLOCK_SIZE/8];
     char * qrow = (char *)dst;
     for (int row = 0; row < nrow; ++row) {
-        quantize_row_iq3_s_impl(32, src, qrow, n_per_row, quant_weights,
+        quantize_row_iq3_s_impl(IQ3S_BLOCK_SIZE, src, qrow, n_per_row, quant_weights,
                 scales, weight, xval, L, Laux, waux, is_on_grid, is_on_grid_aux, block_signs);
         src += n_per_row;
         qrow += nblock*sizeof(block_iq3_s);
diff --git a/ggml-quants.h b/ggml-quants.h
index c217db925..303b0b6f9 100644
--- a/ggml-quants.h
+++ b/ggml-quants.h
@@ -191,15 +191,20 @@ typedef struct {
 } block_iq3_xxs;
 static_assert(sizeof(block_iq3_xxs) == sizeof(ggml_fp16_t) + 3*(QK_K/8), "wrong iq3_xxs block size/padding");
 
-// 3.3125 bpw
+// 3.4375 bpw
+#if QK_K == 64
+#define IQ3S_N_SCALE 2
+#else
+#define IQ3S_N_SCALE (QK_K/64)
+#endif
 typedef struct {
     ggml_fp16_t d;
     uint8_t qs[QK_K/4];
     uint8_t qh[QK_K/32];
     uint8_t signs[QK_K/8];
-    uint8_t scales[QK_K/64];
+    uint8_t scales[IQ3S_N_SCALE];
 } block_iq3_s;
-static_assert(sizeof(block_iq3_s) == sizeof(ggml_fp16_t) + 27*(QK_K/64), "wrong iq3_s block size/padding");
+static_assert(sizeof(block_iq3_s) == sizeof(ggml_fp16_t) + 13*(QK_K/32) + IQ3S_N_SCALE, "wrong iq3_s block size/padding");
 
 typedef struct {
     ggml_fp16_t d;