iq3_s_mult: play with blocks of 16

This brings the bpw to 3.5625. We come close to, but
don't quite match, lookup at 3.4375 bpw (blocks of 32).
This commit is contained in:
Iwan Kawrakow 2024-03-03 16:43:00 +02:00
parent dbe98dfe70
commit f4cb4eac45
3 changed files with 24 additions and 11 deletions

View file

@ -201,10 +201,11 @@ typedef struct {
// Compile-time layout check: block_iq3_xxs must occupy exactly one ggml_fp16_t
// plus 3*(QK_K/8) bytes, so any unexpected padding or field change fails the build.
static_assert(sizeof(block_iq3_xxs) == sizeof(ggml_fp16_t) + 3*(QK_K/8), "wrong iq3_xxs block size/padding");
// 3.5625 bpw with blocks of 16 (3.4375 bpw with blocks of 32)
// Block size from which the scale count below is derived.
#define IQ3S_BLOCK_SIZE 16
// IQ3S_N_SCALE = QK_K / (2 * IQ3S_BLOCK_SIZE).
// NOTE(review): presumably two blocks share one scale entry - confirm against
// the struct that uses this macro.
// With IQ3S_BLOCK_SIZE == 16 the general formula also yields 2 for QK_K == 64;
// the explicit branch is kept unchanged.
// The expansion is fully parenthesized so it is safe next to %, / and * in
// any surrounding expression.
#if QK_K == 64
#define IQ3S_N_SCALE 2
#else
#define IQ3S_N_SCALE (QK_K/(2*IQ3S_BLOCK_SIZE))
#endif
typedef struct {
ggml_fp16_t d;