iq3_s_mult: play with blocks of 16
This brings the bpw to 3.5625. We come close to, but don't quite match, the lookup version at 3.4375 bpw (blocks of 32).
This commit is contained in:
parent
dbe98dfe70
commit
f4cb4eac45
3 changed files with 24 additions and 11 deletions
|
@ -201,10 +201,11 @@ typedef struct {
|
|||
static_assert(sizeof(block_iq3_xxs) == sizeof(ggml_fp16_t) + 3*(QK_K/8), "wrong iq3_xxs block size/padding");
|
||||
|
||||
// 3.4375 bpw with blocks of 32; 3.5625 bpw with blocks of 16 (IQ3S_BLOCK_SIZE 16)
|
||||
#define IQ3S_BLOCK_SIZE 16
|
||||
#if QK_K == 64
|
||||
#define IQ3S_N_SCALE 2
|
||||
#else
|
||||
#define IQ3S_N_SCALE QK_K/64
|
||||
#define IQ3S_N_SCALE QK_K/(2*IQ3S_BLOCK_SIZE)
|
||||
#endif
|
||||
typedef struct {
|
||||
ggml_fp16_t d;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue