iq3_s: partial fix for QK_K = 64
This commit is contained in:
parent
1d47de3258
commit
e6e61e3158
3 changed files with 17 additions and 7 deletions
|
@ -2526,12 +2526,17 @@ typedef struct {
|
|||
// 98 bytes / block for QK_K = 256, so 3.0625 bpw
|
||||
|
||||
// 3.4375 bpw
|
||||
#if QK_K == 64
|
||||
#define IQ3S_N_SCALE 2
|
||||
#else
|
||||
// Length of the scales[] array in block_iq3_s for this QK_K.
// Parenthesized so the macro expands as a single operand inside larger
// expressions (e.g. `x / IQ3S_N_SCALE` or `x % IQ3S_N_SCALE`).
#define IQ3S_N_SCALE (QK_K/64)
|
||||
#endif
|
||||
typedef struct {
|
||||
half d;
|
||||
uint8_t qs[QK_K/4];
|
||||
uint8_t qh[QK_K/32];
|
||||
uint8_t signs[QK_K/8];
|
||||
uint8_t scales[QK_K/64];
|
||||
uint8_t scales[IQ3S_N_SCALE];
|
||||
} block_iq3_s;
|
||||
|
||||
typedef struct {
|
||||
|
|
|
@ -10772,7 +10772,7 @@ static void quantize_row_iq3_xxs_impl(int grid_size, const float * restrict x, v
|
|||
|
||||
const int kMaxQ = 8;
|
||||
|
||||
const int nbl = n/256;
|
||||
const int nbl = n/QK_K;
|
||||
|
||||
ggml_fp16_t * dh;
|
||||
uint8_t * qs;
|
||||
|
@ -11018,7 +11018,7 @@ static void quantize_row_iq3_s_impl(int block_size, const float * restrict x, vo
|
|||
|
||||
const int kMaxQ = 8;
|
||||
|
||||
const int nbl = n/256;
|
||||
const int nbl = n/QK_K;
|
||||
|
||||
block_iq3_s * y = vy;
|
||||
|
||||
|
@ -11189,7 +11189,7 @@ size_t quantize_iq3_s(const float * src, void * dst, int nrow, int n_per_row, in
|
|||
uint8_t block_signs[IQ3S_BLOCK_SIZE/8];
|
||||
char * qrow = (char *)dst;
|
||||
for (int row = 0; row < nrow; ++row) {
|
||||
quantize_row_iq3_s_impl(32, src, qrow, n_per_row, quant_weights,
|
||||
quantize_row_iq3_s_impl(IQ3S_BLOCK_SIZE, src, qrow, n_per_row, quant_weights,
|
||||
scales, weight, xval, L, Laux, waux, is_on_grid, is_on_grid_aux, block_signs);
|
||||
src += n_per_row;
|
||||
qrow += nblock*sizeof(block_iq3_s);
|
||||
|
|
|
@ -191,15 +191,20 @@ typedef struct {
|
|||
} block_iq3_xxs;
|
||||
static_assert(sizeof(block_iq3_xxs) == sizeof(ggml_fp16_t) + 3*(QK_K/8), "wrong iq3_xxs block size/padding");
|
||||
|
||||
// 3.3125 bpw
|
||||
// 3.4375 bpw
|
||||
#if QK_K == 64
|
||||
#define IQ3S_N_SCALE 2
|
||||
#else
|
||||
// Length of the scales[] array in block_iq3_s for this QK_K.
// Parenthesized so the macro expands as a single operand inside larger
// expressions (e.g. `x / IQ3S_N_SCALE` or `x % IQ3S_N_SCALE`).
#define IQ3S_N_SCALE (QK_K/64)
|
||||
#endif
|
||||
typedef struct {
|
||||
ggml_fp16_t d;
|
||||
uint8_t qs[QK_K/4];
|
||||
uint8_t qh[QK_K/32];
|
||||
uint8_t signs[QK_K/8];
|
||||
uint8_t scales[QK_K/64];
|
||||
uint8_t scales[IQ3S_N_SCALE];
|
||||
} block_iq3_s;
|
||||
static_assert(sizeof(block_iq3_s) == sizeof(ggml_fp16_t) + 27*(QK_K/64), "wrong iq3_s block size/padding");
|
||||
static_assert(sizeof(block_iq3_s) == sizeof(ggml_fp16_t) + 13*(QK_K/32) + IQ3S_N_SCALE, "wrong iq3_s block size/padding");
|
||||
|
||||
typedef struct {
|
||||
ggml_fp16_t d;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue