Update ggml-quants.c

This commit is contained in:
Sigbjørn Skjæret 2024-03-26 18:58:59 +01:00 committed by GitHub
parent 557410b8f0
commit d674812474
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -11779,6 +11779,8 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy
uint16_t * index,
int8_t * shifts) {
float waux[IQ1S_BLOCK_SIZE];
const int gindex = iq2_data_index(GGML_TYPE_IQ1_S);
const uint64_t * kgrid_q2xs = iq2_data[gindex].grid;
@ -11814,12 +11816,13 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy
const float * xbl = x + QK_K*ibl;
float sumx2 = 0;
for (int i = 0; i < QK_K; ++i) sumx2 += xbl[i]*xbl[i];
float sigma2 = 2*sumx2/QK_K;
float sigma2 = sumx2/QK_K;
for (int ib = 0; ib < QK_K/block_size; ++ib) {
const float * xb = xbl + block_size*ib;
const float * qw = quant_weights + QK_K*ibl + block_size*ib;
for (int i = 0; i < block_size; ++i) weight[i] = qw[i] * sqrtf(sigma2 + xb[i]*xb[i]);
for (int i = 0; i < block_size; ++i) waux[i] = sqrtf(weight[i]);
float max = fabsf(xb[0]);
for (int i = 1; i < block_size; ++i) max = MAX(max, fabsf(xb[i]));
if (!max) {
@ -11881,7 +11884,7 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy
if (grid_index < 0) {
all_on_grid = false;
const uint16_t * neighbours = kneighbors_q2xs - kmap_q2xs[u] - 1;
grid_index = iq1_find_best_neighbour2(neighbours, kgrid_q2xs, xb + 8*k, weight + 8*k, scale, xx, L + 8*k, NGRID_IQ1S);
grid_index = iq1_find_best_neighbour2(neighbours, kgrid_q2xs, xb + 8*k, waux + 8*k, scale, xx, L + 8*k, NGRID_IQ1S);
GGML_ASSERT(grid_index >= 0);
}
index[k] = grid_index;
@ -11955,6 +11958,8 @@ static void quantize_row_iq1_m_impl(const float * restrict x, void * restrict vy
uint16_t * index,
int8_t * shifts) {
float waux[IQ1M_BLOCK_SIZE];
const int gindex = iq2_data_index(GGML_TYPE_IQ1_M);
const uint64_t * kgrid_q2xs = iq2_data[gindex].grid;
@ -11996,13 +12001,14 @@ static void quantize_row_iq1_m_impl(const float * restrict x, void * restrict vy
const float * xbl = x + QK_K*ibl;
float sumx2 = 0;
for (int i = 0; i < QK_K; ++i) sumx2 += xbl[i]*xbl[i];
float sigma2 = 2*sumx2/QK_K;
float sigma2 = sumx2/QK_K;
for (int ib = 0; ib < QK_K/block_size; ++ib) {
const float * xb = xbl + block_size*ib;
if (quant_weights) {
const float * qw = quant_weights + QK_K*ibl + block_size*ib;
for (int i = 0; i < block_size; ++i) weight[i] = qw[i] * sqrtf(sigma2 + xb[i]*xb[i]);
for (int i = 0; i < block_size; ++i) waux[i] = sqrtf(weight[i]);
} else {
for (int i = 0; i < block_size; ++i) weight[i] = xb[i]*xb[i];
}
@ -12127,7 +12133,7 @@ static void quantize_row_iq1_m_impl(const float * restrict x, void * restrict vy
if (grid_index < 0) {
all_on_grid = false;
const uint16_t * neighbours = kneighbors_q2xs - kmap_q2xs[u] - 1;
grid_index = iq1_find_best_neighbour2(neighbours, kgrid_q2xs, xb + 8*k, weight + 8*k, scale, xx, L + 8*k, NGRID_IQ1S);
grid_index = iq1_find_best_neighbour2(neighbours, kgrid_q2xs, xb + 8*k, (quant_weights ? waux : weight) + 8*k, scale, xx, L + 8*k, NGRID_IQ1S);
GGML_ASSERT(grid_index >= 0);
}
index[k] = grid_index;