ggml : fix quants nans when all the group weights are very close to zero

This commit is contained in:
slaren 2024-05-15 23:26:11 +02:00
parent e1b40ac3b9
commit 6fa6a9a10a
2 changed files with 17 additions and 8 deletions

View file

@@ -1109,7 +1109,7 @@ static float make_qx_quants(int n, int nmax, const float * restrict x, int8_t *
float ax = fabsf(x[i]); float ax = fabsf(x[i]);
if (ax > amax) { amax = ax; max = x[i]; } if (ax > amax) { amax = ax; max = x[i]; }
} }
if (amax < 1e-30f) { // all zero if (amax < 1e-20f) { // all zero
for (int i = 0; i < n; ++i) { for (int i = 0; i < n; ++i) {
L[i] = 0; L[i] = 0;
} }
@@ -1177,7 +1177,7 @@ static float make_q3_quants(int n, int nmax, const float * restrict x, int8_t *
float ax = fabsf(x[i]); float ax = fabsf(x[i]);
if (ax > amax) { amax = ax; max = x[i]; } if (ax > amax) { amax = ax; max = x[i]; }
} }
if (!amax) { // all zero if (amax < 1e-20f) { // all zero
for (int i = 0; i < n; ++i) { L[i] = 0; } for (int i = 0; i < n; ++i) { L[i] = 0; }
return 0.f; return 0.f;
} }
@@ -2653,7 +2653,7 @@ void quantize_row_q6_K_reference(const float * restrict x, block_q6_K * restrict
} }
if (!max_abs_scale) { if (max_abs_scale < 1e-20f) {
memset(&y[i], 0, sizeof(block_q6_K)); memset(&y[i], 0, sizeof(block_q6_K));
y[i].d = GGML_FP32_TO_FP16(0.f); y[i].d = GGML_FP32_TO_FP16(0.f);
x += QK_K; x += QK_K;
@@ -2805,7 +2805,7 @@ static void quantize_row_q6_K_impl(const float * restrict x, block_q6_K * restri
} }
if (!max_abs_scale) { if (max_abs_scale < 1e-20f) {
memset(&y[i], 0, sizeof(block_q6_K)); memset(&y[i], 0, sizeof(block_q6_K));
y[i].d = GGML_FP32_TO_FP16(0.f); y[i].d = GGML_FP32_TO_FP16(0.f);
x += QK_K; x += QK_K;
@@ -13213,7 +13213,7 @@ static void quantize_row_iq3_xxs_impl(int grid_size, const float * restrict x, v
} }
float max = xval[0]; float max = xval[0];
for (int i = 1; i < 32; ++i) max = MAX(max, xval[i]); for (int i = 1; i < 32; ++i) max = MAX(max, xval[i]);
if (!max) { if (max < 1e-20f) {
scales[ib] = 0; scales[ib] = 0;
memset(L, 0, 32); memset(L, 0, 32);
continue; continue;
@@ -13941,7 +13941,7 @@ static void quantize_row_iq1_m_impl(const float * restrict x, void * restrict vy
} }
float max = fabsf(xb[0]); float max = fabsf(xb[0]);
for (int i = 1; i < block_size; ++i) max = MAX(max, fabsf(xb[i])); for (int i = 1; i < block_size; ++i) max = MAX(max, fabsf(xb[i]));
if (!max) { if (max < 1e-20f) {
scales[ib] = 0; scales[ib] = 0;
memset(L, 1, block_size); memset(L, 1, block_size);
continue; continue;
@@ -14205,7 +14205,7 @@ static void quantize_row_iq4_nl_impl(const int super_block_size, const int block
amax = ax; max = xb[j]; amax = ax; max = xb[j];
} }
} }
if (!amax) { if (amax < 1e-20f) {
scales[ib] = 0; scales[ib] = 0;
continue; continue;
} }
@@ -14426,7 +14426,7 @@ static void quantize_row_iq2_s_impl(const float * restrict x, void * restrict vy
} }
float max = xval[0]; float max = xval[0];
for (int i = 1; i < 16; ++i) max = MAX(max, xval[i]); for (int i = 1; i < 16; ++i) max = MAX(max, xval[i]);
if (!max) { if (max < 1e-20f) {
scales[ib] = 0; scales[ib] = 0;
continue; continue;
} }

View file

@@ -49,6 +49,15 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
t.join(); t.join();
} }
#if 0
// test quantization with very small values that may result in nan scales due to division by zero
if (ggml_is_quantized(tensor->type)) {
for (int i = 0; i < 256; i++) {
data[i] = 1e-24f;
}
}
#endif
if (tensor->type == GGML_TYPE_F32 || tensor->type == GGML_TYPE_I32) { if (tensor->type == GGML_TYPE_F32 || tensor->type == GGML_TYPE_I32) {
ggml_backend_tensor_set(tensor, data.data(), 0, size * sizeof(float)); ggml_backend_tensor_set(tensor, data.data(), 0, size * sizeof(float));
} else if (ggml_is_quantized(tensor->type) || tensor->type == GGML_TYPE_F16 || tensor->type == GGML_TYPE_BF16) { } else if (ggml_is_quantized(tensor->type) || tensor->type == GGML_TYPE_F16 || tensor->type == GGML_TYPE_BF16) {