From f07e570c032b17c1a0a5a8ca6da7339929e83ea3 Mon Sep 17 00:00:00 2001 From: slaren Date: Sat, 18 May 2024 01:15:34 +0200 Subject: [PATCH] use higher eps only for the quants that need it ggml-ci --- ggml-quants.c | 18 +++++++++++------- tests/test-backend-ops.cpp | 11 ++++++++++- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/ggml-quants.c b/ggml-quants.c index 6f26e490e..16b4e52ad 100644 --- a/ggml-quants.c +++ b/ggml-quants.c @@ -14,7 +14,11 @@ #include // for qsort #include // for GGML_ASSERT -#define GROUP_MAX_EPS 1e-7f +#define GROUP_MAX_EPS 1e-15f +#define GROUP_MAX_EPS_IQ3_XXS 1e-8f +#define GROUP_MAX_EPS_IQ2_S 1e-8f +#define GROUP_MAX_EPS_IQ1_M 1e-7f +#define GROUP_MAX_EPS_IQ1_S 1e-12f #if defined(_MSC_VER) // disable "possible loss of data" to avoid warnings for hundreds of casts @@ -1648,7 +1652,7 @@ static float make_qp_quants(int n, int nmax, const float * restrict x, uint8_t * break; } } - return sumlx / suml2; + return sumlx/suml2; } static void quantize_row_q2_K_impl(const float * restrict x, block_q2_K * restrict y, int k, const float * restrict quant_weights) { @@ -12598,7 +12602,7 @@ static void quantize_row_iq2_xxs_impl(const float * restrict x, void * restrict } float max = xval[0]; for (int i = 1; i < 32; ++i) max = MAX(max, xval[i]); - if (!max) { + if (max < GROUP_MAX_EPS) { scales[ib] = 0; memset(L, 0, 32); continue; @@ -13215,7 +13219,7 @@ static void quantize_row_iq3_xxs_impl(int grid_size, const float * restrict x, v } float max = xval[0]; for (int i = 1; i < 32; ++i) max = MAX(max, xval[i]); - if (max < GROUP_MAX_EPS) { + if (max < GROUP_MAX_EPS_IQ3_XXS) { scales[ib] = 0; memset(L, 0, 32); continue; @@ -13755,7 +13759,7 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy for (int i = 0; i < block_size; ++i) weight[i] = qw[i] * sqrtf(sigma2 + xb[i]*xb[i]); float max = fabsf(xb[0]); for (int i = 1; i < block_size; ++i) max = MAX(max, fabsf(xb[i])); - if (max < GROUP_MAX_EPS) { + if (max < GROUP_MAX_EPS_IQ1_S) { scales[ib] = 0; memset(L, 1, block_size); continue; @@ -13943,7 +13947,7 @@ static void quantize_row_iq1_m_impl(const float * restrict x, void * restrict vy } float max = fabsf(xb[0]); for (int i = 1; i < block_size; ++i) max = MAX(max, fabsf(xb[i])); - if (max < GROUP_MAX_EPS) { + if (max < GROUP_MAX_EPS_IQ1_M) { scales[ib] = 0; memset(L, 1, block_size); continue; @@ -14428,7 +14432,7 @@ static void quantize_row_iq2_s_impl(const float * restrict x, void * restrict vy } float max = xval[0]; for (int i = 1; i < 16; ++i) max = MAX(max, xval[i]); - if (max < GROUP_MAX_EPS) { + if (max < GROUP_MAX_EPS_IQ2_S) { scales[ib] = 0; continue; } diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp index dd845a265..c74e253db 100644 --- a/tests/test-backend-ops.cpp +++ b/tests/test-backend-ops.cpp @@ -16,6 +16,7 @@ #include #include + static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float max = 1.0f) { // static RNG initialization (revisit if n_threads stops being constant) static const size_t n_threads = std::thread::hardware_concurrency(); @@ -50,10 +51,17 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m } #if 0 + const char * val_str = getenv("GGML_TEST_EPS"); + float val = 1e-9f; + if (val_str != nullptr) { + val = std::stof(val_str); + printf("GGML_TEST_EPS=%e\n", val); + } + // test quantization with very small values that may result in nan scales due to division by zero if (ggml_is_quantized(tensor->type)) { for (int i = 0; i < 256; i++) { - data[i] = 1e-7f; + data[i] = val; } } #endif @@ -73,6 +81,7 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m } } ggml_quantize_chunk(tensor->type, data.data(), dataq.data(), 0, size/tensor->ne[0], tensor->ne[0], im); + GGML_ASSERT(ggml_validate_row_data(tensor->type, dataq.data(), dataq.size())); ggml_backend_tensor_set(tensor, dataq.data(), 0, dataq.size()); } else if (tensor->type == GGML_TYPE_I8 || tensor->type == GGML_TYPE_I16 || tensor->type == GGML_TYPE_I32) { // This is going to create some weird integers though.