From c4e8eb728adc8b9a9963219f5ef16452e38ce42d Mon Sep 17 00:00:00 2001 From: kalomaze <66376113+kalomaze@users.noreply.github.com> Date: Sun, 14 Jan 2024 07:43:32 -0600 Subject: [PATCH] Round away from zero test --- ggml-quants.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ggml-quants.c b/ggml-quants.c index 601d155d7..bcb265a1b 100644 --- a/ggml-quants.c +++ b/ggml-quants.c @@ -500,8 +500,9 @@ void quantize_row_q4_0_reference(const float * restrict x, block_q4_0 * restrict const float x0 = x[i*qk + 0 + j]*id; const float x1 = x[i*qk + qk/2 + j]*id; - const uint8_t xi0 = MIN(15, (int8_t)(x0 + 8.5f)); - const uint8_t xi1 = MIN(15, (int8_t)(x1 + 8.5f)); + // Experimental change that rounds away from zero instead of rounding to nearest (the previous + 8.5f offset). NOTE(review): truncating x + 8.0f / x + 9.0f looks like rounding toward zero; confirm + const uint8_t xi0 = MIN(15, (int8_t)(x0 + (x0 >= 0 ? 8.0f : 9.0f))); + const uint8_t xi1 = MIN(15, (int8_t)(x1 + (x1 >= 0 ? 8.0f : 9.0f))); y[i].qs[j] = xi0; y[i].qs[j] |= xi1 << 4;