Fix bug in qequantize_row_iq2_xxs

The 0.25f factor was missing.
Great detective work by @ggerganov!
This commit is contained in:
Iwan Kawrakow 2024-01-08 12:12:00 +01:00
parent 61c04053a4
commit 7db967e811

View file

@ -2442,7 +2442,7 @@ void dequantize_row_iq2_xxs(const block_iq2_xxs * restrict x, float * restrict y
for (int ib32 = 0; ib32 < QK_K/32; ++ib32) {
memcpy(aux32, x[i].qs + 4*ib32, 2*sizeof(uint32_t));
const float db = d * (0.5f + (aux32[1] >> 28));
const float db = d * (0.5f + (aux32[1] >> 28)) * 0.25f;
for (int l = 0; l < 4; ++l) {
const uint8_t * grid = (const uint8_t *)(iq2xxs_grid + aux8[l]);
const uint8_t signs = ksigns_iq2xs[(aux32[1] >> 7*l) & 127];