Fix bug in dequantize_row_iq2_xxs
The 0.25f factor was missing. Great detective work by @ggerganov!
This commit is contained in:
parent
61c04053a4
commit
7db967e811
1 changed file with 1 addition and 1 deletion
|
@@ -2442,7 +2442,7 @@ void dequantize_row_iq2_xxs(const block_iq2_xxs * restrict x, float * restrict y
|
|||
|
||||
for (int ib32 = 0; ib32 < QK_K/32; ++ib32) {
|
||||
memcpy(aux32, x[i].qs + 4*ib32, 2*sizeof(uint32_t));
|
||||
const float db = d * (0.5f + (aux32[1] >> 28));
|
||||
const float db = d * (0.5f + (aux32[1] >> 28)) * 0.25f;
|
||||
for (int l = 0; l < 4; ++l) {
|
||||
const uint8_t * grid = (const uint8_t *)(iq2xxs_grid + aux8[l]);
|
||||
const uint8_t signs = ksigns_iq2xs[(aux32[1] >> 7*l) & 127];
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue