ggml-quants : use ceiling division when quantizing q1_3
This commit is contained in:
parent
9465ec6e12
commit
89dc3b254c
4 changed files with 12 additions and 7 deletions
|
@@ -3389,8 +3389,8 @@ void quantize_row_q1_3_reference(const float * restrict x, block_q1_3 * restrict
|
|||
int xi = nearest_int(x[j]);
|
||||
uint8_t xt = xi < 0 ? 0 : xi == 0 ? 1 : 2;
|
||||
q[j] += xt * pow3[4];
|
||||
q[j] = ((uint16_t)q[j] * 256) / pow3[5];
|
||||
q[j] += (uint8_t)(q[j] != 0);
|
||||
// ceiling division
|
||||
q[j] = ((uint16_t)q[j] * 256 + (pow3[5] - 1)) / pow3[5];
|
||||
y[i].q[j] = q[j];
|
||||
}
|
||||
x += sizeof(y->q);
|
||||
|
@@ -3403,8 +3403,8 @@ void quantize_row_q1_3_reference(const float * restrict x, block_q1_3 * restrict
|
|||
qb += xt * pow3[m];
|
||||
}
|
||||
x += 4;
|
||||
qb = ((uint16_t)qb * 256) / pow3[5];
|
||||
qb += (uint8_t)(qb != 0);
|
||||
// ceiling division
|
||||
qb = ((uint16_t)qb * 256 + (pow3[5] - 1)) / pow3[5];
|
||||
y[i].qs[j] = qb;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue