ggml : fix quantize_row_q8_0() ARM_NEON rounding

This commit is contained in:
Georgi Gerganov 2023-04-14 21:27:55 +03:00
parent 2c4f9b658d
commit 312a927f0b
No known key found for this signature in database
GPG key ID: 449E073F9DC10735

3
ggml.c
View file

@@ -1102,8 +1102,7 @@ static void quantize_row_q8_0(const float * restrict x, void * restrict vy, int
for (int l = 0; l < 8; l++) {
const float32x4_t v = vmulq_n_f32(srcv[l], id);
- //TODO: rounding
- const int32x4_t vi = vcvtq_s32_f32(v);
+ const int32x4_t vi = vcvtnq_s32_f32(v);
y[i].qs[4*l + 0] = vgetq_lane_s32(vi, 0);
y[i].qs[4*l + 1] = vgetq_lane_s32(vi, 1);