ggml : fix quantize_row_q8_0() ARM_NEON rounding
This commit is contained in:
parent
801aab14aa
commit
59fb9e9eb8
1 changed file with 1 addition and 2 deletions
3
ggml.c
3
ggml.c
|
@@ -1093,8 +1093,7 @@ static void quantize_row_q8_0(const float * restrict x, void * restrict vy, int
|
|||
|
||||
for (int l = 0; l < 8; l++) {
|
||||
const float32x4_t v = vmulq_n_f32(srcv[l], id);
|
||||
-//TODO: rounding
|
||||
-const int32x4_t vi = vcvtq_s32_f32(v);
|
||||
+const int32x4_t vi = vcvtnq_s32_f32(v);
|
||||
|
||||
y[i].qs[4*l + 0] = vgetq_lane_s32(vi, 0);
|
||||
y[i].qs[4*l + 1] = vgetq_lane_s32(vi, 1);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue