look at the right final memory location.

This commit is contained in:
Julia Longtin 2024-05-11 11:27:52 +00:00
parent fba57c125c
commit fa0226c8df

View file

@@ -100,8 +100,8 @@ inline static void GGML_F32x16_VEC_FMA(const float32x16_t *mvec1, const float32x
"vmovaps\t\t64(%%r12),\t%%zmm4\n\t" "vmovaps\t\t64(%%r12),\t%%zmm4\n\t"
"vfmadd231ps\t%%zmm3,\t%%zmm4,\t%%zmm0\n\t" // Perform a fused multiply add "vfmadd231ps\t%%zmm3,\t%%zmm4,\t%%zmm0\n\t" // Perform a fused multiply add
// No compare. we must be three. // No compare. we must be three.
"vmovaps\t\t64(%%r10),\t%%zmm5\n\t" // Load two vectors. "vmovaps\t\t128(%%r10),\t%%zmm5\n\t" // Load two vectors.
"vmovaps\t\t64(%%r12),\t%%zmm6\n\t" "vmovaps\t\t128(%%r12),\t%%zmm6\n\t"
"vfmadd231ps\t%%zmm5,\t%%zmm6,\t%%zmm0\n\t" // Perform a fused multiply add "vfmadd231ps\t%%zmm5,\t%%zmm6,\t%%zmm0\n\t" // Perform a fused multiply add
"2:\n\t" // Label for loop end "2:\n\t" // Label for loop end
"vmovnraps\t\t%%zmm0,\t(%[RES])\n\t" // Save our results. "vmovnraps\t\t%%zmm0,\t(%[RES])\n\t" // Save our results.