Fix our reference to src in the second place (pass *src, not src, as the [SRC] memory operand), and use a more accurate comment.

This commit is contained in:
Julia Longtin 2024-03-24 12:41:21 +00:00
parent 3cdfc9c596
commit 3fef54f5ce

View file

@ -38,7 +38,7 @@ inline static void GGML_F32x8_VEC_ZERO(float32x8_t *target)
uint32_t mask=0x000000FF; uint32_t mask=0x000000FF;
__asm__ __volatile__ ( __asm__ __volatile__ (
"vbroadcastf32x4\t%[Z]%{uint8%},\t%%zmm8\n\t" // use an upscaling operator to clear our value. "vbroadcastf32x4\t%[Z]%{uint8%},\t%%zmm8\n\t" // use an upscaling operator to clear our register.
"kmov\t%[M],\t%%k1\n\t" "kmov\t%[M],\t%%k1\n\t"
"vmovaps\t\t%%zmm8,\t%[RES]%{%%k1%}\n\t" "vmovaps\t\t%%zmm8,\t%[RES]%{%%k1%}\n\t"
: [RES] "+m" (*target) : [RES] "+m" (*target)
@ -54,7 +54,7 @@ inline static void GGML_I32x8_VEC_ZERO(int32x8_t *target)
uint32_t mask=0x000000FF; uint32_t mask=0x000000FF;
__asm__ __volatile__ ( __asm__ __volatile__ (
"vbroadcastI32x4\t%[Z]%{uint8%},\t%%zmm8\n\t" // use an upscaling operator to clear our value. "vbroadcastI32x4\t%[Z]%{uint8%},\t%%zmm8\n\t" // use an upscaling operator to clear our register.
"kmov\t%[M],\t%%k1\n\t" "kmov\t%[M],\t%%k1\n\t"
"vmovaps\t\t%%zmm8,\t%[RES]%{%%k1%}\n\t" "vmovaps\t\t%%zmm8,\t%[RES]%{%%k1%}\n\t"
: [RES] "+m" (*target) : [RES] "+m" (*target)
@ -69,7 +69,7 @@ inline static void GGML_I32x16_VEC_ZERO(int32x16_t *target)
uint8_t zero[4] __attribute__((aligned(64))) = {0,0,0,0}; uint8_t zero[4] __attribute__((aligned(64))) = {0,0,0,0};
__asm__ __volatile__ ( __asm__ __volatile__ (
"vbroadcastI32x4\t%[Z]%{uint8%},\t%%zmm8\n\t" // use an upscaling operator to clear our value. "vbroadcastI32x4\t%[Z]%{uint8%},\t%%zmm8\n\t" // use an upscaling operator to clear our register.
"vmovaps\t\t%%zmm8,\t%[RES]\n\t" "vmovaps\t\t%%zmm8,\t%[RES]\n\t"
: [RES] "+m" (*target) : [RES] "+m" (*target)
: [Z] "m" (zero) : [Z] "m" (zero)
@ -93,7 +93,7 @@ inline static void GGML_I16x8_S_FMA_I32x8 (int16x8_t *src, int32_t scale, int32x
: [RES] "+m" (*dest) : [RES] "+m" (*dest)
: [Z] "m" (zero), : [Z] "m" (zero),
[M] "r" (mask), [M] "r" (mask),
[SRC] "m" (src), [SRC] "m" (*src),
[SCALE] "m" (scaleVec) [SCALE] "m" (scaleVec)
: "zmm0", "zmm1", "zmm2", "k1", "memory"); : "zmm0", "zmm1", "zmm2", "k1", "memory");
} }