formatting changes.

2024-04-03 20:24:00 +00:00 · 2024-04-03 20:24:00 +00:00 · 6f67ea886f
commit 6f67ea886f
parent 96fdd214c8
1 changed files with 36 additions and 39 deletions
--- a/ggml-phi-knc.c
+++ b/ggml-phi-knc.c
@@ -6,14 +6,11 @@
 // For memcpy.
 #include <string.h>
 // This SIMD unit can work with 32 float32s at once.
 #define GGML_F32_STEP 32
 // We can fit 16 of these float32s in a single vector register.
 #define GGML_F32_EPR 16
-// a single vector. 128*32=512
+// A vector of 16 floats.
-typedef float float32x16_t __attribute__((vector_size (128)));
+typedef float float32x16_t __attribute__((vector_size (64), aligned (64)));
 #define GGML_F32x16              float32x16_t
 // A forward declaration, to keep GCC happy...
 void ggml_vec_dot_f32(int n, float * restrict s, size_t bs, const float * restrict x, size_t bx, const float * restrict y, size_t by, int nrc);
@@ -109,7 +106,7 @@ inline static void GGML_F32x16_VEC_FMA(const float32x16_t *mvec1, const float32x
 void ggml_vec_dot_f32(int n, float * restrict s, size_t bs, const float * restrict x, size_t bx, const float * restrict y, size_t by, int nrc)
 {
  // our sum.
-  float32x16_t sum __attribute__((aligned(64)));
+  float32x16_t sum;
  // the number of vector-sized steps we will need to do.
  const uint32_t np = (n & ~(GGML_F32_EPR - 1));
@@ -121,10 +118,10 @@ void ggml_vec_dot_f32(int n, float * restrict s, size_t bs, const float * restri
    {
      // add the leftovers, that could not be handled by the vector loop.
      // our extended last part of x.
-      float32x16_t v1 __attribute__((aligned(64)));
+      float32x16_t v1;
      GGML_F32x16_VEC_ZERO(&v1);
      // our extended last part of y.
-      float32x16_t v2 __attribute__((aligned(64)));
+      float32x16_t v2;
      GGML_F32x16_VEC_ZERO(&v2);
      memcpy(&v1, &x[np], (n - np)*sizeof(float));