fix vector sizes.
This commit is contained in:
parent
2a47e5f05f
commit
20c2bc53f9
1 changed file with 11 additions and 12 deletions
|
@@ -15,19 +15,18 @@
|
||||||
// For block_q5_K and block_q8_K. only given the second time.
|
// For block_q5_K and block_q8_K. only given the second time.
|
||||||
#include "ggml-common.h"
|
#include "ggml-common.h"
|
||||||
|
|
||||||
|
|
||||||
// This SIMD unit can work with 32 float32s at once.
|
// This SIMD unit can work with 32 float32s at once.
|
||||||
#define GGML_F32_STEP 32
|
#define GGML_F32_STEP 32
|
||||||
// We can fit 16 of these float32s in a single vector register.
|
// We can fit 16 of these float32s in a single vector register.
|
||||||
#define GGML_F32_EPR 16
|
#define GGML_F32_EPR 16
|
||||||
|
|
||||||
typedef float float32x8_t __attribute__((vector_size (64)));
|
typedef float float32x8_t __attribute__((vector_size (32)));
|
||||||
typedef float float32x16_t __attribute__((vector_size (128)));
|
typedef float float32x16_t __attribute__((vector_size (64)));
|
||||||
typedef int8_t int8x16_t __attribute__((vector_size (32)));
|
typedef int8_t int8x16_t __attribute__((vector_size (16)));
|
||||||
typedef int16_t int16x8_t __attribute__((vector_size (32)));
|
typedef int16_t int16x8_t __attribute__((vector_size (16)));
|
||||||
typedef int16_t int16x16_t __attribute__((vector_size (64)));
|
typedef int16_t int16x16_t __attribute__((vector_size (32)));
|
||||||
typedef int32_t int32x8_t __attribute__((vector_size (64)));
|
typedef int32_t int32x8_t __attribute__((vector_size (32)));
|
||||||
typedef int32_t int32x16_t __attribute__((vector_size (128)));
|
typedef int32_t int32x16_t __attribute__((vector_size (64)));
|
||||||
|
|
||||||
/* A forward declaration, to keep GCC happy. */
|
/* A forward declaration, to keep GCC happy. */
|
||||||
void ggml_vec_dot_q5_K_q8_K(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc);
|
void ggml_vec_dot_q5_K_q8_K(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc);
|
||||||
|
@@ -145,10 +144,10 @@ void ggml_vec_dot_q5_K_q8_K(int n, float * restrict s, size_t bs, const void * r
|
||||||
const uint8_t * scales = (const uint8_t*)&utmp[0];
|
const uint8_t * scales = (const uint8_t*)&utmp[0];
|
||||||
const uint8_t * mins = (const uint8_t*)&utmp[2];
|
const uint8_t * mins = (const uint8_t*)&utmp[2];
|
||||||
|
|
||||||
float32x16_t sums __attribute__((aligned(128)));
|
float32x16_t sums __attribute__((aligned(64)));
|
||||||
int8x16_t aux8[QK_K/16] __attribute__((aligned(32)));
|
int8x16_t aux8[QK_K/16] __attribute__((aligned(16)));
|
||||||
int16x16_t aux16[QK_K/16] __attribute__((aligned(64)));
|
int16x16_t aux16[QK_K/16] __attribute__((aligned(32)));
|
||||||
int32x16_t aux32 __attribute__((aligned(128)));
|
int32x16_t aux32 __attribute__((aligned(64)));
|
||||||
|
|
||||||
GGML_F32x16_VEC_ZERO(&sums);
|
GGML_F32x16_VEC_ZERO(&sums);
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue