Arm AArch64: use __aarch64__ check to guard 64-bit neon kernels

This commit is contained in:
Dibakar Gope 2024-07-09 18:24:40 +00:00
parent a7abb78565
commit 0e84ef1aa7
2 changed files with 7 additions and 7 deletions

View file

@ -386,7 +386,7 @@ void ggml_gemv_q4_0_4x4_q8_0(int n, float * restrict s, size_t bs, const void *
#if defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8)
GGML_ASSERT(!(ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) &&
"__ARM_NEON and __ARM_FEATURE_MATMUL_INT8 defined, use the Q4_0_4_8 quantization format for optimal performance");
#elif defined(__ARM_NEON)
#elif defined(__ARM_NEON) && defined(__aarch64__)
const void * b_ptr = vx;
const void * a_ptr = vy;
float * res_ptr = s;
@ -557,7 +557,7 @@ void ggml_gemv_q4_0_4x8_q8_0(int n, float * restrict s, size_t bs, const void *
: [a_ptr] "r" (a_ptr), [nb] "r" (nb)
: "memory", "v0", "v1", "v2", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", "x20", "x21", "x22", "x23"
);
#elif defined(__ARM_NEON)
#elif defined(__ARM_NEON) && defined(__aarch64__)
GGML_ASSERT((ggml_cpu_has_sve() || ggml_cpu_has_matmul_int8()) &&
"__ARM_FEATURE_SVE and __ARM_FEATURE_MATMUL_INT8 not defined, use the Q4_0_4_4 quantization format for optimal "
"performance");
@ -687,7 +687,7 @@ void ggml_gemv_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
#if defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8)
GGML_ASSERT(ggml_cpu_has_sve() &&
"__ARM_FEATURE_SVE not defined, use the Q4_0_4_8 quantization format for optimal performance");
#elif defined(__ARM_NEON)
#elif defined(__ARM_NEON) && defined(__aarch64__)
GGML_ASSERT((ggml_cpu_has_sve() || ggml_cpu_has_matmul_int8()) &&
"__ARM_FEATURE_SVE and __ARM_FEATURE_MATMUL_INT8 not defined, use the Q4_0_4_4 quantization format for optimal "
"performance");
@ -747,7 +747,7 @@ void ggml_gemm_q4_0_4x4_q8_0(int n, float * restrict s, size_t bs, const void *
#if defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8)
GGML_ASSERT(!(ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) &&
"__ARM_NEON and __ARM_FEATURE_MATMUL_INT8 defined, use the Q4_0_4_8 quantization format for optimal performance");
#elif defined(__ARM_NEON)
#elif defined(__ARM_NEON) && defined(__aarch64__)
const void * b_ptr = vx;
const void * a_ptr = vy;
float * res_ptr = s;
@ -1661,7 +1661,7 @@ void ggml_gemm_q4_0_4x8_q8_0(int n, float * restrict s, size_t bs, const void *
: [b_ptr] "r" (b_ptr), [nr] "r" (nr), [nb] "r" (nb), [res_stride] "r" (res_stride), [nc] "r" (nc)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", "x9", "x10", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28"
);
#elif defined(__ARM_NEON)
#elif defined(__ARM_NEON) && defined(__aarch64__)
GGML_ASSERT((ggml_cpu_has_sve() || ggml_cpu_has_matmul_int8()) &&
"__ARM_FEATURE_SVE and __ARM_FEATURE_MATMUL_INT8 not defined, use the Q4_0_4_4 quantization format for optimal "
"performance");
@ -2146,7 +2146,7 @@ void ggml_gemm_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
#if defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8)
GGML_ASSERT(ggml_cpu_has_sve() &&
"__ARM_FEATURE_SVE not defined, use the Q4_0_4_8 quantization format for optimal performance");
#elif defined(__ARM_NEON)
#elif defined(__ARM_NEON) && defined(__aarch64__)
GGML_ASSERT((ggml_cpu_has_sve() || ggml_cpu_has_matmul_int8()) &&
"__ARM_FEATURE_SVE and __ARM_FEATURE_MATMUL_INT8 not defined, use the Q4_0_4_4 quantization format for optimal "
"performance");

View file

@ -38,7 +38,7 @@
#include <unistd.h>
#endif
#if defined(__ARM_NEON) || defined(__ARM_FEATURE_SVE) || defined(__ARM_FEATURE_MATMUL_INT8)
#if defined(__ARM_FEATURE_SVE) || defined(__ARM_FEATURE_MATMUL_INT8)
#undef GGML_USE_LLAMAFILE
#endif