diff --git a/ggml/src/ggml-aarch64.c b/ggml/src/ggml-aarch64.c
index 1f28b0f57..008718634 100644
--- a/ggml/src/ggml-aarch64.c
+++ b/ggml/src/ggml-aarch64.c
@@ -386,7 +386,7 @@ void ggml_gemv_q4_0_4x4_q8_0(int n, float * restrict s, size_t bs, const void *
 #if defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8)
     GGML_ASSERT(!(ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) &&
                 "__ARM_NEON and __ARM_FEATURE_MATMUL_INT8 defined, use the Q4_0_4_8 quantization format for optimal performance");
-#elif defined(__ARM_NEON)
+#elif defined(__ARM_NEON) && defined(__aarch64__)
     const void * b_ptr = vx;
     const void * a_ptr = vy;
     float * res_ptr = s;
@@ -557,7 +557,7 @@ void ggml_gemv_q4_0_4x8_q8_0(int n, float * restrict s, size_t bs, const void *
         : [a_ptr] "r" (a_ptr), [nb] "r" (nb)
         : "memory", "v0", "v1", "v2", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", "x20", "x21", "x22", "x23"
     );
-#elif defined(__ARM_NEON)
+#elif defined(__ARM_NEON) && defined(__aarch64__)
     GGML_ASSERT((ggml_cpu_has_sve() || ggml_cpu_has_matmul_int8()) &&
                 "__ARM_FEATURE_SVE and __ARM_FEATURE_MATMUL_INT8 not defined, use the Q4_0_4_4 quantization format for optimal "
                 "performance");
@@ -687,7 +687,7 @@ void ggml_gemv_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
 #if defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8)
     GGML_ASSERT(ggml_cpu_has_sve() &&
                 "__ARM_FEATURE_SVE not defined, use the Q4_0_4_8 quantization format for optimal performance");
-#elif defined(__ARM_NEON)
+#elif defined(__ARM_NEON) && defined(__aarch64__)
     GGML_ASSERT((ggml_cpu_has_sve() || ggml_cpu_has_matmul_int8()) &&
                 "__ARM_FEATURE_SVE and __ARM_FEATURE_MATMUL_INT8 not defined, use the Q4_0_4_4 quantization format for optimal "
                 "performance");
@@ -747,7 +747,7 @@ void ggml_gemm_q4_0_4x4_q8_0(int n, float * restrict s, size_t bs, const void *
 #if defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8)
     GGML_ASSERT(!(ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) &&
                 "__ARM_NEON and __ARM_FEATURE_MATMUL_INT8 defined, use the Q4_0_4_8 quantization format for optimal performance");
-#elif defined(__ARM_NEON)
+#elif defined(__ARM_NEON) && defined(__aarch64__)
     const void * b_ptr = vx;
     const void * a_ptr = vy;
     float * res_ptr = s;
@@ -1661,7 +1661,7 @@ void ggml_gemm_q4_0_4x8_q8_0(int n, float * restrict s, size_t bs, const void *
         : [b_ptr] "r" (b_ptr), [nr] "r" (nr), [nb] "r" (nb), [res_stride] "r" (res_stride), [nc] "r" (nc)
         : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", "x9", "x10", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28"
     );
-#elif defined(__ARM_NEON)
+#elif defined(__ARM_NEON) && defined(__aarch64__)
     GGML_ASSERT((ggml_cpu_has_sve() || ggml_cpu_has_matmul_int8()) &&
                 "__ARM_FEATURE_SVE and __ARM_FEATURE_MATMUL_INT8 not defined, use the Q4_0_4_4 quantization format for optimal "
                 "performance");
@@ -2146,7 +2146,7 @@ void ggml_gemm_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
 #if defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8)
     GGML_ASSERT(ggml_cpu_has_sve() &&
                 "__ARM_FEATURE_SVE not defined, use the Q4_0_4_8 quantization format for optimal performance");
-#elif defined(__ARM_NEON)
+#elif defined(__ARM_NEON) && defined(__aarch64__)
     GGML_ASSERT((ggml_cpu_has_sve() || ggml_cpu_has_matmul_int8()) &&
                 "__ARM_FEATURE_SVE and __ARM_FEATURE_MATMUL_INT8 not defined, use the Q4_0_4_4 quantization format for optimal "
                 "performance");
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
index cd8a9f770..c0aced3d2 100644
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -38,7 +38,7 @@
 #include <unistd.h>
 #endif

-#if defined(__ARM_NEON) || defined(__ARM_FEATURE_SVE) || defined(__ARM_FEATURE_MATMUL_INT8)
+#if defined(__ARM_FEATURE_SVE) || defined(__ARM_FEATURE_MATMUL_INT8)
 #undef GGML_USE_LLAMAFILE
 #endif
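
Note on the guard (reviewer commentary, not part of the patch): __ARM_NEON is also defined by 32-bit ARM compilers (e.g. armv7-a with -mfpu=neon), but the "#elif defined(__ARM_NEON)" branches patched above contain AArch64-only inline assembly; the clobber lists name x-registers such as "x20" through "x28", which do not exist in the 32-bit instruction set. Adding defined(__aarch64__) keeps those branches out of 32-bit builds, which then fall through to the generic C path. The ggml.c hunk narrows the llamafile opt-out to match: GGML_USE_LLAMAFILE is now undefined only for SVE or int8-matmul builds, so a plain-NEON build keeps the llamafile sgemm path.

Below is a minimal self-contained sketch of the pattern (a hypothetical file, not taken from the repository). The A64 scratch register x9 makes the fast branch refuse to assemble on a 32-bit target, which is exactly the failure mode the guard prevents:

    // guard_sketch.c: why __ARM_NEON alone is not a 64-bit check.
    // 32-bit ARM defines __ARM_NEON too, but has no x-registers.
    #include <stdio.h>

    static long add_one(long v) {
    #if defined(__ARM_NEON) && defined(__aarch64__)
        // 64-bit ARM only: A64 mnemonics and x-registers are valid here.
        long r;
        __asm__("mov x9, %1\n\t"
                "add %0, x9, #1"
                : "=r"(r) : "r"(v) : "x9");
        return r;
    #else
        // 32-bit ARM with NEON (or any other target): portable C fallback.
        return v + 1;
    #endif
    }

    int main(void) {
        printf("%ld\n", add_one(41)); // prints 42 on every target
        return 0;
    }

Without the defined(__aarch64__) term, an armv7 NEON build would select the first branch and fail at assembly time, mirroring the original build break in ggml-aarch64.c.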