ggml-cpu: add __ARM_FEATURE_DOTPROD guard

This commit is contained in:
Shupei Fan 2024-11-27 21:52:12 +08:00
parent f56013dcb5
commit 0aa6488a67
3 changed files with 25 additions and 9 deletions

View file

@ -91,6 +91,7 @@ extern "C" {
GGML_BACKEND_API int ggml_cpu_has_neon (void);
GGML_BACKEND_API int ggml_cpu_has_arm_fma (void);
GGML_BACKEND_API int ggml_cpu_has_fp16_va (void);
GGML_BACKEND_API int ggml_cpu_has_dotprod (void);
GGML_BACKEND_API int ggml_cpu_has_matmul_int8(void);
GGML_BACKEND_API int ggml_cpu_has_sve (void);
GGML_BACKEND_API int ggml_cpu_get_sve_cnt (void); // sve vector length in bytes

View file

@ -530,7 +530,7 @@ void ggml_gemv_q4_0_4x4_q8_0(int n, float * restrict s, size_t bs, const void *
UNUSED(blocklen);
#if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__) && defined(__ARM_NEON)
if (ggml_cpu_has_neon()) {
if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
const void * b_ptr = vx;
const void * a_ptr = vy;
float * res_ptr = s;
@ -1017,8 +1017,8 @@ void ggml_gemv_iq4_nl_4x4_q8_0(int n, float * restrict s, size_t bs, const void
UNUSED(ncols_interleaved);
UNUSED(blocklen);
#if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__) && defined(__ARM_NEON)
if (ggml_cpu_has_neon()) {
#if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__) && defined(__ARM_NEON) && defined(__ARM_FEATURE_DOTPROD)
if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
const int8x16_t kvalues = vld1q_s8(kvalues_iq4nl);
const block_q8_0 * a_ptr = (const block_q8_0 *) vy;
float * res_ptr = s;
@ -1115,7 +1115,7 @@ void ggml_gemm_q4_0_4x4_q8_0(int n, float * restrict s, size_t bs, const void *
UNUSED(blocklen);
#if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__) && defined(__ARM_NEON)
if (ggml_cpu_has_neon()) {
if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
const void * b_ptr = vx;
const void * a_ptr = vy;
float * res_ptr = s;
@ -3504,8 +3504,8 @@ void ggml_gemm_iq4_nl_4x4_q8_0(int n, float * restrict s, size_t bs, const void
UNUSED(ncols_interleaved);
UNUSED(blocklen);
#if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__) && defined(__ARM_NEON)
if (ggml_cpu_has_neon()) {
#if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__) && defined(__ARM_NEON) && defined(__ARM_FEATURE_DOTPROD)
if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
const int8x16_t kvalues = vld1q_s8(kvalues_iq4nl);
for (int y = 0; y < nr / 4; y++) {
@ -3834,11 +3834,11 @@ enum ggml_type ggml_aarch64_get_optimal_repack_type(const struct ggml_tensor * c
if (ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) {
return GGML_TYPE_Q4_0_4_8;
}
if (ggml_cpu_has_neon()) {
if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
return GGML_TYPE_Q4_0_4_4;
}
} else if (cur->type == GGML_TYPE_IQ4_NL) {
if (ggml_cpu_has_neon()) {
if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
return GGML_TYPE_IQ4_NL_4_4;
}
}

View file

@ -109,10 +109,11 @@ static ggml_fp16_t ggml_table_gelu_quick_f16[1 << 16];
#if defined(__ARM_ARCH)
struct ggml_arm_arch_features_type {
int has_neon;
int has_dotprod;
int has_i8mm;
int has_sve;
int sve_cnt;
} ggml_arm_arch_features = {-1, -1, -1, 0};
} ggml_arm_arch_features = {-1, -1, -1, -1, 0};
#endif
@ -2448,6 +2449,7 @@ static void ggml_init_arm_arch_features(void) {
uint32_t hwcap2 = getauxval(AT_HWCAP2);
ggml_arm_arch_features.has_neon = !!(hwcap & HWCAP_ASIMD);
ggml_arm_arch_features.has_dotprod = !!(hwcap && HWCAP_ASIMDDP);
ggml_arm_arch_features.has_i8mm = !!(hwcap2 & HWCAP2_I8MM);
ggml_arm_arch_features.has_sve = !!(hwcap & HWCAP_SVE);
@ -2462,6 +2464,11 @@ static void ggml_init_arm_arch_features(void) {
}
ggml_arm_arch_features.has_neon = oldp;
if (sysctlbyname("hw.optional.arm.FEAT_DotProd", &oldp, &size, NULL, 0) != 0) {
oldp = 0;
}
ggml_arm_arch_features.has_dotprod = oldp;
if (sysctlbyname("hw.optional.arm.FEAT_I8MM", &oldp, &size, NULL, 0) != 0) {
oldp = 0;
}
@ -13890,6 +13897,14 @@ int ggml_cpu_has_neon(void) {
#endif
}
int ggml_cpu_has_dotprod(void) {
#if defined(__ARM_ARCH) && defined(__ARM_FEATURE_DOTPROD)
return ggml_arm_arch_features.has_dotprod;
#else
return 0;
#endif
}
int ggml_cpu_has_sve(void) {
#if defined(__ARM_ARCH) && defined(__ARM_FEATURE_SVE)
return ggml_arm_arch_features.has_sve;