From 8fd848dd30f5e370aabfad0061eced194373a4ce Mon Sep 17 00:00:00 2001 From: Dan Johansson Date: Thu, 19 Sep 2024 12:45:11 +0200 Subject: [PATCH] ggml: Extend feature detection to include non aarch64 Arm arch --- ggml/src/ggml.c | 67 +++++++++++++++++++++++++------------------------ 1 file changed, 34 insertions(+), 33 deletions(-) diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index bb2911139..6eabf3aa8 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -39,13 +39,13 @@ #include #endif -#if defined(__aarch64__) -struct ggml_aarch64_features_type { +#if defined(__ARM_ARCH) +struct ggml_arm_arch_features_type { int has_neon; int has_i8mm; int has_sve; int sve_cnt; -} ggml_aarch64_features = {-1, -1, -1, 0}; +} ggml_arm_arch_features = {-1, -1, -1, 0}; #endif #if defined(__ARM_FEATURE_SVE) || defined(__ARM_FEATURE_MATMUL_INT8) @@ -3679,24 +3679,25 @@ static inline int ggml_up(int n, int m) { //////////////////////////////////////////////////////////////////////////////// -#if defined(__aarch64__) +#if defined(__ARM_ARCH) -#if defined(__linux__) +#if defined(__linux__) && defined(__aarch64__) #include #elif defined(__APPLE__) #include #endif -static void ggml_init_aarch64_features(void) { -#if defined(__linux__) +static void ggml_init_arm_arch_features(void) { +#if defined(__linux__) && defined(__aarch64__) uint32_t hwcap = getauxval(AT_HWCAP); uint32_t hwcap2 = getauxval(AT_HWCAP2); - ggml_aarch64_features.has_neon = !!(hwcap & HWCAP_ASIMD); - ggml_aarch64_features.has_i8mm = !!(hwcap2 & HWCAP2_I8MM); - ggml_aarch64_features.has_sve = !!(hwcap & HWCAP_SVE); + ggml_arm_arch_features.has_neon = !!(hwcap & HWCAP_ASIMD); + ggml_arm_arch_features.has_i8mm = !!(hwcap2 & HWCAP2_I8MM); + ggml_arm_arch_features.has_sve = !!(hwcap & HWCAP_SVE); + #if defined(__ARM_FEATURE_SVE) - ggml_aarch64_features.sve_cnt = PR_SVE_VL_LEN_MASK & prctl(PR_SVE_GET_VL); + ggml_arm_arch_features.sve_cnt = PR_SVE_VL_LEN_MASK & prctl(PR_SVE_GET_VL); #endif #elif defined(__APPLE__) int oldp = 0; @@ -3704,35 +3705,35 @@ static void ggml_init_aarch64_features(void) { if (sysctlbyname("hw.optional.AdvSIMD", &oldp, &size, NULL, 0) != 0) { oldp = 0; } - ggml_aarch64_features.has_neon = oldp; + ggml_arm_arch_features.has_neon = oldp; if (sysctlbyname("hw.optional.arm.FEAT_I8MM", &oldp, &size, NULL, 0) != 0) { oldp = 0; } - ggml_aarch64_features.has_i8mm = oldp; + ggml_arm_arch_features.has_i8mm = oldp; - ggml_aarch64_features.has_sve = 0; - ggml_aarch64_features.sve_cnt = 0; + ggml_arm_arch_features.has_sve = 0; + ggml_arm_arch_features.sve_cnt = 0; #else // Run-time CPU feature detection not implemented for this platform, fallback to compile time #if defined(__ARM_NEON) - ggml_aarch64_features.has_neon = 1; + ggml_arm_arch_features.has_neon = 1; #else - ggml_aarch64_features.has_neon = 0; + ggml_arm_arch_features.has_neon = 0; #endif #if defined(__ARM_FEATURE_MATMUL_INT8) - ggml_aarch64_features.has_i8mm = 1; + ggml_arm_arch_features.has_i8mm = 1; #else - ggml_aarch64_features.has_i8mm = 0; + ggml_arm_arch_features.has_i8mm = 0; #endif #if defined(__ARM_FEATURE_SVE) - ggml_aarch64_features.has_sve = 1; - ggml_aarch64_features.sve_cnt = 16; + ggml_arm_arch_features.has_sve = 1; + ggml_arm_arch_features.sve_cnt = 16; #else - ggml_aarch64_features.has_sve = 0; - ggml_aarch64_features.sve_cnt = 0; + ggml_arm_arch_features.has_sve = 0; + ggml_arm_arch_features.sve_cnt = 0; #endif #endif } @@ -3788,8 +3789,8 @@ struct ggml_context * ggml_init(struct ggml_init_params params) { GGML_PRINT_DEBUG("%s: g_state initialized in %f ms\n", __func__, (t_end - t_start)/1000.0f); } -#if defined(__aarch64__) - ggml_init_aarch64_features(); +#if defined(__ARM_ARCH) + ggml_init_arm_arch_features(); #endif is_first_call = false; @@ -23641,16 +23642,16 @@ int ggml_cpu_has_fma(void) { } int ggml_cpu_has_neon(void) { -#if defined(__aarch64__) - return ggml_aarch64_features.has_neon; +#if defined(__ARM_ARCH) + return ggml_arm_arch_features.has_neon; #else return 0; #endif } int ggml_cpu_has_sve(void) { -#if defined(__aarch64__) - return ggml_aarch64_features.has_sve; +#if defined(__ARM_ARCH) + return ggml_arm_arch_features.has_sve; #else return 0; #endif @@ -23797,16 +23798,16 @@ int ggml_cpu_has_vsx(void) { } int ggml_cpu_has_matmul_int8(void) { -#if defined(__aarch64__) - return ggml_aarch64_features.has_i8mm; +#if defined(__ARM_ARCH) + return ggml_arm_arch_features.has_i8mm; #else return 0; #endif } int ggml_cpu_get_sve_cnt(void) { -#if defined(__aarch64__) - return ggml_aarch64_features.sve_cnt; +#if defined(__ARM_ARCH) + return ggml_arm_arch_features.sve_cnt; #else return 0; #endif