ggml : add run-time detection of neon, i8mm and sve (#9331)
* ggml: Added run-time detection of neon, i8mm and sve Adds run-time detection of the Arm instructions set features neon, i8mm and sve for Linux and Apple build targets. * ggml: Extend feature detection to include non aarch64 Arm arch * ggml: Move definition of ggml_arm_arch_features to the global data section
This commit is contained in:
parent
89f9944981
commit
6a0f779484
5 changed files with 93 additions and 32 deletions
101
ggml/src/ggml.c
101
ggml/src/ggml.c
|
@ -39,9 +39,6 @@
|
|||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#if defined(__ARM_FEATURE_SVE)
|
||||
int ggml_sve_cnt_b = 0;
|
||||
#endif
|
||||
#if defined(__ARM_FEATURE_SVE) || defined(__ARM_FEATURE_MATMUL_INT8)
|
||||
#undef GGML_USE_LLAMAFILE
|
||||
#endif
|
||||
|
@ -455,6 +452,15 @@ static ggml_fp16_t ggml_table_gelu_quick_f16[1 << 16];
|
|||
// precomputed f32 table for f16 (256 KB) (ggml-impl.h)
|
||||
float ggml_table_f32_f16[1 << 16];
|
||||
|
||||
#if defined(__ARM_ARCH)
|
||||
struct ggml_arm_arch_features_type {
|
||||
int has_neon;
|
||||
int has_i8mm;
|
||||
int has_sve;
|
||||
int sve_cnt;
|
||||
} ggml_arm_arch_features = {-1, -1, -1, 0};
|
||||
#endif
|
||||
|
||||
GGML_CALL const char * ggml_status_to_string(enum ggml_status status) {
|
||||
switch (status) {
|
||||
case GGML_STATUS_ALLOC_FAILED: return "GGML status: error (failed to allocate memory)";
|
||||
|
@ -3673,6 +3679,66 @@ static inline int ggml_up(int n, int m) {
|
|||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#if defined(__ARM_ARCH)
|
||||
|
||||
#if defined(__linux__) && defined(__aarch64__)
|
||||
#include <sys/auxv.h>
|
||||
#elif defined(__APPLE__)
|
||||
#include <sys/sysctl.h>
|
||||
#endif
|
||||
|
||||
static void ggml_init_arm_arch_features(void) {
|
||||
#if defined(__linux__) && defined(__aarch64__)
|
||||
uint32_t hwcap = getauxval(AT_HWCAP);
|
||||
uint32_t hwcap2 = getauxval(AT_HWCAP2);
|
||||
|
||||
ggml_arm_arch_features.has_neon = !!(hwcap & HWCAP_ASIMD);
|
||||
ggml_arm_arch_features.has_i8mm = !!(hwcap2 & HWCAP2_I8MM);
|
||||
ggml_arm_arch_features.has_sve = !!(hwcap & HWCAP_SVE);
|
||||
|
||||
#if defined(__ARM_FEATURE_SVE)
|
||||
ggml_arm_arch_features.sve_cnt = PR_SVE_VL_LEN_MASK & prctl(PR_SVE_GET_VL);
|
||||
#endif
|
||||
#elif defined(__APPLE__)
|
||||
int oldp = 0;
|
||||
size_t size = sizeof(oldp);
|
||||
if (sysctlbyname("hw.optional.AdvSIMD", &oldp, &size, NULL, 0) != 0) {
|
||||
oldp = 0;
|
||||
}
|
||||
ggml_arm_arch_features.has_neon = oldp;
|
||||
|
||||
if (sysctlbyname("hw.optional.arm.FEAT_I8MM", &oldp, &size, NULL, 0) != 0) {
|
||||
oldp = 0;
|
||||
}
|
||||
ggml_arm_arch_features.has_i8mm = oldp;
|
||||
|
||||
ggml_arm_arch_features.has_sve = 0;
|
||||
ggml_arm_arch_features.sve_cnt = 0;
|
||||
#else
|
||||
// Run-time CPU feature detection not implemented for this platform, fallback to compile time
|
||||
#if defined(__ARM_NEON)
|
||||
ggml_arm_arch_features.has_neon = 1;
|
||||
#else
|
||||
ggml_arm_arch_features.has_neon = 0;
|
||||
#endif
|
||||
|
||||
#if defined(__ARM_FEATURE_MATMUL_INT8)
|
||||
ggml_arm_arch_features.has_i8mm = 1;
|
||||
#else
|
||||
ggml_arm_arch_features.has_i8mm = 0;
|
||||
#endif
|
||||
|
||||
#if defined(__ARM_FEATURE_SVE)
|
||||
ggml_arm_arch_features.has_sve = 1;
|
||||
ggml_arm_arch_features.sve_cnt = 16;
|
||||
#else
|
||||
ggml_arm_arch_features.has_sve = 0;
|
||||
ggml_arm_arch_features.sve_cnt = 0;
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
struct ggml_context * ggml_init(struct ggml_init_params params) {
|
||||
// make this function thread safe
|
||||
ggml_critical_section_start();
|
||||
|
@ -3723,6 +3789,10 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
|
|||
GGML_PRINT_DEBUG("%s: g_state initialized in %f ms\n", __func__, (t_end - t_start)/1000.0f);
|
||||
}
|
||||
|
||||
#if defined(__ARM_ARCH)
|
||||
ggml_init_arm_arch_features();
|
||||
#endif
|
||||
|
||||
is_first_call = false;
|
||||
}
|
||||
|
||||
|
@ -3771,12 +3841,6 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
|
|||
|
||||
GGML_ASSERT_ALIGNED(ctx->mem_buffer);
|
||||
|
||||
#if defined(__ARM_FEATURE_SVE)
|
||||
if (!ggml_sve_cnt_b) {
|
||||
ggml_sve_cnt_b = PR_SVE_VL_LEN_MASK & prctl(PR_SVE_GET_VL);
|
||||
}
|
||||
#endif
|
||||
|
||||
GGML_PRINT_DEBUG("%s: context initialized\n", __func__);
|
||||
|
||||
ggml_critical_section_end();
|
||||
|
@ -23578,16 +23642,16 @@ int ggml_cpu_has_fma(void) {
|
|||
}
|
||||
|
||||
int ggml_cpu_has_neon(void) {
|
||||
#if defined(__ARM_NEON)
|
||||
return 1;
|
||||
#if defined(__ARM_ARCH)
|
||||
return ggml_arm_arch_features.has_neon;
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
int ggml_cpu_has_sve(void) {
|
||||
#if defined(__ARM_FEATURE_SVE)
|
||||
return 1;
|
||||
#if defined(__ARM_ARCH)
|
||||
return ggml_arm_arch_features.has_sve;
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
|
@ -23734,11 +23798,18 @@ int ggml_cpu_has_vsx(void) {
|
|||
}
|
||||
|
||||
int ggml_cpu_has_matmul_int8(void) {
|
||||
#if defined(__ARM_FEATURE_MATMUL_INT8)
|
||||
return 1;
|
||||
#if defined(__ARM_ARCH)
|
||||
return ggml_arm_arch_features.has_i8mm;
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
int ggml_cpu_get_sve_cnt(void) {
|
||||
#if defined(__ARM_ARCH)
|
||||
return ggml_arm_arch_features.sve_cnt;
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue