diff --git a/examples/gguf-hash/deps/xxhash/xxhash.h b/examples/gguf-hash/deps/xxhash/xxhash.h index a5a955f54..323fbc906 100644 --- a/examples/gguf-hash/deps/xxhash/xxhash.h +++ b/examples/gguf-hash/deps/xxhash/xxhash.h @@ -3740,7 +3740,7 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_can # if defined(__ARM_FEATURE_SVE) # include # include -int sve_cnt_b = 0; +int ggml_sve_cnt_b = 0; # endif # if defined(__ARM_NEON__) || defined(__ARM_NEON) \ || (defined(_M_ARM) && _M_ARM >= 7) \ @@ -5497,9 +5497,9 @@ XXH3_accumulate_512_sve( void* XXH_RESTRICT acc, const uint64_t *xinput = (const uint64_t *)(const void *)input; const uint64_t *xsecret = (const uint64_t *)(const void *)secret; svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1); - if (!sve_cnt_b) - sve_cnt_b = PR_SVE_VL_LEN_MASK & prctl(PR_SVE_GET_VL); - uint64_t element_count = sve_cnt_b / 8; + if (!ggml_sve_cnt_b) + ggml_sve_cnt_b = PR_SVE_VL_LEN_MASK & prctl(PR_SVE_GET_VL); + uint64_t element_count = ggml_sve_cnt_b / 8; if (element_count >= 8) { svbool_t mask = svptrue_pat_b64(SV_VL8); svuint64_t vacc = svld1_u64(mask, xacc); @@ -5541,9 +5541,9 @@ XXH3_accumulate_sve(xxh_u64* XXH_RESTRICT acc, const uint64_t *xinput = (const uint64_t *)(const void *)input; const uint64_t *xsecret = (const uint64_t *)(const void *)secret; svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1); - if (!sve_cnt_b) - sve_cnt_b = PR_SVE_VL_LEN_MASK & prctl(PR_SVE_GET_VL); - uint64_t element_count = sve_cnt_b / 8; + if (!ggml_sve_cnt_b) + ggml_sve_cnt_b = PR_SVE_VL_LEN_MASK & prctl(PR_SVE_GET_VL); + uint64_t element_count = ggml_sve_cnt_b / 8; if (element_count >= 8) { svbool_t mask = svptrue_pat_b64(SV_VL8); svuint64_t vacc = svld1_u64(mask, xacc + 0); diff --git a/ggml/src/ggml-aarch64.c b/ggml/src/ggml-aarch64.c index d1c58c461..d7a608997 100644 --- a/ggml/src/ggml-aarch64.c +++ b/ggml/src/ggml-aarch64.c @@ -384,8 +384,8 @@ void ggml_gemv_q4_0_4x4_q8_0(int n, float * restrict s, size_t bs, const void * UNUSED(blocklen); #if defined(__ARM_FEATURE_SVE) - if (sve_cnt_b == QK8_0) { - GGML_ASSERT(!(ggml_cpu_has_sve() && (sve_cnt_b == QK8_0)) && + if (ggml_sve_cnt_b == QK8_0) { + GGML_ASSERT(!(ggml_cpu_has_sve() && (ggml_sve_cnt_b == QK8_0)) && "__ARM_FEATURE_SVE defined, use the Q4_0_8_8 quantization format for optimal performance"); } #endif @@ -496,8 +496,8 @@ void ggml_gemv_q4_0_4x8_q8_0(int n, float * restrict s, size_t bs, const void * UNUSED(blocklen); #if defined(__ARM_FEATURE_SVE) - if (sve_cnt_b == QK8_0) { - GGML_ASSERT(!(ggml_cpu_has_sve() && (sve_cnt_b == QK8_0)) && + if (ggml_sve_cnt_b == QK8_0) { + GGML_ASSERT(!(ggml_cpu_has_sve() && (ggml_sve_cnt_b == QK8_0)) && "__ARM_FEATURE_SVE defined, use the Q4_0_8_8 quantization format for optimal performance"); } #endif @@ -614,7 +614,7 @@ void ggml_gemv_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void * UNUSED(blocklen); #if defined(__ARM_FEATURE_SVE) && ! ((defined(_MSC_VER)) && ! defined(__clang__)) - if (sve_cnt_b == QK8_0) { + if (ggml_sve_cnt_b == QK8_0) { const void * b_ptr = vx; const void * a_ptr = vy; float * res_ptr = s; @@ -680,12 +680,12 @@ void ggml_gemv_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void * return; } else if (ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) { - GGML_ASSERT((ggml_cpu_has_sve() && (sve_cnt_b == QK8_0)) && + GGML_ASSERT((ggml_cpu_has_sve() && (ggml_sve_cnt_b == QK8_0)) && "__ARM_FEATURE_SVE for vector size of 256-bits not defined, use the Q4_0_4_8 quantization format for optimal " "performance"); } else if (ggml_cpu_has_neon()) { - GGML_ASSERT(((ggml_cpu_has_sve() && (sve_cnt_b == QK8_0)) || ggml_cpu_has_matmul_int8()) && + GGML_ASSERT(((ggml_cpu_has_sve() && (ggml_sve_cnt_b == QK8_0)) || ggml_cpu_has_matmul_int8()) && "__ARM_FEATURE_SVE for vector size of 256-bits and __ARM_FEATURE_MATMUL_INT8 not defined, use the Q4_0_4_4 " "quantization format for optimal performance"); } @@ -745,8 +745,8 @@ void ggml_gemm_q4_0_4x4_q8_0(int n, float * restrict s, size_t bs, const void * UNUSED(blocklen); #if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_MATMUL_INT8) - if (sve_cnt_b == QK8_0) { - GGML_ASSERT(!(ggml_cpu_has_sve() && (sve_cnt_b == QK8_0)) && + if (ggml_sve_cnt_b == QK8_0) { + GGML_ASSERT(!(ggml_cpu_has_sve() && (ggml_sve_cnt_b == QK8_0)) && "__ARM_FEATURE_SVE defined, use the Q4_0_8_8 quantization format for optimal performance"); } #endif @@ -1266,8 +1266,8 @@ void ggml_gemm_q4_0_4x8_q8_0(int n, float * restrict s, size_t bs, const void * UNUSED(blocklen); #if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_MATMUL_INT8) - if (sve_cnt_b == QK8_0) { - GGML_ASSERT(!(ggml_cpu_has_sve() && (sve_cnt_b == QK8_0)) && + if (ggml_sve_cnt_b == QK8_0) { + GGML_ASSERT(!(ggml_cpu_has_sve() && (ggml_sve_cnt_b == QK8_0)) && "__ARM_FEATURE_SVE defined, use the Q4_0_8_8 quantization format for optimal performance"); } #endif @@ -1728,7 +1728,7 @@ void ggml_gemm_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void * UNUSED(blocklen); #if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_MATMUL_INT8) && ! ((defined(_MSC_VER)) && ! defined(__clang__)) - if (sve_cnt_b == QK8_0) { + if (ggml_sve_cnt_b == QK8_0) { const void * b_ptr = vx; const void * a_ptr = vy; float * res_ptr = s; @@ -2139,12 +2139,12 @@ void ggml_gemm_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void * return; } else if (ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) { - GGML_ASSERT((ggml_cpu_has_sve() && (sve_cnt_b == QK8_0)) && + GGML_ASSERT((ggml_cpu_has_sve() && (ggml_sve_cnt_b == QK8_0)) && "__ARM_FEATURE_SVE for vector size of 256-bits not defined, use the Q4_0_4_8 quantization format for optimal " "performance"); } else if (ggml_cpu_has_neon()) { - GGML_ASSERT(((ggml_cpu_has_sve() && (sve_cnt_b == QK8_0)) || ggml_cpu_has_matmul_int8()) && + GGML_ASSERT(((ggml_cpu_has_sve() && (ggml_sve_cnt_b == QK8_0)) || ggml_cpu_has_matmul_int8()) && "__ARM_FEATURE_SVE for vector size of 256-bits and __ARM_FEATURE_MATMUL_INT8 not defined, use the Q4_0_4_4 " "quantization format for optimal performance"); } diff --git a/ggml/src/ggml-quants.c b/ggml/src/ggml-quants.c index 64e6efc01..314ab578c 100644 --- a/ggml/src/ggml-quants.c +++ b/ggml/src/ggml-quants.c @@ -3818,7 +3818,7 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, size_t bs, const void * r float sumf = 0; #if defined(__ARM_FEATURE_SVE) - if (sve_cnt_b == QK8_0) { + if (ggml_sve_cnt_b == QK8_0) { const svbool_t ptrueh = svptrue_pat_b8(SV_VL16); const svbool_t ptruel = svnot_b_z(svptrue_b8(), ptrueh); @@ -5291,7 +5291,7 @@ void ggml_vec_dot_q8_0_q8_0(int n, float * restrict s, size_t bs, const void * r float sumf = 0; #if defined(__ARM_FEATURE_SVE) - if (sve_cnt_b == QK8_0) { + if (ggml_sve_cnt_b == QK8_0) { svfloat32_t sumv0 = svdup_n_f32(0.0f); svfloat32_t sumv1 = svdup_n_f32(0.0f); diff --git a/ggml/src/ggml-quants.h b/ggml/src/ggml-quants.h index 7d66f1f41..525d5ee30 100644 --- a/ggml/src/ggml-quants.h +++ b/ggml/src/ggml-quants.h @@ -128,7 +128,7 @@ void iq3xs_init_impl(int grid_size); void iq3xs_free_impl(int grid_size); #if defined(__ARM_FEATURE_SVE) -extern int sve_cnt_b; +extern int ggml_sve_cnt_b; #endif #ifdef __cplusplus diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index 1718d28a4..a66363752 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -38,7 +38,7 @@ #endif #if defined(__ARM_FEATURE_SVE) -int sve_cnt_b = 0; +int ggml_sve_cnt_b = 0; #endif #if defined(__ARM_FEATURE_SVE) || defined(__ARM_FEATURE_MATMUL_INT8) #undef GGML_USE_LLAMAFILE @@ -3479,8 +3479,8 @@ struct ggml_context * ggml_init(struct ggml_init_params params) { ggml_assert_aligned(ctx->mem_buffer); #if defined(__ARM_FEATURE_SVE) - if (!sve_cnt_b) - sve_cnt_b = PR_SVE_VL_LEN_MASK & prctl(PR_SVE_GET_VL); + if (!ggml_sve_cnt_b) + ggml_sve_cnt_b = PR_SVE_VL_LEN_MASK & prctl(PR_SVE_GET_VL); #endif GGML_PRINT_DEBUG("%s: context initialized\n", __func__);