From 820665f3a10b3730d06ef0443843cb1e2fc1f3e4 Mon Sep 17 00:00:00 2001 From: AndreasKunar Date: Wed, 17 Jul 2024 09:56:54 +0200 Subject: [PATCH] Revert "Improvements for Windows with Snapdragon X" This reverts commit bf21397ae5ea7c73d3494db3b91505599909227d. --- cmake/arm64-windows-llvm.cmake | 3 +-- docs/build.md | 9 +-------- ggml/src/ggml-aarch64.c | 12 ++++++------ 3 files changed, 8 insertions(+), 16 deletions(-) diff --git a/cmake/arm64-windows-llvm.cmake b/cmake/arm64-windows-llvm.cmake index 82ca42d20..802379680 100644 --- a/cmake/arm64-windows-llvm.cmake +++ b/cmake/arm64-windows-llvm.cmake @@ -9,8 +9,7 @@ set( CMAKE_CXX_COMPILER clang++ ) set( CMAKE_C_COMPILER_TARGET ${target} ) set( CMAKE_CXX_COMPILER_TARGET ${target} ) -# march for Snapdragon X should be 8.7-a, but this currently breaks Q_4_0_4_4 acceleration, 8.5 works -set( arch_c_flags "-march=armv8.5-a -fvectorize -ffp-model=fast -fno-finite-math-only" ) +set( arch_c_flags "-march=armv8.7-a -fvectorize -ffp-model=fast -fno-finite-math-only" ) set( warn_c_flags "-Wno-format -Wno-unused-variable -Wno-unused-function -Wno-gnu-zero-variadic-macro-arguments" ) set( CMAKE_C_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" ) diff --git a/docs/build.md b/docs/build.md index dfff7cb05..916fcf22d 100644 --- a/docs/build.md +++ b/docs/build.md @@ -16,7 +16,7 @@ In order to build llama.cpp you have four different options. make ``` - - On Windows (x86/x64 only, arm64 requires cmake): + - On Windows: 1. Download the latest fortran version of [w64devkit](https://github.com/skeeto/w64devkit/releases). 2. Extract `w64devkit` on your pc. @@ -45,13 +45,6 @@ In order to build llama.cpp you have four different options. - For `Q4_0_4_4` quantization type build, add the `-DGGML_LLAMAFILE=OFF` cmake option. For example, use `cmake -B build -DGGML_LLAMAFILE=OFF`. - For faster compilation, add the `-j` argument to run multiple jobs in parallel. For example, `cmake --build build --config Release -j 8` will run 8 jobs in parallel. - For faster repeated compilation, install [ccache](https://ccache.dev/). - - For Windows: - - Install cmake e.g. via `winget install cmake`: - - As alternative to the w64devkit mentioned in "using make" above, install MSVC (e.g. via Visual Studio 2022 Community Edition). - - For Windows on ARM you need MSVC installed and _additonally_: - - Install [clang via LLVM for woa64](https://releases.llvm.org) to enable better ARM optimizations (clang needs the MSVC backend). - - For using clang, the first build step needs to be `cmake --preset arm64-windows-llvm-release` (instead of the `cmake -B ...` which defaults to MSVC). - - Note: Building for ARM can also just be done with MSVC (without installing clang or using the preset), but this e.g. does not support Q_4_0_4_4 acceleration, because the MSVC frontend cannot inline ARM assembly-code. - For debug builds, there are two cases: 1. Single-config generators (e.g. default = `Unix Makefiles`; note that they just ignore the `--config` flag): diff --git a/ggml/src/ggml-aarch64.c b/ggml/src/ggml-aarch64.c index 0c22e816a..26535b1c4 100644 --- a/ggml/src/ggml-aarch64.c +++ b/ggml/src/ggml-aarch64.c @@ -392,7 +392,7 @@ void ggml_gemv_q4_0_4x4_q8_0(int n, float * restrict s, size_t bs, const void * #if defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8) GGML_ASSERT(!(ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) && "__ARM_NEON and __ARM_FEATURE_MATMUL_INT8 defined, use the Q4_0_4_8 quantization format for optimal performance"); -#elif defined(__ARM_NEON) && defined(__aarch64__) && ! defined(_MSC_VER) +#elif defined(__ARM_NEON) && defined(__aarch64__) const void * b_ptr = vx; const void * a_ptr = vy; float * res_ptr = s; @@ -501,7 +501,7 @@ void ggml_gemv_q4_0_4x8_q8_0(int n, float * restrict s, size_t bs, const void * "__ARM_FEATURE_SVE defined, use the Q4_0_8_8 quantization format for optimal performance"); } #endif -#if defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8) && ! defined(_MSC_VER) +#if defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8) const void * b_ptr = vx; const void * a_ptr = vy; float * res_ptr = s; @@ -613,7 +613,7 @@ void ggml_gemv_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void * UNUSED(ncols_interleaved); UNUSED(blocklen); -#if defined(__ARM_FEATURE_SVE) && ! defined(_MSC_VER) +#if defined(__ARM_FEATURE_SVE) if (svcntw() == 8) { const void * b_ptr = vx; const void * a_ptr = vy; @@ -753,7 +753,7 @@ void ggml_gemm_q4_0_4x4_q8_0(int n, float * restrict s, size_t bs, const void * #if defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8) GGML_ASSERT(!(ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) && "__ARM_NEON and __ARM_FEATURE_MATMUL_INT8 defined, use the Q4_0_4_8 quantization format for optimal performance"); -#elif defined(__ARM_NEON) && defined(__aarch64__) && ! defined(_MSC_VER) +#elif defined(__ARM_NEON) && defined(__aarch64__) const void * b_ptr = vx; const void * a_ptr = vy; float * res_ptr = s; @@ -1271,7 +1271,7 @@ void ggml_gemm_q4_0_4x8_q8_0(int n, float * restrict s, size_t bs, const void * "__ARM_FEATURE_SVE defined, use the Q4_0_8_8 quantization format for optimal performance"); } #endif -#if defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8) && ! defined(_MSC_VER) +#if defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8) const void * b_ptr = vx; const void * a_ptr = vy; float * res_ptr = s; @@ -1727,7 +1727,7 @@ void ggml_gemm_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void * UNUSED(ncols_interleaved); UNUSED(blocklen); -#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_MATMUL_INT8) && ! defined(_MSC_VER) +#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_MATMUL_INT8) if (svcntw() == 8) { const void * b_ptr = vx; const void * a_ptr = vy;