From 375736270cec0f986483d71dea9c10125372566a Mon Sep 17 00:00:00 2001
From: Reinforce-II
Date: Mon, 27 May 2024 12:44:56 +0800
Subject: [PATCH] fix performance regression on woa

Add a LLAMA_LSE_ATOMICS option (default ON). When it is enabled, the
Windows-on-ARM MSVC configuration appends /arch:armv8.1 to ARCH_FLAGS so
the compiler may use the ARMv8.1 LSE atomic instructions.

Targets that only implement ARMv8.0 (no LSE) must configure with
-DLLAMA_LSE_ATOMICS=OFF; the README documents this.
---
 CMakeLists.txt | 5 +++++
 README.md      | 7 +++++++
 2 files changed, 12 insertions(+)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index c5add8239..df9e5aad8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -72,6 +72,7 @@ else()
     set(INS_ENB ON)
 endif()
 
+option(LLAMA_LSE_ATOMICS "llama: enable LSE atomics" ON)
 option(LLAMA_SVE "llama: enable SVE" OFF)
 option(LLAMA_AVX "llama: enable AVX" ${INS_ENB})
 option(LLAMA_AVX2 "llama: enable AVX2" ${INS_ENB})
@@ -1002,6 +1003,10 @@ if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64" OR CMAKE_GENERATOR_PLATFORM_LWR STR
         add_compile_definitions(__ARM_NEON)
         add_compile_definitions(__ARM_FEATURE_FMA)
 
+        if (LLAMA_LSE_ATOMICS)
+            list(APPEND ARCH_FLAGS /arch:armv8.1)
+        endif()
+
         set(CMAKE_REQUIRED_FLAGS_PREV ${CMAKE_REQUIRED_FLAGS})
         string(JOIN " " CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS} "/arch:armv8.2")
         check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_DOTPROD)
diff --git a/README.md b/README.md
index 15519c97f..fa0e62068 100644
--- a/README.md
+++ b/README.md
@@ -351,6 +351,13 @@ In order to build llama.cpp you have four different options.
       cmake --build build --config Debug
       ```
 
+    **Note**: (MSVC only) when building for Windows on ARM devices that only implement ARMv8.0 (i.e. lack the ARMv8.1 LSE atomics), e.g. Snapdragon 835 (MSM8998), disable LSE atomics:
+
+    ```bash
+    cmake -B build -DLLAMA_LSE_ATOMICS=OFF
+    cmake --build build --config Release
+    ```
+
 - Using `Zig` (version 0.11 or later):
 
     Building for optimization levels and CPU features can be accomplished using standard build arguments, for example AVX2, FMA, F16C,