fix performance regression on woa

This commit is contained in:
Reinforce-II 2024-05-27 12:44:56 +08:00
parent d6ef0e77dd
commit 375736270c
2 changed files with 12 additions and 0 deletions

View file

@ -72,6 +72,7 @@ else()
set(INS_ENB ON) set(INS_ENB ON)
endif() endif()
# Instruction-set feature toggles (user-configurable cache options).
# LLAMA_LSE_ATOMICS defaults to ON and LLAMA_SVE to OFF; the AVX and AVX2
# defaults follow INS_ENB, which is assigned by the if/else just above.
option(LLAMA_LSE_ATOMICS "llama: enable LSE atomics" ON)
option(LLAMA_SVE "llama: enable SVE" OFF)
option(LLAMA_AVX "llama: enable AVX" ${INS_ENB})
option(LLAMA_AVX2 "llama: enable AVX2" ${INS_ENB})
@ -1002,6 +1003,10 @@ if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64" OR CMAKE_GENERATOR_PLATFORM_LWR STR
# Define the ARM feature macros explicitly for this branch.
# NOTE(review): presumably needed because the compiler selected here does not
# predefine them itself - confirm against the enclosing condition.
add_compile_definitions(__ARM_NEON)
add_compile_definitions(__ARM_FEATURE_FMA)
# When LLAMA_LSE_ATOMICS is enabled, append /arch:armv8.1 to ARCH_FLAGS.
# NOTE(review): presumably this is what lets the compiler emit LSE atomic
# instructions; turn the option off for CPUs that do not support ARMv8.1.
if (LLAMA_LSE_ATOMICS)
list(APPEND ARCH_FLAGS /arch:armv8.1)
endif()
# Probe whether the compiler accepts the vdotq_s32 intrinsic when building
# with /arch:armv8.2; the outcome is stored in GGML_COMPILER_SUPPORT_DOTPROD.
# The current CMAKE_REQUIRED_FLAGS are saved first so they can be restored
# once probing is done (the restore is not visible in this excerpt).
set(CMAKE_REQUIRED_FLAGS_PREV ${CMAKE_REQUIRED_FLAGS})
string(JOIN " " CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS} "/arch:armv8.2")
check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_DOTPROD)

View file

@ -351,6 +351,13 @@ In order to build llama.cpp you have four different options.
cmake --build build --config Debug
```
**Note**: (MSVC only) for Windows on ARM builds that target CPUs older than ARMv8.1 (which lack LSE atomics), e.g. Snapdragon 835 (MSM8998), disable LSE atomics:
```bash
cmake -B build -DLLAMA_LSE_ATOMICS=OFF
cmake --build build --config Release
```
- Using `Zig` (version 0.11 or later):
Building for optimization levels and CPU features can be accomplished using standard build arguments, for example AVX2, FMA, F16C,