fix performance regression on woa

This commit is contained in:
Reinforce-II 2024-05-27 12:44:56 +08:00
parent d6ef0e77dd
commit 375736270c
2 changed files with 12 additions and 0 deletions

View file

@ -72,6 +72,7 @@ else()
set(INS_ENB ON)
endif()
# Instruction-set feature toggles (user-overridable cache options).
# LLAMA_LSE_ATOMICS: when ON, the ARM64 branch further down appends /arch:armv8.1
# to ARCH_FLAGS; turn it OFF for targets that cannot run ARMv8.1 code.
option(LLAMA_LSE_ATOMICS "llama: enable LSE atomics" ON)
# LLAMA_SVE: opt-in (defaults to OFF).
option(LLAMA_SVE "llama: enable SVE" OFF)
# The AVX/AVX2 defaults follow INS_ENB, which is set by the conditional just above.
option(LLAMA_AVX "llama: enable AVX" ${INS_ENB})
option(LLAMA_AVX2 "llama: enable AVX2" ${INS_ENB})
@ -1002,6 +1003,10 @@ if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64" OR CMAKE_GENERATOR_PLATFORM_LWR STR
# Define the ARM feature macros for every target in this directory scope.
# NOTE(review): presumably needed because this toolchain does not predefine them — confirm.
add_compile_definitions(__ARM_NEON)
add_compile_definitions(__ARM_FEATURE_FMA)
# Opt-in baseline raise: only request the ARMv8.1 architecture level when
# LLAMA_LSE_ATOMICS is enabled, so builds for older CPUs can switch it off.
if (LLAMA_LSE_ATOMICS)
list(APPEND ARCH_FLAGS /arch:armv8.1)
endif()
# Save the current CMAKE_REQUIRED_FLAGS before modifying them for the probe below.
# NOTE(review): the matching restore is not visible in this excerpt — confirm it exists.
set(CMAKE_REQUIRED_FLAGS_PREV ${CMAKE_REQUIRED_FLAGS})
# Add /arch:armv8.2 to the flags used by the compile check (space-separated string).
string(JOIN " " CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS} "/arch:armv8.2")
# Probe whether the compiler accepts the vdotq_s32 intrinsic with those flags; the
# snippet is only built, never run. Result is stored in GGML_COMPILER_SUPPORT_DOTPROD.
check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_DOTPROD)

View file

@ -351,6 +351,13 @@ In order to build llama.cpp you have four different options.
cmake --build build --config Debug
```
**Note**: (MSVC only) for Windows on ARM builds targeting ARMv8.0 CPUs, which lack the ARMv8.1 LSE atomics, e.g. Snapdragon 835 (MSM8998), disable LSE atomics:
```bash
cmake -B build -DLLAMA_LSE_ATOMICS=OFF
cmake --build build --config Release
```
- Using `Zig` (version 0.11 or later):
Building for optimization levels and CPU features can be accomplished using standard build arguments, for example AVX2, FMA, F16C,