Fix performance regression on WoA (Windows on ARM)
This commit is contained in:
parent
d6ef0e77dd
commit
375736270c
2 changed files with 12 additions and 0 deletions
|
@ -72,6 +72,7 @@ else()
|
|||
set(INS_ENB ON)
|
||||
endif()
|
||||
|
||||
option(LLAMA_LSE_ATOMICS "llama: enable LSE atomics" ON)
|
||||
option(LLAMA_SVE "llama: enable SVE" OFF)
|
||||
option(LLAMA_AVX "llama: enable AVX" ${INS_ENB})
|
||||
option(LLAMA_AVX2 "llama: enable AVX2" ${INS_ENB})
|
||||
|
@ -1002,6 +1003,10 @@ if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64" OR CMAKE_GENERATOR_PLATFORM_LWR STR
|
|||
add_compile_definitions(__ARM_NEON)
|
||||
add_compile_definitions(__ARM_FEATURE_FMA)
|
||||
|
||||
if (LLAMA_LSE_ATOMICS)
|
||||
list(APPEND ARCH_FLAGS /arch:armv8.1)
|
||||
endif()
|
||||
|
||||
set(CMAKE_REQUIRED_FLAGS_PREV ${CMAKE_REQUIRED_FLAGS})
|
||||
string(JOIN " " CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS} "/arch:armv8.2")
|
||||
check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_DOTPROD)
|
||||
|
|
|
@ -351,6 +351,13 @@ In order to build llama.cpp you have four different options.
|
|||
cmake --build build --config Debug
|
||||
```
|
||||
|
||||
**Note**: (MSVC only) When building for Windows on ARM targets that predate Armv8.1 (i.e. Armv8.0 CPUs such as the Snapdragon 835 / MSM8998), disable LSE atomics:
|
||||
|
||||
```bash
|
||||
cmake -B build -DLLAMA_LSE_ATOMICS=OFF
|
||||
cmake --build build --config Release
|
||||
```
|
||||
|
||||
- Using `Zig` (version 0.11 or later):
|
||||
|
||||
Building for optimization levels and CPU features can be accomplished using standard build arguments, for example AVX2, FMA, F16C,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue