Compare commits

...
Sign in to create a new pull request.

4 commits

Author SHA1 Message Date
Georgi Gerganov
007f2ece0a
cmake : provide binary dir 2024-05-18 10:50:46 +03:00
Georgi Gerganov
99d1e7eb8a
android : do not fetch, use add_subdirectory instead 2024-05-18 09:33:16 +03:00
Georgi Gerganov
2117b30380
ggml : disable SIMD exp and silu for 32-bit ARM
ggml-ci
2024-05-17 15:47:56 +03:00
Georgi Gerganov
8725937362
android : use "ci-android" branch for CI 2024-05-17 15:47:44 +03:00
2 changed files with 13 additions and 11 deletions

View file

@@ -12,15 +12,17 @@ cmake_minimum_required(VERSION 3.22.1)
 # build script scope).
 project("llama-android")
-include(FetchContent)
-FetchContent_Declare(
-llama
-GIT_REPOSITORY https://github.com/ggerganov/llama.cpp
-GIT_TAG master
-)
+#include(FetchContent)
+#FetchContent_Declare(
+# llama
+# GIT_REPOSITORY https://github.com/ggerganov/llama.cpp
+# GIT_TAG ci-android
+#)
+#
+## Also provides "common"
+#FetchContent_MakeAvailable(llama)
-# Also provides "common"
-FetchContent_MakeAvailable(llama)
+add_subdirectory(../../../../../../ please-work)
 # Creates and names a library, sets it as either STATIC
 # or SHARED, and provides the relative paths to its source code.

6
ggml.c
View file

@@ -2076,7 +2076,7 @@ inline static float ggml_silu_f32(float x) {
 return x/(1.0f + expf(-x));
 }
-#if defined(__ARM_NEON)
+#if defined(__ARM_NEON) && defined(__aarch64__)
 // adapted from arm limited optimized routine
 // the maximum error is 1.45358 plus 0.5 ulps
@@ -2288,7 +2288,7 @@ static void ggml_vec_silu_f32(const int n, float * y, const float * x) {
 for (; i + 3 < n; i += 4) {
 _mm_storeu_ps(y + i, ggml_v_silu(_mm_loadu_ps(x + i)));
 }
-#elif defined(__ARM_NEON)
+#elif defined(__ARM_NEON) && defined(__aarch64__)
 for (; i + 3 < n; i += 4) {
 vst1q_f32(y + i, ggml_v_silu(vld1q_f32(x + i)));
 }
@@ -2335,7 +2335,7 @@ static ggml_float ggml_vec_soft_max_f32(const int n, float * y, const float * x,
 #endif
 sum += (ggml_float)_mm_cvtss_f32(val);
 }
-#elif defined(__ARM_NEON)
+#elif defined(__ARM_NEON) && defined(__aarch64__)
 for (; i + 3 < n; i += 4) {
 float32x4_t val = ggml_v_expf(vsubq_f32(vld1q_f32(x + i),
 vdupq_n_f32(max)));