Address review comments

This commit is contained in:
Justine Tunney 2024-04-21 11:56:15 -07:00
parent 3e4fc41505
commit 9d4d14c9b0
No known key found for this signature in database
GPG key ID: 52965314629936D4
2 changed files with 9 additions and 13 deletions

View file

@ -43,17 +43,11 @@ else()
set(LLAMA_METAL_DEFAULT OFF)
endif()
# TODO: fix this for Android CI
# https://github.com/ggerganov/llama.cpp/pull/6716#issuecomment-2061509191
#if (CMAKE_SYSTEM_NAME MATCHES "ANDROID")
# set(LLAMA_LLAMAFILE_DEFAULT OFF)
#else()
# set(LLAMA_LLAMAFILE_DEFAULT ON)
#endif()
# TODO: temporary disable until MoE is fixed
# https://github.com/ggerganov/llama.cpp/pull/6716
# Pick the default value of LLAMA_LLAMAFILE_DEFAULT: OFF when targeting
# Android, ON everywhere else.
# CMake reports the Android platform as "Android" and MATCHES is a
# case-sensitive regular-expression test, so comparing CMAKE_SYSTEM_NAME
# against "ANDROID" never matched. Test the ANDROID variable instead, which
# CMake sets to 1 whenever the target system is Android.
if (ANDROID)
    set(LLAMA_LLAMAFILE_DEFAULT OFF)
else()
    set(LLAMA_LLAMAFILE_DEFAULT ON)
endif()
# general
# BUILD_SHARED_LIBS is CMake's standard switch: when ON, add_library() calls
# that do not name an explicit library type produce shared libraries.
# It defaults to OFF here.
option(BUILD_SHARED_LIBS "build shared libraries" OFF)

View file

@ -512,8 +512,8 @@ class tinyBLAS_Q0_ARM {
for (int job = start; job < end; ++job) {
int ii = m0 + job / xtiles * RM;
int jj = n0 + job % xtiles * RN;
D Cv[RN][RM] = {};
for (int l = 0; l < k; l += KN)
float32x4_t Cv[RN][RM] = {};
for (int l = 0; l < k; ++l)
for (int j = 0; j < RN; ++j)
for (int i = 0; i < RM; ++i)
Cv[j][i] = vmlaq_n_f32(Cv[j][i],
@ -534,6 +534,7 @@ class tinyBLAS_Q0_ARM {
// Returns a 16-lane int8 vector read from the start of b->qs.
inline int8x16_t load_lo(const block_q8_0 *b) {
    return vld1q_s8(&b->qs[0]);
}
// Returns a 16-lane int8 vector read from b->qs starting at element 16,
// i.e. the lanes that follow the ones returned by load_lo().
inline int8x16_t load_hi(const block_q8_0 *b) {
    return vld1q_s8(&b->qs[16]);
}
@ -543,6 +544,7 @@ class tinyBLAS_Q0_ARM {
vdupq_n_u8(0x0f))),
vdupq_n_s8(0x8));
}
inline int8x16_t load_hi(const block_q4_0 *b) {
return vsubq_s8(vreinterpretq_s8_u8(vshrq_n_u8(vld1q_u8(b->qs), 4)),
vdupq_n_s8(0x8));