From 9d4d14c9b00e8fc05708140f8339d1f74be07d18 Mon Sep 17 00:00:00 2001
From: Justine Tunney
Date: Sun, 21 Apr 2024 11:56:15 -0700
Subject: [PATCH] Address review comments

---
 CMakeLists.txt | 16 +++++-----------
 sgemm.cpp      |  6 ++++--
 2 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index f134a153b..58a1805ba 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -43,17 +43,11 @@ else()
     set(LLAMA_METAL_DEFAULT OFF)
 endif()
 
-# TODO: fix this for Android CI
-# https://github.com/ggerganov/llama.cpp/pull/6716#issuecomment-2061509191
-#if (CMAKE_SYSTEM_NAME MATCHES "ANDROID")
-#    set(LLAMA_LLAMAFILE_DEFAULT OFF)
-#else()
-#    set(LLAMA_LLAMAFILE_DEFAULT ON)
-#endif()
-
-# TODO: temporary disable until MoE is fixed
-# https://github.com/ggerganov/llama.cpp/pull/6716
-set(LLAMA_LLAMAFILE_DEFAULT OFF)
+if (CMAKE_SYSTEM_NAME MATCHES "ANDROID")
+    set(LLAMA_LLAMAFILE_DEFAULT OFF)
+else()
+    set(LLAMA_LLAMAFILE_DEFAULT ON)
+endif()
 
 # general
 option(BUILD_SHARED_LIBS "build shared libraries" OFF)
diff --git a/sgemm.cpp b/sgemm.cpp
index 80c3b19a6..799723b2f 100644
--- a/sgemm.cpp
+++ b/sgemm.cpp
@@ -512,8 +512,8 @@ class tinyBLAS_Q0_ARM {
         for (int job = start; job < end; ++job) {
             int ii = m0 + job / xtiles * RM;
             int jj = n0 + job % xtiles * RN;
-            D Cv[RN][RM] = {};
-            for (int l = 0; l < k; l += KN)
+            float32x4_t Cv[RN][RM] = {};
+            for (int l = 0; l < k; ++l)
                 for (int j = 0; j < RN; ++j)
                     for (int i = 0; i < RM; ++i)
                         Cv[j][i] = vmlaq_n_f32(Cv[j][i],
@@ -534,6 +534,7 @@ class tinyBLAS_Q0_ARM {
     inline int8x16_t load_lo(const block_q8_0 *b) {
         return vld1q_s8(b->qs);
     }
+
     inline int8x16_t load_hi(const block_q8_0 *b) {
         return vld1q_s8(b->qs + 16);
     }
@@ -543,6 +544,7 @@ class tinyBLAS_Q0_ARM {
                                                      vdupq_n_u8(0x0f))),
                         vdupq_n_s8(0x8));
     }
+
     inline int8x16_t load_hi(const block_q4_0 *b) {
         return vsubq_s8(vreinterpretq_s8_u8(vshrq_n_u8(vld1q_u8(b->qs), 4)),
                         vdupq_n_s8(0x8));