From a456d83bbe3c4ddf77b64de9e92ef46138eb525c Mon Sep 17 00:00:00 2001
From: Holden
Date: Thu, 14 Dec 2023 22:53:14 +0800
Subject: [PATCH] add fallback for m chip & fix compiler bugs (#4)

---
 CMakeLists.txt |  2 +-
 ggml.c         | 37 +++++++++++++++++++++++++------------
 llama.cpp      |  6 +++---
 3 files changed, 29 insertions(+), 16 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index db1f42f1e..000a55c96 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -37,7 +37,7 @@ endif()
 
 #
 if (APPLE)
-    set(LLAMA_METAL_DEFAULT ON)
+    set(LLAMA_METAL_DEFAULT OFF) # Metal is not supported on Apple Silicon yet
 else()
     set(LLAMA_METAL_DEFAULT OFF)
 endif()
diff --git a/ggml.c b/ggml.c
index f1326096d..35c4349c1 100644
--- a/ggml.c
+++ b/ggml.c
@@ -146,7 +146,7 @@ void ggml_print_backtrace(void) {
 }
 #endif
 
-#define GGML_PERF
+// #define GGML_PERF
 #define GGML_DEBUG 0
 #define GGML_GELU_FP16
 #define GGML_GELU_QUICK_FP16
@@ -14436,6 +14436,7 @@ static void ggml_compute_forward_mul_mat_axpy_dense(
     // compute the number of remaining elements
     int remainder = ne00 % 8;
 
+#if defined(__AVX2__)
     // use AVX instructions for vectorized computation
     for (i = 0; i < ne00 - remainder; i += 8) {
         __m256 res_vec = _mm256_loadu_ps(res + i); // load 8 floats from res
@@ -14448,10 +14449,11 @@
     for (i = ne00 - remainder; i < ne00; i++) {
         res[i] += tmp[i];
     }
-    // for (i = 0; i < dst->ne[0]; i++) {
-    //     res[i] += tmp[i];
-    // }
-
+#else
+    for (i = 0; i < dst->ne[0]; i++) {
+        res[i] += tmp[i];
+    }
+#endif
     atomic_flag_clear(&g_axpy_dense_lock);
 }
 
@@ -14586,6 +14588,7 @@ static void ggml_compute_forward_mul_mat_axpy(
     // compute the number of remaining elements
     int remainder = ne00 % 8;
 
+#if defined(__AVX2__)
     // use AVX instructions for vectorized computation
     for (i = 0; i < ne00 - remainder; i += 8) {
         __m256 res_vec = _mm256_loadu_ps(res + i); // load 8 floats from res
@@ -14598,8 +14601,11 @@
     for (i = ne00 - remainder; i < ne00; i++) {
         res[i] += tmp[i];
     }
-
-
+#else
+    for (i = 0; i < ne00; i++) {
+        res[i] += tmp[i];
+    }
+#endif
     atomic_flag_clear(&g_axpy_lock);
 }
 
@@ -14733,7 +14739,7 @@ static void ggml_compute_forward_mul_mat_axpy_q4_0(
 
     // compute the number of remaining elements
     int remainder = ne00 % 8;
-
+#if defined(__AVX2__)
     // use AVX instructions for vectorized computation
 
     for (i = 0; i < ne00 - remainder; i += 8) {
@@ -14748,6 +14754,11 @@
     {
         res[i] += tmp[i];
     }
+#else
+    for (i = 0; i < ne00; i++) {
+        res[i] += tmp[i];
+    }
+#endif
     atomic_flag_clear(&g_axpy_lock);
 }
 
@@ -14869,6 +14880,7 @@ static void ggml_compute_forward_mul_mat_axpy_head(
     // compute the number of remaining elements
     int remainder = ne00 % 8;
 
+#if defined(__AVX2__)
     // use AVX instructions for vectorized computation
     for (i = 0; i < ne00 - remainder; i += 8) {
         __m256 res_vec = _mm256_loadu_ps(res + i); // load 8 floats from res
@@ -14881,10 +14893,11 @@
     for (i = ne00 - remainder; i < ne00; i++) {
         res[i] += tmp[i];
     }
-    // for (i = 0; i < ne00; i++) {
-    //     res[i] = tmp[i];
-    // }
-
+#else
+    for (i = 0; i < ne00; i++) {
+        res[i] += tmp[i];
+    }
+#endif
     atomic_flag_clear(&g_axpy_head_lock);
 }
 
diff --git a/llama.cpp b/llama.cpp
index 3ad45ff2d..01123d8d9 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -2737,7 +2737,7 @@ struct llama_mlp_model_loader {
         offset = (offset + 31) & -32;
         file.seek(offset, SEEK_SET);
         // point to the mmaped mlp model file
-        mlp_tensor -> data = mapping -> addr + static_cast<size_t>(offset);
+        mlp_tensor -> data = (void *) (static_cast<char *>(mapping -> addr) + offset);
         file.seek(tensor_data_size, SEEK_CUR);
         return mlp_tensor;
     }
@@ -2757,7 +2757,7 @@ struct llama_augmentation_model_loader {
        // const int64_t ggml_aux_tensor_size = 4 * (100 * 100 + 5120*40*4 * ggml_tensor_overhead() + (int64_t)13824*5120*40*4);
        int model_layer = model->layers.size();
        int ffn_dim = model->layers[0].ffn_up->ne[1];
-       const int64_t ggml_aux_tensor_size = 4 * (100 * 100 + model_layer*ffn_dim*sizeof(float) * ggml_tensor_overhead() );
+       const size_t ggml_aux_tensor_size = 4 * (100 * 100 + model_layer*ffn_dim*sizeof(float) * ggml_tensor_overhead() );
        printf("augmentation buffer: %ld\n", ggml_aux_tensor_size);
        struct ggml_init_params params = {
            /*.mem_size =*/ ggml_aux_tensor_size,
@@ -2974,7 +2974,7 @@ static void llm_load_tensors(
     auto create_tensor = [&] (const std::string & name, const std::vector<int64_t> & ne, ggml_backend_type backend) -> ggml_tensor * {
         ggml_tensor * created_tensor = ml.create_tensor(ctx, name, ne, backend);
         if (created_tensor == nullptr) {
-            LLAMA_LOG_ERROR("%s: error: failed to create tensor '%s'\n", __func__, name);
+            LLAMA_LOG_ERROR("%s: error: failed to create tensor '%s'\n", __func__, name.c_str());
             return nullptr;
         }
         if (created_tensor->backend == GGML_BACKEND_GPU || created_tensor->backend == GGML_BACKEND_GPU_SPLIT) {
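
Note for reviewers: the ggml.c changes all apply the same pattern, gating each
AVX2 block behind #if defined(__AVX2__) and adding a plain scalar loop as the
fallback, which is what lets the kernels compile on Apple Silicon. Below is a
minimal, self-contained sketch of that pattern, not part of the patch; the
file name and the accumulate() helper are hypothetical, chosen only for this
demo.

/*
 * axpy_fallback_demo.c -- standalone sketch of the AVX2/scalar gate.
 * Build on x86:           cc -O2 -mavx2 axpy_fallback_demo.c
 * Build on Apple Silicon: cc -O2 axpy_fallback_demo.c   (__AVX2__ undefined)
 * Both builds should print the same output.
 */
#include <stdio.h>

#if defined(__AVX2__)
#include <immintrin.h>
#endif

/* res[i] += tmp[i] for i in [0, n), mirroring the axpy accumulation loops. */
static void accumulate(float *res, const float *tmp, int n) {
#if defined(__AVX2__)
    int remainder = n % 8; /* elements left over after the 8-wide loop */
    int i;
    /* process 8 floats per iteration with 256-bit vectors */
    for (i = 0; i < n - remainder; i += 8) {
        __m256 res_vec = _mm256_loadu_ps(res + i); /* load 8 floats from res */
        __m256 tmp_vec = _mm256_loadu_ps(tmp + i); /* load 8 floats from tmp */
        _mm256_storeu_ps(res + i, _mm256_add_ps(res_vec, tmp_vec));
    }
    /* scalar tail for the remaining 0..7 elements */
    for (i = n - remainder; i < n; i++) {
        res[i] += tmp[i];
    }
#else
    /* portable scalar fallback, e.g. for Apple Silicon builds */
    for (int i = 0; i < n; i++) {
        res[i] += tmp[i];
    }
#endif
}

int main(void) {
    float res[10] = {0}, tmp[10];
    for (int i = 0; i < 10; i++) tmp[i] = (float) i;
    accumulate(res, tmp, 10);
    for (int i = 0; i < 10; i++) printf("%g ", res[i]); /* expect 0 1 ... 9 */
    printf("\n");
    return 0;
}

Because the scalar branch is semantically identical to the vector branch plus
its tail loop, the preprocessor gate only selects an implementation and never
changes results, which keeps the fallback safe to enable per-platform.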