add fallback for m chip & fix compiler bugs (#4)
parent e44f6401ec
commit a456d83bbe
3 changed files with 29 additions and 16 deletions
CMakeLists.txt

@@ -37,7 +37,7 @@ endif()
 #
 
 if (APPLE)
-    set(LLAMA_METAL_DEFAULT ON)
+    set(LLAMA_METAL_DEFAULT OFF) # metal has not been supported on Apple Silicon yet
 else()
     set(LLAMA_METAL_DEFAULT OFF)
 endif()
ggml.c
@@ -146,7 +146,7 @@ void ggml_print_backtrace(void) {
 }
 #endif
 
-#define GGML_PERF
+// #define GGML_PERF
 #define GGML_DEBUG 0
 #define GGML_GELU_FP16
 #define GGML_GELU_QUICK_FP16
@@ -14436,6 +14436,7 @@ static void ggml_compute_forward_mul_mat_axpy_dense(
    // count the elements left over (remainder)
    int remainder = ne00 % 8;

#if defined(__AVX2__)
    // vectorized computation using AVX instructions
    for (i = 0; i < ne00 - remainder; i += 8) {
        __m256 res_vec = _mm256_loadu_ps(res + i); // load 8 floats from res
@@ -14448,10 +14449,11 @@ static void ggml_compute_forward_mul_mat_axpy_dense(
    for (i = ne00 - remainder; i < ne00; i++) {
        res[i] += tmp[i];
    }
    // for (i = 0; i < dst->ne[0]; i++) {
    //     res[i] += tmp[i];
    // }

#else
    for (i = 0; i < dst->ne[0]; i++) {
        res[i] += tmp[i];
    }
#endif
    atomic_flag_clear(&g_axpy_dense_lock);

}
@@ -14586,6 +14588,7 @@ static void ggml_compute_forward_mul_mat_axpy(
    // count the elements left over (remainder)
    int remainder = ne00 % 8;

#if defined(__AVX2__)
    // vectorized computation using AVX instructions
    for (i = 0; i < ne00 - remainder; i += 8) {
        __m256 res_vec = _mm256_loadu_ps(res + i); // load 8 floats from res
@@ -14598,8 +14601,11 @@ static void ggml_compute_forward_mul_mat_axpy(
    for (i = ne00 - remainder; i < ne00; i++) {
        res[i] += tmp[i];
    }

#else
    for (i = 0; i < ne00; i++) {
        res[i] += tmp[i];
    }
#endif
    atomic_flag_clear(&g_axpy_lock);
}
@@ -14733,7 +14739,7 @@ static void ggml_compute_forward_mul_mat_axpy_q4_0(

    // count the elements left over (remainder)
    int remainder = ne00 % 8;

#if defined(__AVX2__)
    // vectorized computation using AVX instructions
    for (i = 0; i < ne00 - remainder; i += 8)
    {
@@ -14748,6 +14754,11 @@ static void ggml_compute_forward_mul_mat_axpy_q4_0(
    {
        res[i] += tmp[i];
    }
#else
    for (i = 0; i < ne00; i++) {
        res[i] += tmp[i];
    }
#endif
    atomic_flag_clear(&g_axpy_lock);
}
@@ -14869,6 +14880,7 @@ static void ggml_compute_forward_mul_mat_axpy_head(
    // count the elements left over (remainder)
    int remainder = ne00 % 8;

#if defined(__AVX2__)
    // vectorized computation using AVX instructions
    for (i = 0; i < ne00 - remainder; i += 8) {
        __m256 res_vec = _mm256_loadu_ps(res + i); // load 8 floats from res
@@ -14881,10 +14893,11 @@ static void ggml_compute_forward_mul_mat_axpy_head(
    for (i = ne00 - remainder; i < ne00; i++) {
        res[i] += tmp[i];
    }
    // for (i = 0; i < ne00; i++) {
    //     res[i] = tmp[i];
    // }

#else
    for (i = 0; i < ne00; i++) {
        res[i] += tmp[i];
    }
#endif
    atomic_flag_clear(&g_axpy_head_lock);

}
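The axpy hunks above all share the same structure: when __AVX2__ is available, the bulk of the row is processed 8 floats at a time with AVX intrinsics (such as the _mm256_loadu_ps shown) and a scalar loop handles the remaining ne00 % 8 elements; without AVX2, a plain scalar loop does all the work, which is the fallback that lets the code build on machines without AVX2, e.g. Apple Silicon. A minimal standalone sketch of that pattern follows; the function name and signature are illustrative only, not the actual ggml code.

    #if defined(__AVX2__)
    #include <immintrin.h>
    #endif

    // Illustrative sketch of the accumulate-with-fallback pattern.
    static void add_inplace_f32(float *res, const float *tmp, int n) {
    #if defined(__AVX2__)
        int remainder = n % 8;                        // elements left after the 8-wide steps
        int i = 0;
        for (; i < n - remainder; i += 8) {
            __m256 r = _mm256_loadu_ps(res + i);      // load 8 floats from res
            __m256 t = _mm256_loadu_ps(tmp + i);      // load 8 floats from tmp
            _mm256_storeu_ps(res + i, _mm256_add_ps(r, t));
        }
        for (; i < n; i++) {                          // scalar tail for the remainder
            res[i] += tmp[i];
        }
    #else
        for (int i = 0; i < n; i++) {                 // portable fallback (e.g. Apple Silicon)
            res[i] += tmp[i];
        }
    #endif
    }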
|
||||
|
|
|
@@ -2737,7 +2737,7 @@ struct llama_mlp_model_loader {
         offset = (offset + 31) & -32;
         file.seek(offset, SEEK_SET);
         // point to the mmaped mlp model file
-        mlp_tensor -> data = mapping -> addr + static_cast<std::streamoff>(offset);
+        mlp_tensor -> data = (void *) (static_cast<char *>(mapping -> addr) + offset);
         file.seek(tensor_data_size, SEEK_CUR);
         return mlp_tensor;
     }
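The replaced line removes arithmetic on a void* (mapping -> addr): standard C++ does not define pointer arithmetic on void*, so the base address is first cast to char* to get a byte-granular pointer and the result is cast back to void*. A minimal sketch of that idiom, with an illustrative helper name that is not part of llama.cpp:

    #include <cstddef>

    // Return `base` advanced by `offset` bytes.
    static void * ptr_add_bytes(void * base, std::size_t offset) {
        return (void *) (static_cast<char *>(base) + offset);  // byte-wise offset via char*
    }

    // usage (illustrative): tensor->data = ptr_add_bytes(mapping->addr, offset);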
@@ -2757,7 +2757,7 @@ struct llama_augmentation_model_loader {
         // const int64_t ggml_aux_tensor_size = 4 * (100 * 100 + 5120*40*4 * ggml_tensor_overhead() + (int64_t)13824*5120*40*4);
         int model_layer = model->layers.size();
         int ffn_dim = model->layers[0].ffn_up->ne[1];
-        const int64_t ggml_aux_tensor_size = 4 * (100 * 100 + model_layer*ffn_dim*sizeof(float) * ggml_tensor_overhead() );
+        const size_t ggml_aux_tensor_size = 4 * (100 * 100 + model_layer*ffn_dim*sizeof(float) * ggml_tensor_overhead() );
         printf("augmentation buffer: %ld\n", ggml_aux_tensor_size);
         struct ggml_init_params params = {
             /*.mem_size =*/ ggml_aux_tensor_size,
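Switching ggml_aux_tensor_size to size_t matches the type of the mem_size field in ggml_init_params, which is a size_t, and avoids the signed/unsigned mismatch an int64_t introduces on some toolchains; with a size_t the strictly matching printf specifier is %zu rather than %ld. A small sketch of the size computation follows; the dimension values are illustrative only, not the real model_layer/ffn_dim, and ggml_tensor_overhead() is omitted.

    #include <cstdio>
    #include <cstddef>

    int main() {
        const std::size_t model_layer = 40;       // illustrative layer count
        const std::size_t ffn_dim     = 13824;    // illustrative FFN width
        const std::size_t aux_size    = 4 * (100 * 100 + model_layer * ffn_dim * sizeof(float));
        std::printf("augmentation buffer: %zu\n", aux_size);   // %zu matches size_t
        return 0;
    }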
@@ -2974,7 +2974,7 @@ static void llm_load_tensors(
     auto create_tensor = [&] (const std::string & name, const std::vector<int64_t> & ne, ggml_backend_type backend) -> ggml_tensor * {
         ggml_tensor * created_tensor = ml.create_tensor(ctx, name, ne, backend);
         if (created_tensor == nullptr) {
-            LLAMA_LOG_ERROR("%s: error: failed to create tensor '%s'\n", __func__, name);
+            LLAMA_LOG_ERROR("%s: error: failed to create tensor '%s'\n", __func__, name.c_str());
             return nullptr;
         }
         if (created_tensor->backend == GGML_BACKEND_GPU || created_tensor->backend == GGML_BACKEND_GPU_SPLIT) {
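The logging fix addresses undefined behaviour: name is a std::string, but a printf-style "%s" conversion expects a NUL-terminated C string, so the argument must be name.c_str(). A minimal sketch of the same fix, using fprintf in place of the LLAMA_LOG_ERROR macro:

    #include <cstdio>
    #include <string>

    static void report_missing_tensor(const std::string & name) {
        // std::fprintf(stderr, "failed to create tensor '%s'\n", name);      // UB: std::string passed to %s
        std::fprintf(stderr, "failed to create tensor '%s'\n", name.c_str()); // correct: C string passed to %s
    }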