From 6f997d299a34c450235545ffcd5127ed9804407f Mon Sep 17 00:00:00 2001 From: syx Date: Tue, 12 Dec 2023 13:50:25 +0800 Subject: [PATCH] add fall back for axpy mulmat --- ggml.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/ggml.c b/ggml.c index 5f1da9d16..f1326096d 100644 --- a/ggml.c +++ b/ggml.c @@ -14298,7 +14298,7 @@ static void ggml_axpy_normal_f16(const int n, const ggml_fp16_t * vx, const ggml } } static void ggml_axpy_avx_f16(const int n, const ggml_fp16_t * restrict vx, const ggml_fp16_t * restrict vy, void* restrict vz, ggml_fp16_t alpha) { - +#if defined(__AVX2__) float *result = (float *)vz; float alpha_f32 = GGML_FP16_TO_FP32(alpha); __m256 scale = _mm256_set1_ps(alpha_f32); // 创建scale向量 @@ -14309,6 +14309,13 @@ static void ggml_axpy_avx_f16(const int n, const ggml_fp16_t * restrict vx, cons __m256 res = _mm256_fmadd_ps(vx_f32, scale, vy_f32); // 执行向量加法和乘法操作 _mm256_storeu_ps((float*)(&result[i]), res); // 存储结果 } +#else + float *res = (float *)vz; + float alpha_convert = GGML_FP16_TO_FP32(alpha); + for (int i = 0; i < n; i++) { + res[i] = res[i] + (GGML_FP16_TO_FP32(vx[i])*alpha_convert); + } +#endif } atomic_flag g_axpy_dense_lock = ATOMIC_FLAG_INIT;