- Cleanup

This commit is contained in:
3ooabkhxtn 2023-05-12 09:25:00 +00:00
parent 7379dd2dba
commit 8699fd0d43

20
ggml.c
View file

@ -485,23 +485,6 @@ static inline __m128i mul_sum_i8_pairs(const __m128i x, const __m128i y) {
return _mm_madd_epi16(ones, dot); return _mm_madd_epi16(ones, dot);
} }
// Reduce the four float lanes of x to a single scalar sum.
static inline float hsum_float_4(const __m128 x) {
    // After the first pass the lanes hold (x0+x1, x2+x3, x0+x1, x2+x3).
    const __m128 pair_sums = _mm_hadd_ps(x, x);
    // After the second pass every lane holds (x0+x1)+(x2+x3).
    const __m128 total = _mm_hadd_ps(pair_sums, pair_sums);
    // Extract lane 0 as the result.
    return _mm_cvtss_f32(total);
}
// Reduce the eight float lanes of x and y (four each) to a single scalar sum.
static inline float hsum_float_2x4(const __m128 x, const __m128 y) {
    // Lanes become (x0+x1, x2+x3, y0+y1, y2+y3).
    const __m128 pair_sums = _mm_hadd_ps(x, y);
    // Lanes become (sum(x), sum(y), sum(x), sum(y)).
    const __m128 vector_sums = _mm_hadd_ps(pair_sums, pair_sums);
    // Every lane now holds sum(x) + sum(y).
    const __m128 total = _mm_hadd_ps(vector_sums, vector_sums);
    // Extract lane 0 as the result.
    return _mm_cvtss_f32(total);
}
// horizontally add 4x4 floats // horizontally add 4x4 floats
static inline float hsum_float_4x4(const __m128 a, const __m128 b, const __m128 c, const __m128 d) { static inline float hsum_float_4x4(const __m128 a, const __m128 b, const __m128 c, const __m128 d) {
__m128 res_0 =_mm_hadd_ps(a, b); __m128 res_0 =_mm_hadd_ps(a, b);
@ -511,7 +494,7 @@ static inline float hsum_float_4x4(const __m128 a, const __m128 b, const __m128
res =_mm_hadd_ps(res, res); res =_mm_hadd_ps(res, res);
return _mm_cvtss_f32(res); return _mm_cvtss_f32(res);
} }
#endif #endif
#if __AVX__ || __AVX2__ || __AVX512F__ #if __AVX__ || __AVX2__ || __AVX512F__
@ -2170,6 +2153,7 @@ static void ggml_vec_dot_q4_0_q8_0(const int n, float * restrict s, const void *
__m128 acc_2 = _mm_setzero_ps(); __m128 acc_2 = _mm_setzero_ps();
__m128 acc_3 = _mm_setzero_ps(); __m128 acc_3 = _mm_setzero_ps();
// First round without accumulation
{ {
_mm_prefetch(&x[1] + sizeof(block_q4_0), _MM_HINT_T0); _mm_prefetch(&x[1] + sizeof(block_q4_0), _MM_HINT_T0);
_mm_prefetch(&y[1] + sizeof(block_q8_0), _MM_HINT_T0); _mm_prefetch(&y[1] + sizeof(block_q8_0), _MM_HINT_T0);