ggml : refactor AVX part of ggml_vec_dot_q4_0()

https://github.com/ggerganov/llama.cpp/pull/617#issuecomment-1489985645
This commit is contained in:
Sergey Pershukov 2023-03-30 21:44:34 +05:00
parent 93a3169284
commit 80dad7923e

20
ggml.c
View file

@@ -2021,17 +2021,17 @@ static void ggml_vec_dot_q4_0(const int n, float * restrict s, const void * rest
bx = _mm_sub_epi8( bx, off ); bx = _mm_sub_epi8( bx, off );
by = _mm_sub_epi8( by, off ); by = _mm_sub_epi8( by, off );
// Sign-extend first 8 signed bytes into int16_t // Get absolute values of x vectors
__m128i x16 = _mm_cvtepi8_epi16( bx ); const __m128i ax = _mm_sign_epi8(bx, bx);
__m128i y16 = _mm_cvtepi8_epi16( by );
// Compute products of int16_t integers, add pairwise
i32[j] = _mm_madd_epi16( x16, y16 );
// Sign-extend last 8 signed bytes into int16_t vectors // Sign the values of the y vectors
x16 = _mm_cvtepi8_epi16( _mm_srli_si128( bx, 8 ) ); const __m128i sy = _mm_sign_epi8(by, bx);
y16 = _mm_cvtepi8_epi16( _mm_srli_si128( by, 8 ) );
// Accumulate products of int16_t integers // Perform multiplication and create 16-bit values
i32[j] = _mm_add_epi32( i32[j], _mm_madd_epi16( x16, y16 ) ); const __m128i dot = _mm_maddubs_epi16(ax, sy);
const __m128i ones = _mm_set1_epi16(1);
i32[j] = _mm_madd_epi16(ones, dot);
} }
// Convert int32_t to float // Convert int32_t to float