ggml : refactor AVX part of ggml_vec_dot_q4_0()
https://github.com/ggerganov/llama.cpp/pull/617#issuecomment-1489985645
This commit is contained in:
parent
93a3169284
commit
80dad7923e
1 changed file with 10 additions and 10 deletions
20
ggml.c
20
ggml.c
|
@@ -2021,17 +2021,17 @@ static void ggml_vec_dot_q4_0(const int n, float * restrict s, const void * rest
|
||||||
bx = _mm_sub_epi8( bx, off );
|
bx = _mm_sub_epi8( bx, off );
|
||||||
by = _mm_sub_epi8( by, off );
|
by = _mm_sub_epi8( by, off );
|
||||||
|
|
||||||
// Sign-extend first 8 signed bytes into int16_t
|
// Get absolute values of x vectors
|
||||||
__m128i x16 = _mm_cvtepi8_epi16( bx );
|
const __m128i ax = _mm_sign_epi8(bx, bx);
|
||||||
__m128i y16 = _mm_cvtepi8_epi16( by );
|
|
||||||
// Compute products of int16_t integers, add pairwise
|
|
||||||
i32[j] = _mm_madd_epi16( x16, y16 );
|
|
||||||
|
|
||||||
// Sign-extend last 8 signed bytes into int16_t vectors
|
// Sign the values of the y vectors
|
||||||
x16 = _mm_cvtepi8_epi16( _mm_srli_si128( bx, 8 ) );
|
const __m128i sy = _mm_sign_epi8(by, bx);
|
||||||
y16 = _mm_cvtepi8_epi16( _mm_srli_si128( by, 8 ) );
|
|
||||||
// Accumulate products of int16_t integers
|
// Perform multiplication and create 16-bit values
|
||||||
i32[j] = _mm_add_epi32( i32[j], _mm_madd_epi16( x16, y16 ) );
|
const __m128i dot = _mm_maddubs_epi16(ax, sy);
|
||||||
|
|
||||||
|
const __m128i ones = _mm_set1_epi16(1);
|
||||||
|
i32[j] = _mm_madd_epi16(ones, dot);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convert int32_t to float
|
// Convert int32_t to float
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue