fixed AVX code
This commit is contained in:
parent
2d61ed7fc6
commit
c68faa991b
1 changed files with 11 additions and 0 deletions
11
ggml.c
11
ggml.c
|
@ -623,6 +623,17 @@ static inline __m256 sum_i16_pairs_float(const __m128i xh, const __m128i xl) {
|
|||
return _mm256_cvtepi32_ps(summed_pairs);
|
||||
}
|
||||
|
||||
static inline __m256 mul_sum_us8_pairs_float(const __m256i ax, const __m256i sy) {
|
||||
const __m128i axl = _mm256_castsi256_si128(ax);
|
||||
const __m128i axh = _mm256_extractf128_si256(ax, 1);
|
||||
const __m128i syl = _mm256_castsi256_si128(sy);
|
||||
const __m128i syh = _mm256_extractf128_si256(sy, 1);
|
||||
// Perform multiplication and create 16-bit values
|
||||
const __m128i dotl = _mm_maddubs_epi16(axl, syl);
|
||||
const __m128i doth = _mm_maddubs_epi16(axh, syh);
|
||||
return sum_i16_pairs_float(doth, dotl);
|
||||
}
|
||||
|
||||
// multiply int8_t, add results pairwise twice and return as float vector
|
||||
static inline __m256 mul_sum_i8_pairs_float(const __m256i x, const __m256i y) {
|
||||
const __m128i xl = _mm256_castsi256_si128(x);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue