Try to fix compile warnings on macOS; addresses issue #2036
This commit is contained in:
parent
9d23589d63
commit
a952716d35
1 changed file with 10 additions and 1 deletion
11
ggml.c
11
ggml.c
|
@ -574,6 +574,8 @@ static const size_t CACHE_LINE_SIZE_F32 = CACHE_LINE_SIZE/sizeof(float);
|
||||||
#define MM256_SET_M128I(a, b) _mm256_insertf128_si256(_mm256_castsi128_si256(b), (a), 1)
|
#define MM256_SET_M128I(a, b) _mm256_insertf128_si256(_mm256_castsi128_si256(b), (a), 1)
|
||||||
|
|
||||||
#if defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__) || defined(__SSSE3__)
|
#if defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__) || defined(__SSSE3__)
|
||||||
|
|
||||||
|
#if !__ARM_NEON && !__AVX2__ && (__AVX__ || __SSSE3__)
|
||||||
// multiply int8_t, add results pairwise twice
|
// multiply int8_t, add results pairwise twice
|
||||||
static inline __m128i mul_sum_i8_pairs(const __m128i x, const __m128i y) {
|
static inline __m128i mul_sum_i8_pairs(const __m128i x, const __m128i y) {
|
||||||
// Get absolute values of x vectors
|
// Get absolute values of x vectors
|
||||||
|
@ -585,6 +587,7 @@ static inline __m128i mul_sum_i8_pairs(const __m128i x, const __m128i y) {
|
||||||
const __m128i ones = _mm_set1_epi16(1);
|
const __m128i ones = _mm_set1_epi16(1);
|
||||||
return _mm_madd_epi16(ones, dot);
|
return _mm_madd_epi16(ones, dot);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#if __AVX__ || __AVX2__ || __AVX512F__
|
#if __AVX__ || __AVX2__ || __AVX512F__
|
||||||
// horizontally add 8 floats
|
// horizontally add 8 floats
|
||||||
|
@ -605,6 +608,7 @@ static inline int hsum_i32_8(const __m256i a) {
|
||||||
return _mm_cvtsi128_si32(_mm_add_epi32(sum64, hi32));
|
return _mm_cvtsi128_si32(_mm_add_epi32(sum64, hi32));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if !__AVX2__ && __AVX__
|
||||||
// horizontally add 4 int32_t
|
// horizontally add 4 int32_t
|
||||||
static inline int hsum_i32_4(const __m128i a) {
|
static inline int hsum_i32_4(const __m128i a) {
|
||||||
const __m128i hi64 = _mm_unpackhi_epi64(a, a);
|
const __m128i hi64 = _mm_unpackhi_epi64(a, a);
|
||||||
|
@ -612,6 +616,7 @@ static inline int hsum_i32_4(const __m128i a) {
|
||||||
const __m128i hi32 = _mm_shuffle_epi32(sum64, _MM_SHUFFLE(2, 3, 0, 1));
|
const __m128i hi32 = _mm_shuffle_epi32(sum64, _MM_SHUFFLE(2, 3, 0, 1));
|
||||||
return _mm_cvtsi128_si32(_mm_add_epi32(sum64, hi32));
|
return _mm_cvtsi128_si32(_mm_add_epi32(sum64, hi32));
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#if defined(__AVX2__) || defined(__AVX512F__)
|
#if defined(__AVX2__) || defined(__AVX512F__)
|
||||||
// spread 32 bits to 32 bytes { 0x00, 0xFF }
|
// spread 32 bits to 32 bytes { 0x00, 0xFF }
|
||||||
|
@ -671,6 +676,7 @@ static inline __m256 mul_sum_i8_pairs_float(const __m256i x, const __m256i y) {
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
static inline __m128i packNibbles( __m256i bytes )
|
static inline __m128i packNibbles( __m256i bytes )
|
||||||
{
|
{
|
||||||
// Move bits within 16-bit lanes from 0000_abcd_0000_efgh into 0000_0000_abcd_efgh
|
// Move bits within 16-bit lanes from 0000_abcd_0000_efgh into 0000_0000_abcd_efgh
|
||||||
|
@ -691,6 +697,7 @@ static inline __m128i packNibbles( __m256i bytes )
|
||||||
return _mm_packus_epi16( r0, r1 );
|
return _mm_packus_epi16( r0, r1 );
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
#elif defined(__AVX__)
|
#elif defined(__AVX__)
|
||||||
// spread 32 bits to 32 bytes { 0x00, 0xFF }
|
// spread 32 bits to 32 bytes { 0x00, 0xFF }
|
||||||
static inline __m256i bytes_from_bits_32(const uint8_t * x) {
|
static inline __m256i bytes_from_bits_32(const uint8_t * x) {
|
||||||
|
@ -759,6 +766,7 @@ static inline __m256 mul_sum_i8_pairs_float(const __m256i x, const __m256i y) {
|
||||||
return sum_i16_pairs_float(doth, dotl);
|
return sum_i16_pairs_float(doth, dotl);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
static inline __m128i packNibbles( __m128i bytes1, __m128i bytes2 )
|
static inline __m128i packNibbles( __m128i bytes1, __m128i bytes2 )
|
||||||
{
|
{
|
||||||
// Move bits within 16-bit lanes from 0000_abcd_0000_efgh into 0000_0000_abcd_efgh
|
// Move bits within 16-bit lanes from 0000_abcd_0000_efgh into 0000_0000_abcd_efgh
|
||||||
|
@ -774,6 +782,7 @@ static inline __m128i packNibbles( __m128i bytes1, __m128i bytes2 )
|
||||||
|
|
||||||
return _mm_packus_epi16( bytes1, bytes2);
|
return _mm_packus_epi16( bytes1, bytes2);
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
#endif
|
#endif
|
||||||
#elif defined(__SSSE3__)
|
#elif defined(__SSSE3__)
|
||||||
// horizontally add 4x4 floats
|
// horizontally add 4x4 floats
|
||||||
|
@ -1883,7 +1892,7 @@ quantize_fns_t ggml_internal_get_quantize_fn(size_t i) {
|
||||||
const __m128 t0 = _mm_add_ps(_mm256_castps256_ps128(x[0]), \
|
const __m128 t0 = _mm_add_ps(_mm256_castps256_ps128(x[0]), \
|
||||||
_mm256_extractf128_ps(x[0], 1)); \
|
_mm256_extractf128_ps(x[0], 1)); \
|
||||||
const __m128 t1 = _mm_hadd_ps(t0, t0); \
|
const __m128 t1 = _mm_hadd_ps(t0, t0); \
|
||||||
res = _mm_cvtss_f32(_mm_hadd_ps(t1, t1)); \
|
res = (ggml_float)_mm_cvtss_f32(_mm_hadd_ps(t1, t1)); \
|
||||||
}
|
}
|
||||||
// TODO: is this optimal ?
|
// TODO: is this optimal ?
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue