iq4_nl sgemm

netrunnereve 2024-06-20 22:59:06 -04:00
parent b1ef562bc1
commit 6559208845


@@ -235,6 +235,11 @@ template <> inline __m512 load(const ggml_fp16_t *p) {
}
#endif // __AVX512F__
////////////////////////////////////////////////////////////////////////////////////////////////////
// CONSTANTS
static const int8_t kvalues_iq4nl[16] = {-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113};
static const __m128i iq4nlt = _mm_loadu_si128((const __m128i*)kvalues_iq4nl);
////////////////////////////////////////////////////////////////////////////////////////////////////
// FLOATING POINT MATRIX MULTIPLICATION
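Note: kvalues_iq4nl is the 16-entry IQ4_NL codebook; each 4-bit quant index selects one of these non-uniformly spaced int8 values, which is then multiplied by the block's fp16 scale. A minimal scalar sketch of that decoding, assuming the usual ggml block_iq4_nl layout ({ ggml_half d; uint8_t qs[QK4_NL/2]; } with QK4_NL == 32 from ggml-common.h) and GGML_FP16_TO_FP32 from ggml-impl.h for the half-to-float conversion:

// Scalar sketch of one-block IQ4_NL dequantization (assumed layout, see note above).
static void dequant_iq4_nl_sketch(const block_iq4_nl *b, float out[QK4_NL]) {
    const float d = GGML_FP16_TO_FP32(b->d);                     // per-block scale
    for (int i = 0; i < QK4_NL/2; ++i) {
        out[i]            = d * kvalues_iq4nl[b->qs[i] & 0x0F];  // low nibble -> first half
        out[i + QK4_NL/2] = d * kvalues_iq4nl[b->qs[i] >> 4];    // high nibble -> second half
    }
}

The vector loads added below reproduce exactly this split: load0() covers the low nibbles (the first 16 values), load1() the high nibbles, and load() concatenates them into one __m256i; the scale is applied later in the dot-product path.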
@@ -787,6 +792,20 @@ class tinyBLAS_Q0_AVX {
return _mm_sub_epi8(_mm_and_si128(_mm_set1_epi8(15), _mm_srli_epi16(x, 4)), _mm_set1_epi8(8));
}
inline __m256i load(const block_iq4_nl *b) {
return MM256_SET_M128I(load1(b), load0(b));
}
inline __m128i load0(const block_iq4_nl *b) {
const __m128i x = _mm_loadu_si128((const __m128i *)(b->qs));
return _mm_shuffle_epi8(iq4nlt, _mm_and_si128(_mm_set1_epi8(15), x));
}
inline __m128i load1(const block_iq4_nl *b) {
const __m128i x = _mm_loadu_si128((const __m128i *)(b->qs));
return _mm_shuffle_epi8(iq4nlt, _mm_and_si128(_mm_set1_epi8(15), _mm_srli_epi16(x, 4)));
}
inline __m256 updot(__m256i u, __m256i s) {
__m256i res;
#if defined(__AVXVNNI__) || (defined(__AVX512VNNI__) && defined(__AVX512VL__))
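The load0()/load1() additions above use _mm_shuffle_epi8 as a 16-entry table lookup: with iq4nlt holding the codebook, each nibble masked to 0..15 selects the matching int8 codebook value, so half a block is dequantized (up to the scale) in a single shuffle. A standalone illustration of that lookup trick (not part of the diff), requiring only SSSE3:

#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

// Demonstrates _mm_shuffle_epi8 as a 16-entry lookup table, the trick used by
// load0()/load1(): each byte of 'idx' (already masked to 0..15) picks the byte
// of 'table' at that position.
int main() {
    static const int8_t kvalues_iq4nl[16] = {-127, -104, -83, -65, -49, -35, -22, -10,
                                             1, 13, 25, 38, 53, 69, 89, 113};
    const __m128i table = _mm_loadu_si128((const __m128i *)kvalues_iq4nl);
    const __m128i idx   = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7,
                                        8, 9, 10, 11, 12, 13, 14, 15);
    int8_t out[16];
    _mm_storeu_si128((__m128i *)out, _mm_shuffle_epi8(table, idx));
    for (int i = 0; i < 16; ++i)
        printf("%d ", out[i]);   // prints the codebook values in order
    printf("\n");
    return 0;
}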
@@ -1010,6 +1029,22 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
#endif
}
case GGML_TYPE_IQ4_NL: {
if (Btype != GGML_TYPE_Q8_0)
return false;
#if defined(__AVX2__) || defined(__AVX512F__) || defined(__AVX__)
tinyBLAS_Q0_AVX<block_iq4_nl, block_q8_0, float> tb{
k, (const block_iq4_nl *)A, lda,
(const block_q8_0 *)B, ldb,
(float *)C, ldc,
ith, nth};
tb.matmul(m, n, task);
return true;
#else
return false;
#endif
}
default:
return false;
}
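
The new GGML_TYPE_IQ4_NL case only accepts Q8_0 activations, so a caller is expected to quantize B to Q8_0 and fall back to ggml's generic path whenever llamafile_sgemm returns false. A hedged sketch of that calling pattern; the trailing parameters (ith, nth, task, Atype, Btype, Ctype) are assumed from the sgemm.h interface of this period and are not shown in this diff:

#include "ggml.h"
#include "sgemm.h"

// Hypothetical caller (names and parameter order after 'ldc' are assumptions,
// not confirmed by the diff; only the Btype == GGML_TYPE_Q8_0 check is).
static void mul_mat_iq4_nl_sketch(int64_t m, int64_t n, int64_t k,
                                  const void *A, int64_t lda,   // IQ4_NL weight blocks
                                  const void *B, int64_t ldb,   // activations quantized to Q8_0
                                  void *C, int64_t ldc,         // float output
                                  int ith, int nth) {           // thread index / thread count
    if (llamafile_sgemm(m, n, k, A, lda, B, ldb, C, ldc, ith, nth,
                        GGML_TASK_TYPE_COMPUTE,
                        GGML_TYPE_IQ4_NL,   // Atype: selects the new case
                        GGML_TYPE_Q8_0,     // Btype: required by the new case
                        GGML_TYPE_F32)) {   // Ctype
        return;  // handled by the tinyBLAS_Q0_AVX kernel above
    }
    // otherwise fall back to ggml's generic vec_dot based mul_mat
}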