From 7733f0c76081b2a69b5f8b192db2db7c43629d58 Mon Sep 17 00:00:00 2001
From: Justine Tunney
Date: Mon, 25 Mar 2024 01:39:56 -0400
Subject: [PATCH] ggml : support AVX512VNNI (#6280)

This change causes some quants (e.g. Q4_0, Q8_0) to go faster on some
architectures (e.g. AMD Zen 4).
---
 ggml-quants.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ggml-quants.c b/ggml-quants.c
index 2eaca0593..f26798acc 100644
--- a/ggml-quants.c
+++ b/ggml-quants.c
@@ -132,7 +132,7 @@ static inline __m256 sum_i16_pairs_float(const __m256i x) {
 }
 
 static inline __m256 mul_sum_us8_pairs_float(const __m256i ax, const __m256i sy) {
-#if __AVXVNNI__
+#if defined(__AVXVNNI__) || defined(__AVX512VNNI__)
     const __m256i zero = _mm256_setzero_si256();
     const __m256i summed_pairs = _mm256_dpbusd_epi32(zero, ax, sy);
     return _mm256_cvtepi32_ps(summed_pairs);