From 50ea0027a4f50ed0f2116f6120751116a5f8f135 Mon Sep 17 00:00:00 2001
From: anzz1
Date: Wed, 22 Mar 2023 03:46:48 +0200
Subject: [PATCH] Enable Fused-Multiply-Add (FMA) instructions on MSVC

__FMA__ macro does not exist in MSVC
---
 ggml.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/ggml.c b/ggml.c
index ec00e6317..7edcf4c2f 100644
--- a/ggml.c
+++ b/ggml.c
@@ -407,9 +407,16 @@ static const size_t CACHE_LINE_SIZE_F32 = CACHE_LINE_SIZE/sizeof(float);
 
 #define QK 32
 
+#if __AVX2__ || __AVX512F__
+
+// __FMA__ is not defined in MSVC, however it is implied with AVX2/AVX512; define it as 1 (the GCC/Clang value) so both `#ifdef __FMA__` and `#if __FMA__` work
+#if defined(_MSC_VER) && !defined(__FMA__)
+#define __FMA__ 1
+#endif
+
 // AVX routines provided by GH user Const-me
 // ref: https://github.com/ggerganov/ggml/pull/27#issuecomment-1464934600
-#if __AVX2__ || __AVX512F__
+
 // Unpack 32 4-bit fields into 32 bytes
 // The output vector contains 32 bytes, each one in [ 0 .. 15 ] interval
 static inline __m256i bytesFromNibbles( const uint8_t* rsi )