Enable Fused-Multiply-Add (FMA) instructions on MSVC

__FMA__ macro does not exist in MSVC
2023-03-22 03:46:48 +02:00 · 2023-03-22 03:46:48 +02:00 · 50ea0027a4
commit 50ea0027a4
parent 34c1072e49
1 changed file with 8 additions and 1 deletion
--- a/ggml.c
+++ b/ggml.c
@@ -407,9 +407,16 @@ static const size_t CACHE_LINE_SIZE_F32 = CACHE_LINE_SIZE/sizeof(float);

 #define QK 32

+#if __AVX2__ || __AVX512F__
+
+// __FMA__ is not defined in MSVC, however it is implied with AVX2/AVX512
+#if defined(_MSC_VER) && !defined(__FMA__)
+#define __FMA__
+#endif
+
 // AVX routines provided by GH user Const-me
 // ref: https://github.com/ggerganov/ggml/pull/27#issuecomment-1464934600
-#if __AVX2__ || __AVX512F__
+
 // Unpack 32 4-bit fields into 32 bytes
 // The output vector contains 32 bytes, each one in [ 0 .. 15 ] interval
 static inline __m256i bytesFromNibbles( const uint8_t* rsi )