Enable F16C/CVT16 vector extensions on MSVC

MSVC does not define the __F16C__ macro, but F16C support is implied by AVX2/AVX512.
2023-03-22 04:05:49 +02:00 · 2023-03-22 04:05:49 +02:00 · c43d45696f
commit c43d45696f
parent 50ea0027a4
1 changed file with 11 additions and 8 deletions
--- a/ggml.c
+++ b/ggml.c
@@ -79,6 +79,16 @@ static int sched_yield (void) {
 typedef void* thread_ret_t;
 #endif

+// __FMA__ and __F16C__ are not defined in MSVC, however they are implied with AVX2/AVX512
+#if defined(_MSC_VER) && (defined(__AVX2__) || defined(__AVX512F__))
+#ifndef __FMA__
+#define __FMA__
+#endif
+#ifndef __F16C__
+#define __F16C__
+#endif
+#endif
+
 #ifdef __HAIKU__
 #define static_assert(cond, msg) _Static_assert(cond, msg)
 #endif
@@ -407,16 +417,9 @@ static const size_t CACHE_LINE_SIZE_F32 = CACHE_LINE_SIZE/sizeof(float);

 #define QK 32

-#if __AVX2__ || __AVX512F__
-
-// __FMA__ is not defined in MSVC, however it is implied with AVX2/AVX512
-#if defined(_MSC_VER) && !defined(__FMA__)
-#define __FMA__
-#endif
-
 // AVX routines provided by GH user Const-me
 // ref: https://github.com/ggerganov/ggml/pull/27#issuecomment-1464934600
-
+#if __AVX2__ || __AVX512F__
 // Unpack 32 4-bit fields into 32 bytes
 // The output vector contains 32 bytes, each one in [ 0 .. 15 ] interval
 static inline __m256i bytesFromNibbles( const uint8_t* rsi )