Enable Fused-Multiply-Add (FMA) instructions on MSVC
__FMA__ macro does not exist in MSVC
This commit is contained in:
parent
34c1072e49
commit
50ea0027a4
1 changed file with 8 additions and 1 deletion
9
ggml.c
9
ggml.c
|
@@ -407,9 +407,16 @@ static const size_t CACHE_LINE_SIZE_F32 = CACHE_LINE_SIZE/sizeof(float);
|
|||
|
||||
#define QK 32
|
||||
|
||||
#if __AVX2__ || __AVX512F__
|
||||
|
||||
// MSVC does not define __FMA__, but FMA support is implied by AVX2/AVX512
|
||||
#if defined(_MSC_VER) && !defined(__FMA__)
|
||||
#define __FMA__
|
||||
#endif
|
||||
|
||||
// AVX routines provided by GH user Const-me
|
||||
// ref: https://github.com/ggerganov/ggml/pull/27#issuecomment-1464934600
|
||||
#if __AVX2__ || __AVX512F__
|
||||
|
||||
// Unpack 32 4-bit fields into 32 bytes
|
||||
// The output vector contains 32 bytes, each one in [ 0 .. 15 ] interval
|
||||
static inline __m256i bytesFromNibbles( const uint8_t* rsi )
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue