win arm fixes

This commit is contained in:
Eric Sommerlade 2023-09-03 23:39:14 +01:00
parent dadbed99e6
commit d2a4c682ef
4 changed files with 16 additions and 4 deletions

View file

@@ -421,10 +421,16 @@ if (NOT MSVC)
endif() endif()
endif() endif()
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64") if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "ARM64" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64")
message(STATUS "ARM detected") message(STATUS "ARM detected")
if (MSVC) if (MSVC)
# TODO: arm msvc? # TODO: arm msvc?
# x86 add_compile_options(/arch:AVX2)
add_compile_definitions(__ARM_NEON)
add_compile_definitions(__ARM_FEATURE_FMA)
add_compile_definitions(__ARM_FEATURE_DOTPROD)
#add_compile_definitions(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
add_compile_definitions(__aarch64__) # MSVC _M_ARM64
else() else()
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6") if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6")
# Raspberry Pi 1, Zero # Raspberry Pi 1, Zero

2
ggml.c
View file

@@ -272,7 +272,7 @@ typedef double ggml_float;
// 16-bit float // 16-bit float
// on Arm, we use __fp16 // on Arm, we use __fp16
// on x86, we use uint16_t // on x86, we use uint16_t
#ifdef __ARM_NEON #if defined(__ARM_NEON) && !defined(_MSC_VER)
// if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example: // if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example:
// //

2
ggml.h
View file

@@ -255,7 +255,7 @@
extern "C" { extern "C" {
#endif #endif
#ifdef __ARM_NEON #if defined(__ARM_NEON) && !defined(_MSC_VER)
// we use the built-in 16-bit float type // we use the built-in 16-bit float type
typedef __fp16 ggml_fp16_t; typedef __fp16 ggml_fp16_t;
#else #else

View file

@@ -2526,7 +2526,13 @@ void ggml_vec_dot_q4_K_q8_K(const int n, float * restrict s, const void * restri
memcpy(utmp, x[i].scales, 12); memcpy(utmp, x[i].scales, 12);
const uint32x2_t mins8 = {utmp[1] & kmask1, ((utmp[2] >> 4) & kmask2) | (((utmp[1] >> 6) & kmask3) << 4)}; #ifndef _MSC_VER
uint32x2_t mins8 = {utmp[1] & kmask1, ((utmp[2] >> 4) & kmask2) | (((utmp[1] >> 6) & kmask3) << 4)};
#else
uint32x2_t mins8;
mins8.n64_u32[0] = utmp[1] & kmask1;
mins8.n64_u32[1] = ((utmp[2] >> 4) & kmask2) | (((utmp[1] >> 6) & kmask3) << 4);
#endif
utmp[1] = (utmp[2] & kmask2) | (((utmp[0] >> 6) & kmask3) << 4); utmp[1] = (utmp[2] & kmask2) | (((utmp[0] >> 6) & kmask3) << 4);
utmp[0] &= kmask1; utmp[0] &= kmask1;