win arm fixes
This commit is contained in:
parent
dadbed99e6
commit
d2a4c682ef
4 changed files with 16 additions and 4 deletions
|
@ -421,10 +421,16 @@ if (NOT MSVC)
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64")
|
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "ARM64" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64")
|
||||||
message(STATUS "ARM detected")
|
message(STATUS "ARM detected")
|
||||||
if (MSVC)
|
if (MSVC)
|
||||||
# TODO: arm msvc?
|
# TODO: arm msvc?
|
||||||
|
# x86 add_compile_options(/arch:AVX2)
|
||||||
|
add_compile_definitions(__ARM_NEON)
|
||||||
|
add_compile_definitions(__ARM_FEATURE_FMA)
|
||||||
|
add_compile_definitions(__ARM_FEATURE_DOTPROD)
|
||||||
|
#add_compile_definitions(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
|
||||||
|
add_compile_definitions(__aarch64__) # MSVC _M_ARM64
|
||||||
else()
|
else()
|
||||||
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6")
|
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6")
|
||||||
# Raspberry Pi 1, Zero
|
# Raspberry Pi 1, Zero
|
||||||
|
|
2
ggml.c
2
ggml.c
|
@ -272,7 +272,7 @@ typedef double ggml_float;
|
||||||
// 16-bit float
|
// 16-bit float
|
||||||
// on Arm, we use __fp16
|
// on Arm, we use __fp16
|
||||||
// on x86, we use uint16_t
|
// on x86, we use uint16_t
|
||||||
#ifdef __ARM_NEON
|
#if defined(__ARM_NEON) && !defined(_MSC_VER)
|
||||||
|
|
||||||
// if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example:
|
// if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example:
|
||||||
//
|
//
|
||||||
|
|
2
ggml.h
2
ggml.h
|
@ -255,7 +255,7 @@
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef __ARM_NEON
|
#if defined(__ARM_NEON) && !defined(_MSC_VER)
|
||||||
// we use the built-in 16-bit float type
|
// we use the built-in 16-bit float type
|
||||||
typedef __fp16 ggml_fp16_t;
|
typedef __fp16 ggml_fp16_t;
|
||||||
#else
|
#else
|
||||||
|
|
|
@ -2526,7 +2526,13 @@ void ggml_vec_dot_q4_K_q8_K(const int n, float * restrict s, const void * restri
|
||||||
|
|
||||||
memcpy(utmp, x[i].scales, 12);
|
memcpy(utmp, x[i].scales, 12);
|
||||||
|
|
||||||
const uint32x2_t mins8 = {utmp[1] & kmask1, ((utmp[2] >> 4) & kmask2) | (((utmp[1] >> 6) & kmask3) << 4)};
|
#ifndef _MSC_VER
|
||||||
|
uint32x2_t mins8 = {utmp[1] & kmask1, ((utmp[2] >> 4) & kmask2) | (((utmp[1] >> 6) & kmask3) << 4)};
|
||||||
|
#else
|
||||||
|
uint32x2_t mins8;
|
||||||
|
mins8.n64_u32[0] = utmp[1] & kmask1;
|
||||||
|
mins8.n64_u32[1] = ((utmp[2] >> 4) & kmask2) | (((utmp[1] >> 6) & kmask3) << 4);
|
||||||
|
#endif
|
||||||
utmp[1] = (utmp[2] & kmask2) | (((utmp[0] >> 6) & kmask3) << 4);
|
utmp[1] = (utmp[2] & kmask2) | (((utmp[0] >> 6) & kmask3) << 4);
|
||||||
utmp[0] &= kmask1;
|
utmp[0] &= kmask1;
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue