Merge pull request #1 from es0m/last-working-old-file-format

arm64 support for windows
This commit is contained in:
Eric Sommerlade 2023-09-04 10:29:01 +01:00 committed by GitHub
commit 245f9e8d82
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 16 additions and 4 deletions

View file

@ -465,10 +465,16 @@ if (NOT MSVC)
endif()
endif()
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64")
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "ARM64" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64")
message(STATUS "ARM detected")
if (MSVC)
# TODO: arm msvc?
# x86 add_compile_options(/arch:AVX2)
add_compile_definitions(__ARM_NEON)
add_compile_definitions(__ARM_FEATURE_FMA)
add_compile_definitions(__ARM_FEATURE_DOTPROD)
#add_compile_definitions(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
add_compile_definitions(__aarch64__) # MSVC _M_ARM64
else()
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6")
# Raspberry Pi 1, Zero

2
ggml.c
View file

@ -267,7 +267,7 @@ typedef double ggml_float;
// 16-bit float
// on Arm, we use __fp16
// on x86, we use uint16_t
#ifdef __ARM_NEON
#if defined(__ARM_NEON) && !defined(_MSC_VER)
// if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example:
//

2
ggml.h
View file

@ -270,7 +270,7 @@ extern "C" {
#if defined(__ARM_NEON) && defined(__CUDACC__)
typedef half ggml_fp16_t;
#elif defined(__ARM_NEON)
#elif defined(__ARM_NEON) && !defined(_MSC_VER)
typedef __fp16 ggml_fp16_t;
#else
typedef uint16_t ggml_fp16_t;

View file

@ -2602,7 +2602,13 @@ void ggml_vec_dot_q4_K_q8_K(const int n, float * restrict s, const void * restri
memcpy(utmp, x[i].scales, 12);
const uint32x2_t mins8 = {utmp[1] & kmask1, ((utmp[2] >> 4) & kmask2) | (((utmp[1] >> 6) & kmask3) << 4)};
#ifndef _MSC_VER
uint32x2_t mins8 = {utmp[1] & kmask1, ((utmp[2] >> 4) & kmask2) | (((utmp[1] >> 6) & kmask3) << 4)};
#else
uint32x2_t mins8;
mins8.n64_u32[0] = utmp[1] & kmask1;
mins8.n64_u32[1] = ((utmp[2] >> 4) & kmask2) | (((utmp[1] >> 6) & kmask3) << 4);
#endif
utmp[1] = (utmp[2] & kmask2) | (((utmp[0] >> 6) & kmask3) << 4);
utmp[0] &= kmask1;