From ab2fae200cd076794716aa284d94bbef88cd6c36 Mon Sep 17 00:00:00 2001 From: Kunnis Date: Fri, 12 Apr 2024 20:46:01 -0500 Subject: [PATCH] When doing inference on a CPU, if you have F16C available, it's better to use AVX instead of the lookup table. --- ggml-impl.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ggml-impl.h b/ggml-impl.h index e68b72877..40047a67f 100644 --- a/ggml-impl.h +++ b/ggml-impl.h @@ -106,6 +106,9 @@ typedef uint16_t ggml_fp16_internal_t; #ifdef _MSC_VER #define GGML_COMPUTE_FP16_TO_FP32(x) _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128(x))) #define GGML_COMPUTE_FP32_TO_FP16(x) _mm_extract_epi16(_mm_cvtps_ph(_mm_set_ss(x), 0), 0) +// With F16C, route GGML_FP16_TO_FP32 and GGML_FP32_TO_FP16 to the conversion intrinsics defined above; testing shows this is much faster than using the lookup tables. NOTE(review): this override is added only under _MSC_VER; confirm whether the non-MSVC branch should get it too. +#define GGML_FP16_TO_FP32(x) GGML_COMPUTE_FP16_TO_FP32(x) +#define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x) #else #define GGML_COMPUTE_FP16_TO_FP32(x) _cvtsh_ss(x) #define GGML_COMPUTE_FP32_TO_FP16(x) _cvtss_sh(x, 0)