From be09d983fed642160b99df07ea34c70b3fa2a707 Mon Sep 17 00:00:00 2001 From: HungMingWu Date: Mon, 10 Feb 2025 15:27:09 +0800 Subject: [PATCH] Move kvalues_iq4nl definition to ggml-common.h Signed-off-by: HungMingWu --- ggml/src/ggml-common.h | 4 ++++ ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp | 2 -- ggml/src/ggml-cpu/ggml-cpu-quants.c | 2 -- ggml/src/ggml-cpu/llamafile/sgemm.cpp | 3 ++- ggml/src/ggml-cuda/common.cuh | 3 --- ggml/src/ggml-quants.c | 2 -- ggml/src/ggml-sycl/common.hpp | 2 -- 7 files changed, 6 insertions(+), 12 deletions(-) diff --git a/ggml/src/ggml-common.h b/ggml/src/ggml-common.h index f13fd4dea..4fd629565 100644 --- a/ggml/src/ggml-common.h +++ b/ggml/src/ggml-common.h @@ -1070,6 +1070,10 @@ GGML_TABLE_BEGIN(uint32_t, iq3s_grid, 512) 0x0f090307, 0x0f090501, 0x0f090b01, 0x0f0b0505, 0x0f0b0905, 0x0f0d0105, 0x0f0d0703, 0x0f0f0101, GGML_TABLE_END() +GGML_TABLE_BEGIN(int8_t, kvalues_iq4nl, 16) + -127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113, +GGML_TABLE_END() + #define NGRID_IQ1S 2048 #define IQ1S_DELTA 0.125f #define IQ1M_DELTA 0.125f diff --git a/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp b/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp index b311a5b1c..df492474e 100644 --- a/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +++ b/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp @@ -223,8 +223,6 @@ static inline __m256i mul_sum_i8_pairs_int32x8(const __m256i x, const __m256i y) } #endif -static const int8_t kvalues_iq4nl[16] = {-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113}; - static void quantize_q8_0_4x4(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k) { assert(QK8_0 == 32); assert(k % QK8_0 == 0); diff --git a/ggml/src/ggml-cpu/ggml-cpu-quants.c b/ggml/src/ggml-cpu/ggml-cpu-quants.c index 27ec14935..adeadc5bb 100644 --- a/ggml/src/ggml-cpu/ggml-cpu-quants.c +++ b/ggml/src/ggml-cpu/ggml-cpu-quants.c @@ -1639,8 +1639,6 @@ void quantize_row_tq2_0(const float * restrict x, void * restrict vy, int64_t k) quantize_row_tq2_0_ref(x, y, k); } -static const int8_t kvalues_iq4nl[16] = {-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113}; - //===================================== Q8_K ============================================== void quantize_row_q8_K(const float * restrict x, void * restrict y, int64_t k) { diff --git a/ggml/src/ggml-cpu/llamafile/sgemm.cpp b/ggml/src/ggml-cpu/llamafile/sgemm.cpp index c22a66287..e388981ec 100644 --- a/ggml/src/ggml-cpu/llamafile/sgemm.cpp +++ b/ggml/src/ggml-cpu/llamafile/sgemm.cpp @@ -52,6 +52,8 @@ #include "ggml-impl.h" #include "ggml-cpu-impl.h" #include "ggml-quants.h" +#define GGML_COMMON_IMPL_CPP +#include "ggml-common.h" #include #include @@ -284,7 +286,6 @@ template <> inline __m256bh load(const float *p) { // CONSTANTS #if defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__) -static const int8_t kvalues_iq4nl[16] = {-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113}; static const __m128i iq4nlt = _mm_loadu_si128((const __m128i *) kvalues_iq4nl); #endif diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh index 174916bc9..1a638d5b6 100644 --- a/ggml/src/ggml-cuda/common.cuh +++ b/ggml/src/ggml-cuda/common.cuh @@ -358,9 +358,6 @@ static __device__ __forceinline__ int ggml_cuda_dp4a(const int a, const int b, i #endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) } -// TODO: move to ggml-common.h -static constexpr __device__ int8_t kvalues_iq4nl[16] = {-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113}; - typedef void (*dequantize_kernel_t)(const void * vx, const int64_t ib, const int iqs, dfloat2 & v); static __device__ __forceinline__ float get_alibi_slope( diff --git a/ggml/src/ggml-quants.c b/ggml/src/ggml-quants.c index 7918388ae..3e7d54170 100644 --- a/ggml/src/ggml-quants.c +++ b/ggml/src/ggml-quants.c @@ -2431,8 +2431,6 @@ void dequantize_row_iq1_m(const block_iq1_m * restrict x, float * restrict y, in } } -static const int8_t kvalues_iq4nl[16] = {-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113}; - void dequantize_row_iq4_nl(const block_iq4_nl * restrict x, float * restrict y, int64_t k) { assert(k % QK4_NL == 0); const int64_t nb = k / QK4_NL; diff --git a/ggml/src/ggml-sycl/common.hpp b/ggml/src/ggml-sycl/common.hpp index abad847ca..4d5d64cbe 100644 --- a/ggml/src/ggml-sycl/common.hpp +++ b/ggml/src/ggml-sycl/common.hpp @@ -139,8 +139,6 @@ typedef sycl::float2 dfloat2; #define MMVQ_MAX_BATCH_SIZE 8 -static const int8_t kvalues_iq4nl[16]={-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113}; - static int g_all_sycl_device_count = -1; static bool g_ggml_backend_sycl_buffer_type_initialized = false;