diff --git a/.gitignore b/.gitignore index 9bfe49fc2..26d8c1e7c 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,5 @@ models/** .vscode/** **/__pycache__/** + +**/build-info.cpp \ No newline at end of file diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt index 24a8f334e..d05cd6be4 100644 --- a/ggml/src/CMakeLists.txt +++ b/ggml/src/CMakeLists.txt @@ -1,17 +1,19 @@ include(CheckCXXCompilerFlag) -# Set default value for QK4_0 -set(QK4_0 "32" CACHE STRING "Quantization block size for Q4_0 (32, 64, 128, 256)") - -# Only check the value if it was explicitly set by the user -if (DEFINED QK4_0 AND NOT QK4_0 STREQUAL "32") +# Only process QK4_0 if it is explicitly set +if (DEFINED QK4_0) + # Ensure QK4_0 is an integer and within the allowed values if (NOT QK4_0 MATCHES "^(32|64|128|256)$") message(FATAL_ERROR "Invalid QK4_0 value: Must be one of {32, 64, 128, 256}") endif() - add_compile_definitions(QK4_0=${QK4_0}) -endif() -message(STATUS "QK4_0 is set to: ${QK4_0}") + # Define the macro for compilation + add_compile_definitions(QK4_0=${QK4_0}) + + message(STATUS "QK4_0 is set to: ${QK4_0}") +else() + message(STATUS "QK4_0 is not set. Using default behavior.") +endif() add_compile_definitions(GGML_SCHED_MAX_COPIES=${GGML_SCHED_MAX_COPIES}) diff --git a/ggml/src/ggml-cpu/ggml-cpu-quants.c b/ggml/src/ggml-cpu/ggml-cpu-quants.c index 568c22ed6..8861663af 100644 --- a/ggml/src/ggml-cpu/ggml-cpu-quants.c +++ b/ggml/src/ggml-cpu/ggml-cpu-quants.c @@ -1741,7 +1741,7 @@ static inline __m128i get_scale_shuffle(int i) { #endif void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) { - const int qk = 128; + const int qk = QK4_0; const int nb = n / qk; assert(n % qk == 0); @@ -2317,7 +2317,7 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, size_t bs, const void * r } sumf = hsum_float_4x4(acc_0, acc_1, acc_2, acc_3); -#endif +#else for (; ib < nb; ++ib) { int sumi0 = 0; int sumi1 = 0; @@ -2333,6 +2333,7 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, size_t bs, const void * r int sumi = sumi0 + sumi1; sumf += sumi*GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d); } +#endif #endif *s = sumf; }