fixed hardcoded qk=128 bug
This commit is contained in:
parent
23649e5416
commit
37b572e196
3 changed files with 15 additions and 10 deletions
2
.gitignore
vendored
2
.gitignore
vendored
|
@@ -6,3 +6,5 @@ models/**
|
|||
.vscode/**
|
||||
|
||||
**/__pycache__/**
|
||||
|
||||
**/build-info.cpp
|
|
@@ -1,17 +1,19 @@
|
|||
include(CheckCXXCompilerFlag)
|
||||
|
||||
# Set default value for QK4_0
|
||||
set(QK4_0 "32" CACHE STRING "Quantization block size for Q4_0 (32, 64, 128, 256)")
|
||||
|
||||
# Only check the value if it was explicitly set by the user
|
||||
if (DEFINED QK4_0 AND NOT QK4_0 STREQUAL "32")
|
||||
# Only process QK4_0 if it is explicitly set
|
||||
if (DEFINED QK4_0)
|
||||
# Ensure QK4_0 is an integer and within the allowed values
|
||||
if (NOT QK4_0 MATCHES "^(32|64|128|256)$")
|
||||
message(FATAL_ERROR "Invalid QK4_0 value: Must be one of {32, 64, 128, 256}")
|
||||
endif()
|
||||
add_compile_definitions(QK4_0=${QK4_0})
|
||||
endif()
|
||||
|
||||
message(STATUS "QK4_0 is set to: ${QK4_0}")
|
||||
# Define the macro for compilation
|
||||
add_compile_definitions(QK4_0=${QK4_0})
|
||||
|
||||
message(STATUS "QK4_0 is set to: ${QK4_0}")
|
||||
else()
|
||||
message(STATUS "QK4_0 is not set. Using default behavior.")
|
||||
endif()
|
||||
|
||||
add_compile_definitions(GGML_SCHED_MAX_COPIES=${GGML_SCHED_MAX_COPIES})
|
||||
|
||||
|
|
|
@@ -1741,7 +1741,7 @@ static inline __m128i get_scale_shuffle(int i) {
|
|||
#endif
|
||||
|
||||
void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
|
||||
const int qk = 128;
|
||||
const int qk = QK4_0;
|
||||
const int nb = n / qk;
|
||||
|
||||
assert(n % qk == 0);
|
||||
|
@@ -2317,7 +2317,7 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, size_t bs, const void * r
|
|||
}
|
||||
|
||||
sumf = hsum_float_4x4(acc_0, acc_1, acc_2, acc_3);
|
||||
#endif
|
||||
#else
|
||||
for (; ib < nb; ++ib) {
|
||||
int sumi0 = 0;
|
||||
int sumi1 = 0;
|
||||
|
@@ -2333,6 +2333,7 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, size_t bs, const void * r
|
|||
int sumi = sumi0 + sumi1;
|
||||
sumf += sumi*GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d);
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
*s = sumf;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue