WIP
This commit is contained in:
parent
a4ee5ca8f5
commit
23649e5416
3 changed files with 19 additions and 20 deletions
|
@ -1,12 +1,17 @@
|
|||
include(CheckCXXCompilerFlag)
|
||||
|
||||
option(CUSTOM_QK4_0 "Quantization block size for Q4_0 (32, 64, 128, 256)" 32)
|
||||
# Set default value for QK4_0
|
||||
set(QK4_0 "32" CACHE STRING "Quantization block size for Q4_0 (32, 64, 128, 256)")
|
||||
|
||||
if (NOT CUSTOM_QK4_0 MATCHES "^(32|64|128|256)$")
|
||||
message(FATAL_ERROR "Invalid CUSTOM_QK4_0 value: Must be one of {32, 64, 128, 256}")
|
||||
# Only check the value if it was explicitly set by the user
|
||||
if (DEFINED QK4_0 AND NOT QK4_0 STREQUAL "32")
|
||||
if (NOT QK4_0 MATCHES "^(32|64|128|256)$")
|
||||
message(FATAL_ERROR "Invalid QK4_0 value: Must be one of {32, 64, 128, 256}")
|
||||
endif()
|
||||
add_compile_definitions(QK4_0=${QK4_0})
|
||||
endif()
|
||||
|
||||
add_compile_definitions(CUSTOM_QK4_0=${CUSTOM_QK4_0})
|
||||
message(STATUS "QK4_0 is set to: ${QK4_0}")
|
||||
|
||||
add_compile_definitions(GGML_SCHED_MAX_COPIES=${GGML_SCHED_MAX_COPIES})
|
||||
|
||||
|
|
|
@ -141,10 +141,8 @@ typedef sycl::half2 ggml_half2;
|
|||
|
||||
#endif // GGML_COMMON_DECL_CUDA || GGML_COMMON_DECL_HIP
|
||||
|
||||
#ifdef CUSTOM_QK4_0
|
||||
#define QK4_0 CUSTOM_QK4_0
|
||||
#else
|
||||
#define QK4_0 32 // Default value for QK4_0
|
||||
#ifndef QK4_0
|
||||
#define QK4_0 32
|
||||
#endif
|
||||
|
||||
#if (QK4_0 != 32 && QK4_0 != 64 && QK4_0 != 128 && QK4_0 != 256)
|
||||
|
@ -192,16 +190,12 @@ typedef struct {
|
|||
} block_q5_1;
|
||||
static_assert(sizeof(block_q5_1) == 2 * sizeof(ggml_half) + sizeof(uint32_t) + QK5_1 / 2, "wrong q5_1 block size/padding");
|
||||
|
||||
#ifdef CUSTOM_QK8_0
|
||||
#define QK8_0 CUSTOM_QK4_0
|
||||
#if QK4_0 != 32
|
||||
#define QK8_0 QK4_0
|
||||
#else
|
||||
#define QK8_0 32
|
||||
#endif
|
||||
|
||||
#if (QK8_0 != 32 && QK8_0 != 64 && QK8_0 != 128 && QK8_0 != 256)
|
||||
#error "Invalid QK8_0 value: QK8_0 must be one of {32, 64, 128, 256}"
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
ggml_half d; // delta
|
||||
int8_t qs[QK8_0]; // quants
|
||||
|
@ -421,7 +415,8 @@ typedef union {
|
|||
} iq1m_scale_t;
|
||||
|
||||
// Non-linear quants
|
||||
#define QK4_NL 128
|
||||
#define QK4_NL QK8_0
|
||||
|
||||
typedef struct {
|
||||
ggml_half d;
|
||||
uint8_t qs[QK4_NL/2];
|
||||
|
|
|
@ -713,7 +713,7 @@ void quantize_row_q8_0(const float * restrict x, void * restrict vy, int64_t k)
|
|||
|
||||
block_q8_0 * restrict y = vy;
|
||||
|
||||
#if defined(CUSTOM_QK4_0) && (CUSTOM_QK4_0 != 32)
|
||||
#if (QK8_0 != 32)
|
||||
GGML_UNUSED(nb);
|
||||
// scalar
|
||||
quantize_row_q8_0_ref(x, y, k);
|
||||
|
@ -1832,8 +1832,8 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, size_t bs, const void * r
|
|||
float sumf = 0;
|
||||
|
||||
|
||||
#if defined(CUSTOM_QK4_0) && (CUSTOM_QK4_0 != 32)
|
||||
// Use only the basic implementation when CUSTOM_QK4_0 is defined and not 32
|
||||
#if (QK4_0 != 32)
|
||||
// Use only the basic implementation when QK4_0 is defined and not 32
|
||||
for (; ib < nb; ++ib) {
|
||||
int sumi0 = 0;
|
||||
int sumi1 = 0;
|
||||
|
@ -2317,7 +2317,7 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, size_t bs, const void * r
|
|||
}
|
||||
|
||||
sumf = hsum_float_4x4(acc_0, acc_1, acc_2, acc_3);
|
||||
#else
|
||||
#endif
|
||||
for (; ib < nb; ++ib) {
|
||||
int sumi0 = 0;
|
||||
int sumi1 = 0;
|
||||
|
@ -2333,7 +2333,6 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, size_t bs, const void * r
|
|||
int sumi = sumi0 + sumi1;
|
||||
sumf += sumi*GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d);
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
*s = sumf;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue