diff --git a/examples/quantize-stats/quantize-stats.cpp b/examples/quantize-stats/quantize-stats.cpp index 8ab1d02b0..c924ff3d2 100644 --- a/examples/quantize-stats/quantize-stats.cpp +++ b/examples/quantize-stats/quantize-stats.cpp @@ -307,7 +307,7 @@ int main(int argc, char ** argv) { // loop throught quantization types //for (int i = 0; i < GGML_TYPE_COUNT; i++) { - for (int i = 1; i < 2; i++) { + for (int i = 0; i < 1; i++) { if (!params.include_types.empty() && std::find(params.include_types.begin(), params.include_types.end(), i) == params.include_types.end()) { continue; } @@ -315,8 +315,10 @@ int main(int argc, char ** argv) { if (i < 2 && checkNewQuantization) { //qfns.quantize_row_q = i == 0 ? kQuantizeQ4_0 : kQuantizeQ4_1; //qfns.quantize_row_q = i == 0 ? kQuantizeQ4_0 : kQuantizeQ5_1; - qfns.quantize_row_q = i == 0 ? kQuantizeQ4_0 : kQuantizeQ5_1_Fast; - if (i == 1) qfns.dequantize_row_q = kDequantizeQ5_1; + //qfns.quantize_row_q = i == 0 ? kQuantizeQ4_0 : kQuantizeQ5_1_Fast; + //if (i == 1) qfns.dequantize_row_q = kDequantizeQ5_1; + qfns.quantize_row_q = i == 0 ? kQuantizeQ4_0K : kQuantizeQ5_1; + qfns.dequantize_row_q = i == 0 ? kDequantizeQ4_0K : kDequantizeQ5_1; } if (qfns.quantize_row_q && qfns.dequantize_row_q) { if (params.verbose) { diff --git a/ggml_extra.cpp b/ggml_extra.cpp index ed6330a3e..927ab7e78 100644 --- a/ggml_extra.cpp +++ b/ggml_extra.cpp @@ -369,6 +369,15 @@ void kQuantizeQ4(const float* X, void* buffer, int k, int type) { std::memcpy(q, &result.second, sizeof(result.second)); q += sizeof(result.second); std::memcpy(q, &result.first, sizeof(result.first)); q += sizeof(result.first); for (int k=0; k> 4; + l1 -= 8; l2 -= 8; + *y++ = a*l1; *y++ = a*l2; + } + data += 8; + for (int k=0; k<8; ++k) { + int8_t l1 = data[k] & 15, l2 = data[k] >> 4; + l1 -= 8; l2 -= 8; + *y++ = b*l1; *y++ = b*l2; + } + data += 8; + } +} + } diff --git a/ggml_extra.h b/ggml_extra.h index 7faa43801..6ded61657 100644 --- a/ggml_extra.h +++ b/ggml_extra.h @@ -28,6 +28,9 @@ void kQuantizeQ5_1_Fast(const float* GGML_RESTRICT x, void* GGML_RESTRICT y, int size_t kQuantizeQ5_1H_Fast(const float* GGML_RESTRICT x, void* GGML_RESTRICT y, int k, int64_t* hist); void kDequantizeQ5_1(const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int k); +void kQuantizeQ4_0K(const float* GGML_RESTRICT x, void* GGML_RESTRICT y, int k); +void kDequantizeQ4_0K(const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int k); + #ifdef __cplusplus } #endif