From fe2160eec9a9295cf746785b88c11d48ac438e9c Mon Sep 17 00:00:00 2001 From: Iwan Kawrakow Date: Mon, 29 Jan 2024 18:21:04 +0200 Subject: [PATCH] iq3_xxs: failing tests This time the dot product accuracy did find an actual bug in the AVX2 implementation. --- ggml-quants.c | 4 ++-- tests/test-quantize-fns.cpp | 5 ++++- tests/test-quantize-perf.cpp | 2 ++ 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/ggml-quants.c b/ggml-quants.c index 1bd659514..ac061b63a 100644 --- a/ggml-quants.c +++ b/ggml-quants.c @@ -8686,10 +8686,10 @@ void ggml_vec_dot_iq3_xxs_q8_K(const int n, float * restrict s, const void * res const __m256i q8_1 = _mm256_loadu_si256((const __m256i *)q8); q8 += 32; const __m256i q8_2 = _mm256_loadu_si256((const __m256i *)q8); q8 += 32; const __m256i q2_1 = _mm256_set_epi32(iq3xxs_grid[q3[7]], iq3xxs_grid[q3[6]], iq3xxs_grid[q3[5]], iq3xxs_grid[q3[4]], - iq3xxs_grid[q3[3]], iq3xxs_grid[q3[1]], iq3xxs_grid[q3[1]], iq3xxs_grid[q3[0]]); + iq3xxs_grid[q3[3]], iq3xxs_grid[q3[2]], iq3xxs_grid[q3[1]], iq3xxs_grid[q3[0]]); q3 += 8; const __m256i q2_2 = _mm256_set_epi32(iq3xxs_grid[q3[7]], iq3xxs_grid[q3[6]], iq3xxs_grid[q3[5]], iq3xxs_grid[q3[4]], - iq3xxs_grid[q3[3]], iq3xxs_grid[q3[1]], iq3xxs_grid[q3[1]], iq3xxs_grid[q3[0]]); + iq3xxs_grid[q3[3]], iq3xxs_grid[q3[2]], iq3xxs_grid[q3[1]], iq3xxs_grid[q3[0]]); q3 += 8; memcpy(aux32, gas, 8); gas += 8; const __m256i s2_1 = _mm256_set_epi64x(signs64[(aux32[0] >> 21) & 127], signs64[(aux32[0] >> 14) & 127], diff --git a/tests/test-quantize-fns.cpp b/tests/test-quantize-fns.cpp index 91c10d12b..43df8022d 100644 --- a/tests/test-quantize-fns.cpp +++ b/tests/test-quantize-fns.cpp @@ -19,6 +19,7 @@ constexpr float MAX_QUANTIZATION_TOTAL_ERROR_2BITS = 0.0075f; constexpr float MAX_QUANTIZATION_TOTAL_ERROR_3BITS = 0.0040f; constexpr float MAX_QUANTIZATION_TOTAL_ERROR_3BITS_XXS = 0.0050f; constexpr float MAX_DOT_PRODUCT_ERROR = 0.02f; +constexpr float MAX_DOT_PRODUCT_ERROR_LOWBIT = 0.04f; static const char* RESULT_STR[] = {"ok", "FAILED"}; @@ -165,7 +166,9 @@ int main(int argc, char * argv[]) { } const float vec_dot_error = dot_product_error(qfns, test_size, test_data.data(), test_data2.data()); - failed = !(vec_dot_error < MAX_DOT_PRODUCT_ERROR); + const float max_allowed_error = type == GGML_TYPE_Q2_K || type == GGML_TYPE_IQ2_XS || type == GGML_TYPE_IQ2_XXS || + type == GGML_TYPE_IQ3_XXS ? MAX_DOT_PRODUCT_ERROR_LOWBIT : MAX_DOT_PRODUCT_ERROR; + failed = !(vec_dot_error < max_allowed_error); num_failed += failed; if (failed || verbose) { printf("%5s dot product error: %s (%f)\n", ggml_type_name(type), RESULT_STR[failed], vec_dot_error); diff --git a/tests/test-quantize-perf.cpp b/tests/test-quantize-perf.cpp index 09d410b7f..8ec817344 100644 --- a/tests/test-quantize-perf.cpp +++ b/tests/test-quantize-perf.cpp @@ -278,6 +278,8 @@ int main(int argc, char * argv[]) { if (qfns.from_float && qfns.to_float) { printf("%s\n", ggml_type_name(type)); + ggml_quantize_init(type); + if (params.op_quantize_row_q_reference) { printf(" quantize_row_q_reference\n"); for (size_t size : params.test_sizes) {