From 6efbc690dc846d75de3520ad254b3342dd0007fb Mon Sep 17 00:00:00 2001 From: Iwan Kawrakow Date: Mon, 29 Jan 2024 16:33:03 +0200 Subject: [PATCH] iq3_xxs: fix failing quantization test Dot product still fails. Is this real? --- tests/test-quantize-fns.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/test-quantize-fns.cpp b/tests/test-quantize-fns.cpp index 31a78c632..91c10d12b 100644 --- a/tests/test-quantize-fns.cpp +++ b/tests/test-quantize-fns.cpp @@ -17,6 +17,7 @@ constexpr float MAX_QUANTIZATION_REFERENCE_ERROR = 0.0001f; constexpr float MAX_QUANTIZATION_TOTAL_ERROR = 0.002f; constexpr float MAX_QUANTIZATION_TOTAL_ERROR_2BITS = 0.0075f; constexpr float MAX_QUANTIZATION_TOTAL_ERROR_3BITS = 0.0040f; +constexpr float MAX_QUANTIZATION_TOTAL_ERROR_3BITS_XXS = 0.0050f; constexpr float MAX_DOT_PRODUCT_ERROR = 0.02f; static const char* RESULT_STR[] = {"ok", "FAILED"}; @@ -135,18 +136,21 @@ int main(int argc, char * argv[]) { } const ggml_type ei = (ggml_type)i; + if (ei == GGML_TYPE_IQ2_XXS || ei == GGML_TYPE_IQ2_XS) { printf("Skip %s due to missing quantization functionality\n", ggml_type_name(ei)); continue; } printf("Testing %s\n", ggml_type_name((ggml_type) i)); + ggml_quantize_init(ei); if (qfns.from_float && qfns.to_float) { const float total_error = total_quantization_error(qfns, test_size, test_data.data()); const float max_quantization_error = - type == GGML_TYPE_Q2_K ? MAX_QUANTIZATION_TOTAL_ERROR_2BITS : - type == GGML_TYPE_Q3_K ? MAX_QUANTIZATION_TOTAL_ERROR_3BITS : MAX_QUANTIZATION_TOTAL_ERROR; + type == GGML_TYPE_Q2_K ? MAX_QUANTIZATION_TOTAL_ERROR_2BITS : + type == GGML_TYPE_Q3_K ? MAX_QUANTIZATION_TOTAL_ERROR_3BITS : + type == GGML_TYPE_IQ3_XXS ? MAX_QUANTIZATION_TOTAL_ERROR_3BITS_XXS : MAX_QUANTIZATION_TOTAL_ERROR; failed = !(total_error < max_quantization_error); num_failed += failed; if (failed || verbose) {