From e95a8336d58da45fae6fce2175924cb616a0aa39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20H=2E=20Hitland?= Date: Sun, 16 Apr 2023 00:37:16 +0200 Subject: [PATCH] test-quantize: fix for q8_0 intermediates --- tests/test-quantize-fns.cpp | 5 ++--- tests/test-quantize-perf.cpp | 21 +++++++++++++++++++-- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/tests/test-quantize-fns.cpp b/tests/test-quantize-fns.cpp index 2604feabb..5a5410152 100644 --- a/tests/test-quantize-fns.cpp +++ b/tests/test-quantize-fns.cpp @@ -12,7 +12,6 @@ const float MAX_QUANTIZATION_REFERENCE_ERROR = 0.0001; const float MAX_QUANTIZATION_TOTAL_ERROR = 0.002; -// TODO: check why q4_1 is high const float MAX_DOT_PRODUCT_ERROR = 0.02; const char* RESULT_STR[] = {"ok", "FAILED"}; @@ -71,10 +70,10 @@ float dot_product(const float * a1, const float * a2, size_t test_size) { // Total dot product error float dot_product_error(quantize_fns_t & qfns, size_t test_size, const float * test_data1, const float *test_data2) { std::vector tmp_q1(test_size); - std::vector tmp_q2(test_size); + std::vector tmp_q2(test_size*2); qfns.quantize_row_q(test_data1, tmp_q1.data(), test_size); - qfns.quantize_row_q(test_data2, tmp_q2.data(), test_size); + qfns.quantize_row_q_dot(test_data2, tmp_q2.data(), test_size); float result = INFINITY; qfns.vec_dot_q(test_size, &result, tmp_q1.data(), tmp_q2.data()); diff --git a/tests/test-quantize-perf.cpp b/tests/test-quantize-perf.cpp index 612753789..883df05fe 100644 --- a/tests/test-quantize-perf.cpp +++ b/tests/test-quantize-perf.cpp @@ -30,6 +30,7 @@ struct quantize_perf_params { bool op_quantize_row_q_reference = false; bool op_quantize_row_q = false; bool op_dequantize_row_q = false; + bool op_quantize_row_q_dot = false; bool op_vec_dot_q = false; }; @@ -147,6 +148,8 @@ int main(int argc, char * argv[]) { params.op_quantize_row_q = true; } else if (op == "dequantize_row_q") { params.op_dequantize_row_q = true; + } else if (op == "quantize_row_q_dot") { + params.op_quantize_row_q_dot = true; } else if (op == "vec_dot_q") { params.op_vec_dot_q = true; } else { @@ -184,8 +187,8 @@ int main(int argc, char * argv[]) { if (params.test_sizes.empty()) { params.test_sizes.push_back(L1_SIZE); } - if (!(params.op_quantize_row_q_reference || params.op_quantize_row_q || params.op_dequantize_row_q || params.op_vec_dot_q)) { - params.op_quantize_row_q_reference = params.op_quantize_row_q = params.op_dequantize_row_q = params.op_vec_dot_q = true; + if (!(params.op_quantize_row_q_reference || params.op_quantize_row_q || params.op_dequantize_row_q || params.op_quantize_row_q_dot || params.op_vec_dot_q)) { + params.op_quantize_row_q_reference = params.op_quantize_row_q = params.op_dequantize_row_q = params.op_quantize_row_q_dot = params.op_vec_dot_q = true; } std::sort(params.test_sizes.begin(), params.test_sizes.end()); @@ -268,6 +271,20 @@ int main(int argc, char * argv[]) { printf("\n"); } + if (params.op_quantize_row_q_dot) { + printf(" quantize_row_q_dot\n"); + for (size_t size : params.test_sizes) { + printf(" %zu values (%.2f MB)\n", size, 4*size/(float)(1024*1024)); + auto quantize_fn = [&](void ) { + qfns.quantize_row_q_dot(test_data1, test_q1, size); + return test_q1[0]; + }; + size_t quantized_size = size / ggml_blck_size(type) * ggml_type_size(type); + benchmark_function(size, quantized_size, quantize_fn); + } + printf("\n"); + } + if (params.op_vec_dot_q) { printf(" vec_dot_q\n"); qfns.quantize_row_q(test_data1, test_q1, largest);