From 3a88e8e803c0e6834916370a5cd0dc401a38410d Mon Sep 17 00:00:00 2001
From: katsu560
Date: Sun, 25 Jun 2023 15:59:39 +0900
Subject: [PATCH] fix test quantize perf

Add a -i/--iterations option so the number of timed runs can be chosen
at run time (default ITERATIONS, at most MAX_ITERATIONS).  The count
must be at least 1, because the reported averages divide by
size * iterations.

Add a -h/--help option that prints a usage summary, including the names
of the types that provide both quantize_row_q and dequantize_row_q.
Asking for help is not an error, so it exits with status 0.

Also check ggml_type_name() for NULL before searching the include list
for it.
---
 tests/test-quantize-perf.cpp | 59 +++++++++++++++++++++++++++++++++---
 1 file changed, 54 insertions(+), 5 deletions(-)

diff --git a/tests/test-quantize-perf.cpp b/tests/test-quantize-perf.cpp
index 600375771..09256ad30 100644
--- a/tests/test-quantize-perf.cpp
+++ b/tests/test-quantize-perf.cpp
@@ -21,6 +21,7 @@
 #define QK 32
 #define WARMUP 5
 #define ITERATIONS 10
+#define MAX_ITERATIONS 100000000
 
 #define L1_SIZE 32*128
 #define L2_SIZE 32*2048
@@ -36,8 +37,10 @@ struct quantize_perf_params {
     bool op_dequantize_row_q = false;
     bool op_quantize_row_q_dot = false;
     bool op_vec_dot_q = false;
+    int64_t iterations = ITERATIONS;
 };
 
+int64_t iterations = ITERATIONS;
 
 #if defined(__x86_64__) || defined(__i386__)
 
@@ -86,7 +89,7 @@ void benchmark_function(size_t size, size_t q_size, std::function
     }
 
 
-    for (int i = 0; i < ITERATIONS; i++) {
+    for (int i = 0; i < iterations; i++) {
         const int64_t start_time = ggml_time_us();
         const int64_t start_cycles = cpu_cycles();
 
@@ -102,9 +105,38 @@ void benchmark_function(size_t size, size_t q_size, std::function
     }
 
     printf(" min cycles/%d vals : %9.2f\n", QK, QK * min_time_cycles / (float) size);
-    printf(" avg cycles/%d vals : %9.2f\n", QK, QK * total_time_cycles / (float) (size * ITERATIONS));
-    printf(" float32 throughput : %9.2f GB/s\n", gigabytes_per_second(4 * size * ITERATIONS, total_time_us));
-    printf(" quantized throughput : %9.2f GB/s\n", gigabytes_per_second(q_size * ITERATIONS, total_time_us));
+    printf(" avg cycles/%d vals : %9.2f\n", QK, QK * total_time_cycles / (float) (size * iterations));
+    printf(" float32 throughput : %9.2f GB/s\n", gigabytes_per_second(4 * size * iterations, total_time_us));
+    printf(" quantized throughput : %9.2f GB/s\n", gigabytes_per_second(q_size * iterations, total_time_us));
+}
+
+void usage(char * argv[]) {
+    printf("Benchmark quantization specific functions on synthetic data\n");
+    printf("\n");
+    printf("usage: %s [options]\n", argv[0]);
+    printf("\n");
+    printf("options: (default)\n");
+    printf(" -h, --help show this help message and exit\n");
+    printf(" --size SIZE set test size, divisible by 32 (L1_SIZE:%d)\n", L1_SIZE);
+    printf(" -3 use size as L1, L2, L3 sizes (L1:%d L2:%d L3:%d)\n", L1_SIZE, L2_SIZE, L3_SIZE);
+    printf(" -4 use size as L1, L2, L3, MEM sizes (L1:%d L2:%d L3:%d MEM:%d)\n", L1_SIZE, L2_SIZE, L3_SIZE, MEM_SIZE);
+    printf(" --op OP set test operation as quantize_row_q_reference, quantize_row_q, dequantize_row_q,\n");
+    printf(" quantize_row_q_dot, vec_dot_q (all)\n");
+    printf(" --type TYPE set test type as");
+    for (int i = 0; i < GGML_TYPE_COUNT; i++) {
+        ggml_type type = (ggml_type) i;
+        quantize_fns_t qfns = ggml_internal_get_quantize_fn(type);
+        if (ggml_type_name(type) != NULL) {
+            if (qfns.quantize_row_q && qfns.dequantize_row_q) {
+                printf(" %s", ggml_type_name(type));
+            }
+        }
+    }
+    printf(" (all)\n");
+    printf(" --alignment-offset OFFSET\n");
+    printf(" set alignment offset as OFFSET (0)\n");
+    printf(" -i NUM, --iterations NUM\n");
+    printf(" set test iteration number (%d)\n", ITERATIONS);
 }
 
 int main(int argc, char * argv[]) {
@@ -178,6 +210,21 @@ int main(int argc, char * argv[]) {
                 break;
             }
             params.alignment_offset = alignment;
+        } else if ((arg == "-i") || (arg == "--iterations")) {
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            int number = std::stoi(argv[i]);
+            if (number < 1 || number > MAX_ITERATIONS) { // 0 would make the averages divide by zero
+                fprintf(stderr, "error: iterations must be between 1 and %d\n", MAX_ITERATIONS);
+                invalid_param = true;
+                break;
+            }
+            params.iterations = number;
+        } else if ((arg == "-h") || (arg == "--help")) {
+            usage(argv);
+            return 0; // help was requested explicitly, so this is not a failure
         } else {
             fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
             return 1;
@@ -213,6 +260,8 @@ int main(int argc, char * argv[]) {
 
     generate_data(0, largest, test_data1);
     generate_data(1, largest, test_data2);
+    iterations = params.iterations;
+
 
     // Initialize GGML, ensures float conversion tables are initialized
     struct ggml_init_params ggml_params = {
@@ -225,7 +274,7 @@ int main(int argc, char * argv[]) {
     for (int i = 0; i < GGML_TYPE_COUNT; i++) {
         ggml_type type = (ggml_type) i;
         quantize_fns_t qfns = ggml_internal_get_quantize_fn(i);
-        if (!params.include_types.empty() && std::find(params.include_types.begin(), params.include_types.end(), ggml_type_name(type)) == params.include_types.end()) {
+        if (!params.include_types.empty() && ggml_type_name(type) && std::find(params.include_types.begin(), params.include_types.end(), ggml_type_name(type)) == params.include_types.end()) {
             continue;
         }
 