test-quantize-fns: CI fixes

Fix issues uncovered in CI: test sizes need to be divisible by 32*8 for loop unrolling, and the intrinsics header is switched to one that should also work on Mac.
2023-04-14 23:15:13 +02:00 · 2023-04-14 23:15:13 +02:00 · 8bd7dd64ba
commit 8bd7dd64ba
parent ebee501cca
2 changed files with 9 additions and 13 deletions
--- a/tests/test-quantize-fns.cpp
+++ b/tests/test-quantize-fns.cpp
@ -86,7 +86,7 @@ float dot_product_error(quantize_fns_t & qfns, size_t test_size, const float * t
 int main(int argc, char * argv[]) {
    bool verbose = false;
-    const size_t test_size = 32 * 100;
+    const size_t test_size = 32 * 128;
    std::string arg;
    for (int i = 1; i < argc; i++) {
--- a/tests/test-quantize-perf.cpp
+++ b/tests/test-quantize-perf.cpp
@ -18,10 +18,10 @@
 #define WARMUP 5
 #define ITERATIONS 10
-#define L1_SIZE      32*100
+#define L1_SIZE      32*128
-#define L2_SIZE     32*2000
+#define L2_SIZE     32*2048
-#define L3_SIZE    32*20000
+#define L3_SIZE    32*20480
-#define MEM_SIZE 32*2000000
+#define MEM_SIZE 32*2048000
 struct quantize_perf_params {
    std::vector<std::string> include_types;
@ -36,7 +36,7 @@ struct quantize_perf_params {
 #if defined(__x86_64__) || defined(__i386__)
-#include <immintrin.h>
+#include <x86intrin.h>
 inline int64_t cpu_cycles() {
 // Rough way to detect new-ish CPUs
 #ifdef __POPCNT__
@ -71,16 +71,13 @@ void * align_with_offset(void * ptr, int offset) {
 }
 void benchmark_function(size_t size, size_t q_size, std::function<size_t(void)> function) {
    size_t bytes_out = 0;
    int64_t min_time_us = INT64_MAX;
    int64_t total_time_us = 0;
    int64_t min_time_cycles = INT64_MAX;
    int64_t total_time_cycles = 0;
    for (int i = 0; i < WARMUP; i++) {
-        bytes_out |= function();
+        function();
    }
@ -88,12 +85,11 @@ void benchmark_function(size_t size, size_t q_size, std::function<size_t(void)>
        const int64_t start_time = ggml_time_us();
        const int64_t start_cycles = cpu_cycles();
-        bytes_out |= function();
+        function();
        const int64_t end_cycles = cpu_cycles();
        const int64_t end_time = ggml_time_us();
        //printf("    aostne %d\n",  end_cycles - start_cycles);
        total_time_cycles += end_cycles - start_cycles;
        min_time_cycles = std::min(min_time_cycles, end_cycles - start_cycles);
        total_time_us += end_time - start_time;