diff --git a/Makefile b/Makefile
index a1b99c6f9..1c450f055 100644
--- a/Makefile
+++ b/Makefile
@@ -176,7 +176,7 @@ libllama.so: llama.o ggml.o
 # Tests
 #
 
-benchmark: examples/benchmark/benchmark-q4_0-matmult.c ggml.o
+benchmark: examples/benchmark/benchmark-q4_0-matmult.c ggml.o llama.o common.o
 	$(CXX) $(CXXFLAGS) $^ -o benchmark-q4_0-matmult $(LDFLAGS)
 	./benchmark-q4_0-matmult
 
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index 67a7cea54..8f53244f6 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -34,4 +34,5 @@ else()
     add_subdirectory(quantize-stats)
     add_subdirectory(perplexity)
     add_subdirectory(embedding)
+    add_subdirectory(benchmark)
 endif()
diff --git a/examples/benchmark/CMakeLists.txt b/examples/benchmark/CMakeLists.txt
new file mode 100644
index 000000000..31aaa190b
--- /dev/null
+++ b/examples/benchmark/CMakeLists.txt
@@ -0,0 +1,5 @@
+set(TARGET benchmark)
+add_executable(${TARGET} benchmark-q4_0-matmult.c)
+set_source_files_properties(benchmark-q4_0-matmult.c PROPERTIES LANGUAGE CXX)
+target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
+target_compile_features(${TARGET} PRIVATE cxx_std_11)
diff --git a/examples/benchmark/benchmark-q4_0-matmult.c b/examples/benchmark/benchmark-q4_0-matmult.c
index 84b06766c..27c00e1ce 100644
--- a/examples/benchmark/benchmark-q4_0-matmult.c
+++ b/examples/benchmark/benchmark-q4_0-matmult.c
@@ -8,6 +8,7 @@
 
 #include <locale.h>
 #include "ggml.h"
+#include "llama.h"
 #include <assert.h>
 #include <math.h>
 #include <cstring>
@@ -45,7 +46,7 @@ float tensor_sum_elements(struct ggml_tensor * tensor) {
 
 #define TENSOR_TYPE_AS_STR(TYPE) TYPE == GGML_TYPE_F32 ? "FP32" : TYPE == GGML_TYPE_F16 ? "FP16" : TYPE == GGML_TYPE_Q4_0 ? "Q4_0" : TYPE == GGML_TYPE_Q4_1 ? "Q4_1" : "UNKNOWN"
 
-#define TENSOR_DUMP(TENSOR) printf("%15s: type = %i (%5s) ne = %5d x %5d x %5d, nb = (%5li, %5li, %5li) - ", #TENSOR, \
+#define TENSOR_DUMP(TENSOR) printf("%15s: type = %i (%5s) ne = %5ld x %5ld x %5ld, nb = (%5li, %5li, %5li) - ", #TENSOR, \
         TENSOR->type,TENSOR_TYPE_AS_STR(TENSOR->type),\
         TENSOR->ne[0], TENSOR->ne[1], TENSOR->ne[2], TENSOR->nb[0], TENSOR->nb[1], TENSOR->nb[2]); \
     { float sum = tensor_sum_elements(TENSOR); printf("Sum of tensor %s is %6.2f\n",#TENSOR, sum); }
@@ -170,12 +171,40 @@ int main(int argc, char ** argv) {
 
     struct ggml_cgraph gf = ggml_build_forward(m11xm2);
 
     gf.n_threads=benchmark_params.n_threads;
-    printf("cgraph->n_threads=%i\n",gf.n_threads);
+    fprintf(stderr, "system_info: n_threads = %d | %s\n",
+            benchmark_params.n_threads, llama_print_system_info());
 
     TENSOR_DUMP(m11);
     TENSOR_DUMP(m2);
 
     ggml_graph_compute(ctx, &gf);
 
+    {
+        const int dimx = sizex;
+        const int dimy = sizey;
+        const int dimz = sizez;
+        long long int flops_per_dot_product = dimy + dimy;
+        long long int flops_per_matrix = flops_per_dot_product * dimx * dimz;
+        printf("Matrix Multiplication of (%i,%i,%i) x (%i,%i,%i) - about %6.2f gFLOPS\n\n", sizex, sizey, 1, sizex, sizez, 1, 1.0f*flops_per_matrix / 1000 / 1000 / 1000);
+
+        printf("Iteration;NThreads; SizeX; SizeY; SizeZ; Required_FLOPS; Elapsed_u_Seconds; FLOPS_per_u_Second\n");
+        printf("==============================================================================================\n");
+
+        for (int i=0;i<benchmark_params.n_iterations;i++) {

     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
         return;
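
Note on the TENSOR_DUMP hunk: it changes the ne specifiers from %5d to %5ld because ggml's tensor dimensions ne[] are 64-bit (int64_t). %ld matches int64_t only on LP64 platforms, though; on 64-bit Windows (LLP64) long is 32 bits, and the same caveat applies to the %5li used for the size_t strides nb[]. A fully portable variant would use PRId64 from <inttypes.h> and %zu. A minimal sketch, not the patch's actual code; the values stand in for a tensor's ne[]/nb[] fields and are assumed for illustration:

/* prid64_sketch.c — portable format specifiers for TENSOR_DUMP-style output. */
#include <inttypes.h>
#include <stdio.h>
#include <stddef.h>

int main(void) {
    /* Stand-ins for tensor->ne[] (int64_t) and tensor->nb[] (size_t). */
    int64_t ne0 = 11008, ne1 = 4096, ne2 = 1;
    size_t  nb0 = 4, nb1 = 44032, nb2 = 180355072;
    /* PRId64 expands to the correct conversion on both LP64 (Linux/macOS)
       and LLP64 (64-bit Windows); %zu is the standard specifier for size_t. */
    printf("ne = %5" PRId64 " x %5" PRId64 " x %5" PRId64 ", nb = (%5zu, %5zu, %5zu)\n",
           ne0, ne1, ne2, nb0, nb1, nb2);
    return 0;
}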
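On the FLOP accounting in the new benchmark block: each of the dimx * dimz output elements is a dot product of length dimy, costing one multiply and one add per element, which is where flops_per_dot_product = dimy + dimy comes from. A standalone sketch of the same arithmetic, using assumed example sizes rather than the benchmark's actual defaults:

/* flops_sketch.c — illustration of the FLOP estimate used in the patch. */
#include <stdio.h>

int main(void) {
    const int dimx = 4096, dimy = 4096, dimz = 128;  /* assumed sizes */
    /* One multiply + one add per element of each length-dimy dot product,
       i.e. the patch's "dimy + dimy". */
    long long flops_per_dot_product = 2LL * dimy;
    long long flops_per_matrix = flops_per_dot_product * dimx * dimz;
    /* 2 * 4096 * 4096 * 128 = 2^32 flops, about 4.29 gFLOP per multiplication. */
    printf("about %6.2f gFLOP\n", 1.0 * flops_per_matrix / 1000 / 1000 / 1000);
    return 0;
}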