// Patch summary (leading commentary placed before the first "diff --git" header):
//
// Makes the ggml graph/op profiler a compile-time opt-in.
//
//  * ggml/CMakeLists.txt: adds the GGML_GRAPH_PROFILER option, default OFF.
//  * ggml/src/CMakeLists.txt: when that option is set, adds the
//    GGML_GRAPH_PROFILER compile definition.
//  * ggml/src/ggml-impl.h: removes enum ggml_profile_event, struct
//    ggml_profile_data and the five ggml_profile_* prototypes; keeps only a
//    forward declaration of struct ggml_profile_data ahead of struct ggml_cgraph.
//  * ggml/src/ggml-profile.cpp: includes "ggml-profile.h" instead of
//    "ggml-impl.h"; the hunks add "#ifdef GGML_GRAPH_PROFILER" before
//    ggml_profile_graph_init and a matching "#endif" after ggml_profile_op_event.
//    The context line shows ggml_profile_graph_init still returns early when the
//    GGML_GRAPH_PROFILE environment variable is not set.
//  * ggml/src/ggml-profile.h (new file): holds the enum and struct moved out of
//    ggml-impl.h. When GGML_GRAPH_PROFILER is not defined, the five
//    ggml_profile_* functions are static inline stubs whose bodies only apply
//    GGML_UNUSED to their arguments; otherwise they are declared as ordinary
//    prototypes inside the extern "C" block.
//  * ggml/src/ggml.c: adds #include "ggml-profile.h".
//
// NOTE(review): this text appears to have lost its line breaks, and some
// angle-bracket tokens look stripped (bare "#include" with no target,
// "$<$:_GLIBCXX_ASSERTIONS>"). Presumably an extraction artifact - confirm
// against the original patch before trying to apply it.
diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt index 89fdf9d1c..1bee9e313 100644 --- a/ggml/CMakeLists.txt +++ b/ggml/CMakeLists.txt @@ -75,6 +75,7 @@ option(GGML_CCACHE "ggml: use ccache if available" ON) option(GGML_ALL_WARNINGS "ggml: enable all compiler warnings" ON) option(GGML_ALL_WARNINGS_3RD_PARTY "ggml: enable all compiler warnings in 3rd party libs" OFF) option(GGML_GPROF "ggml: enable gprof" OFF) +option(GGML_GRAPH_PROFILER "ggml: enable internal Graph and Op profiler" OFF) # build option(GGML_FATAL_WARNINGS "ggml: enable -Werror flag" OFF) diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt index e1424fc6a..098a89ac1 100644 --- a/ggml/src/CMakeLists.txt +++ b/ggml/src/CMakeLists.txt @@ -9,6 +9,10 @@ if (CMAKE_SYSTEM_NAME MATCHES "Linux") add_compile_definitions($<$:_GLIBCXX_ASSERTIONS>) endif() +if (GGML_GRAPH_PROFILER) + add_compile_definitions(GGML_GRAPH_PROFILER) +endif() + if (NOT MSVC) if (GGML_SANITIZE_THREAD) add_compile_options(-fsanitize=thread) diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h index de634bdee..4007b4339 100644 --- a/ggml/src/ggml-impl.h +++ b/ggml/src/ggml-impl.h @@ -157,17 +157,6 @@ static size_t ggml_hash_find_or_insert(struct ggml_hash_set * hash_set, struct g GGML_ABORT("fatal error"); } -// op profile data (per op / per thread) -enum ggml_profile_event { - GGML_PROF_OP_START, - GGML_PROF_OP_SYNC, - GGML_PROF_OP_END -}; - -struct ggml_profile_data { - uint64_t nsec[GGML_PROF_OP_END + 1]; // event times in nsec -}; - // computation graph enum ggml_cgraph_eval_order { @@ -176,6 +165,8 @@ enum ggml_cgraph_eval_order { GGML_CGRAPH_EVAL_ORDER_COUNT }; +struct ggml_profile_data; + struct ggml_cgraph { int size; int n_nodes; @@ -194,12 +185,6 @@ struct ggml_cgraph { struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph, int i0, int i1); -void ggml_profile_graph_init(struct ggml_cgraph *cg, int n_threads); -void ggml_profile_graph_start(struct ggml_cgraph *cg, int n_threads); -void 
ggml_profile_graph_finish(struct ggml_cgraph *cg, int n_threads); -void ggml_profile_graph_free(struct ggml_cgraph *cg); -void ggml_profile_op_event(const struct ggml_cgraph *cg, enum ggml_profile_event e, int node_n, int ith); - #ifdef __cplusplus } #endif diff --git a/ggml/src/ggml-profile.cpp b/ggml/src/ggml-profile.cpp index e6ee6aea0..0f59455a5 100644 --- a/ggml/src/ggml-profile.cpp +++ b/ggml/src/ggml-profile.cpp @@ -1,9 +1,12 @@ -#include "ggml-impl.h" +#include "ggml-profile.h" + #include #include #include +#ifdef GGML_GRAPH_PROFILER + extern "C" void ggml_profile_graph_init(struct ggml_cgraph *cg, int n_threads) { if (!getenv("GGML_GRAPH_PROFILE")) { return; } @@ -138,3 +141,5 @@ extern "C" void ggml_profile_op_event(const struct ggml_cgraph *cg, enum ggml_pr using clock = std::chrono::high_resolution_clock; cg->prof[node_n][ith].nsec[e] = std::chrono::nanoseconds(clock::now().time_since_epoch()).count(); } + +#endif // GGML_GRAPH_PROFILER diff --git a/ggml/src/ggml-profile.h b/ggml/src/ggml-profile.h new file mode 100644 index 000000000..e572b91da --- /dev/null +++ b/ggml/src/ggml-profile.h @@ -0,0 +1,69 @@ +#pragma once + +#include "ggml-impl.h" + +// GGML internal header + +#ifdef __cplusplus +extern "C" { +#endif + +// op profile data (per op / per thread) +enum ggml_profile_event { + GGML_PROF_OP_START, + GGML_PROF_OP_SYNC, + GGML_PROF_OP_END +}; + +struct ggml_profile_data { + uint64_t nsec[GGML_PROF_OP_END + 1]; // event times in nsec +}; + +#ifndef GGML_GRAPH_PROFILER + +// Stub out all profiler functions + +static inline void ggml_profile_graph_init(struct ggml_cgraph *cg, int n_threads) +{ + GGML_UNUSED(cg); + GGML_UNUSED(n_threads); +} + +static inline void ggml_profile_graph_start(struct ggml_cgraph *cg, int n_threads) +{ + GGML_UNUSED(cg); + GGML_UNUSED(n_threads); +} + +static inline void ggml_profile_graph_finish(struct ggml_cgraph *cg, int n_threads) +{ + GGML_UNUSED(cg); + GGML_UNUSED(n_threads); +} + +static inline void 
ggml_profile_graph_free(struct ggml_cgraph *cg) +{ + GGML_UNUSED(cg); +} + +static inline void ggml_profile_op_event(const struct ggml_cgraph *cg, enum ggml_profile_event e, int node_n, int ith) +{ + GGML_UNUSED(cg); + GGML_UNUSED(e); + GGML_UNUSED(node_n); + GGML_UNUSED(ith); +} + +#else + +void ggml_profile_graph_init(struct ggml_cgraph *cg, int n_threads); +void ggml_profile_graph_start(struct ggml_cgraph *cg, int n_threads); +void ggml_profile_graph_finish(struct ggml_cgraph *cg, int n_threads); +void ggml_profile_graph_free(struct ggml_cgraph *cg); +void ggml_profile_op_event(const struct ggml_cgraph *cg, enum ggml_profile_event e, int node_n, int ith); + +#endif // GGML_GRAPH_PROFILER + +#ifdef __cplusplus +} +#endif diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index 9be8341ed..3d00124f8 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -7,6 +7,7 @@ #include "ggml-quants.h" #include "ggml.h" #include "ggml-aarch64.h" +#include "ggml-profile.h" #if defined(_MSC_VER) || defined(__MINGW32__) #include // using malloc.h with MSC/MINGW