profiler: make profiler optional with GGML_GRAPH_PROFILER
This commit is contained in:
parent
b7ae2d176e
commit
4578c37a92
6 changed files with 83 additions and 18 deletions
|
@ -75,6 +75,7 @@ option(GGML_CCACHE "ggml: use ccache if available" ON)
|
||||||
option(GGML_ALL_WARNINGS "ggml: enable all compiler warnings" ON)
|
option(GGML_ALL_WARNINGS "ggml: enable all compiler warnings" ON)
|
||||||
option(GGML_ALL_WARNINGS_3RD_PARTY "ggml: enable all compiler warnings in 3rd party libs" OFF)
|
option(GGML_ALL_WARNINGS_3RD_PARTY "ggml: enable all compiler warnings in 3rd party libs" OFF)
|
||||||
option(GGML_GPROF "ggml: enable gprof" OFF)
|
option(GGML_GPROF "ggml: enable gprof" OFF)
|
||||||
|
option(GGML_GRAPH_PROFILER "ggml: enable internal Graph and Op profiler" OFF)
|
||||||
|
|
||||||
# build
|
# build
|
||||||
option(GGML_FATAL_WARNINGS "ggml: enable -Werror flag" OFF)
|
option(GGML_FATAL_WARNINGS "ggml: enable -Werror flag" OFF)
|
||||||
|
|
|
@ -9,6 +9,10 @@ if (CMAKE_SYSTEM_NAME MATCHES "Linux")
|
||||||
add_compile_definitions($<$<CONFIG:Debug>:_GLIBCXX_ASSERTIONS>)
|
add_compile_definitions($<$<CONFIG:Debug>:_GLIBCXX_ASSERTIONS>)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
# Expose the GGML_GRAPH_PROFILER option to the sources as a preprocessor
# definition of the same name; the profiler code is guarded by it.
if (GGML_GRAPH_PROFILER)
    add_compile_definitions(GGML_GRAPH_PROFILER)
endif()
|
||||||
|
|
||||||
if (NOT MSVC)
|
if (NOT MSVC)
|
||||||
if (GGML_SANITIZE_THREAD)
|
if (GGML_SANITIZE_THREAD)
|
||||||
add_compile_options(-fsanitize=thread)
|
add_compile_options(-fsanitize=thread)
|
||||||
|
|
|
@ -157,17 +157,6 @@ static size_t ggml_hash_find_or_insert(struct ggml_hash_set * hash_set, struct g
|
||||||
GGML_ABORT("fatal error");
|
GGML_ABORT("fatal error");
|
||||||
}
|
}
|
||||||
|
|
||||||
// op profile data (per op / per thread)
|
|
||||||
enum ggml_profile_event {
|
|
||||||
GGML_PROF_OP_START,
|
|
||||||
GGML_PROF_OP_SYNC,
|
|
||||||
GGML_PROF_OP_END
|
|
||||||
};
|
|
||||||
|
|
||||||
struct ggml_profile_data {
|
|
||||||
uint64_t nsec[GGML_PROF_OP_END + 1]; // event times in nsec
|
|
||||||
};
|
|
||||||
|
|
||||||
// computation graph
|
// computation graph
|
||||||
|
|
||||||
enum ggml_cgraph_eval_order {
|
enum ggml_cgraph_eval_order {
|
||||||
|
@ -176,6 +165,8 @@ enum ggml_cgraph_eval_order {
|
||||||
GGML_CGRAPH_EVAL_ORDER_COUNT
|
GGML_CGRAPH_EVAL_ORDER_COUNT
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct ggml_profile_data;
|
||||||
|
|
||||||
struct ggml_cgraph {
|
struct ggml_cgraph {
|
||||||
int size;
|
int size;
|
||||||
int n_nodes;
|
int n_nodes;
|
||||||
|
@ -194,12 +185,6 @@ struct ggml_cgraph {
|
||||||
|
|
||||||
struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph, int i0, int i1);
|
struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph, int i0, int i1);
|
||||||
|
|
||||||
void ggml_profile_graph_init(struct ggml_cgraph *cg, int n_threads);
|
|
||||||
void ggml_profile_graph_start(struct ggml_cgraph *cg, int n_threads);
|
|
||||||
void ggml_profile_graph_finish(struct ggml_cgraph *cg, int n_threads);
|
|
||||||
void ggml_profile_graph_free(struct ggml_cgraph *cg);
|
|
||||||
void ggml_profile_op_event(const struct ggml_cgraph *cg, enum ggml_profile_event e, int node_n, int ith);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,9 +1,12 @@
|
||||||
#include "ggml-impl.h"
|
#include "ggml-profile.h"
|
||||||
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
|
|
||||||
|
#ifdef GGML_GRAPH_PROFILER
|
||||||
|
|
||||||
extern "C" void ggml_profile_graph_init(struct ggml_cgraph *cg, int n_threads)
|
extern "C" void ggml_profile_graph_init(struct ggml_cgraph *cg, int n_threads)
|
||||||
{
|
{
|
||||||
if (!getenv("GGML_GRAPH_PROFILE")) { return; }
|
if (!getenv("GGML_GRAPH_PROFILE")) { return; }
|
||||||
|
@ -138,3 +141,5 @@ extern "C" void ggml_profile_op_event(const struct ggml_cgraph *cg, enum ggml_pr
|
||||||
using clock = std::chrono::high_resolution_clock;
|
using clock = std::chrono::high_resolution_clock;
|
||||||
cg->prof[node_n][ith].nsec[e] = std::chrono::nanoseconds(clock::now().time_since_epoch()).count();
|
cg->prof[node_n][ith].nsec[e] = std::chrono::nanoseconds(clock::now().time_since_epoch()).count();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#endif // GGML_GRAPH_PROFILER
|
||||||
|
|
69
ggml/src/ggml-profile.h
Normal file
69
ggml/src/ggml-profile.h
Normal file
|
@ -0,0 +1,69 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "ggml-impl.h"
|
||||||
|
|
||||||
|
// GGML internal header
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// op profile data (per op / per thread)
|
||||||
|
// Events that are timestamped for every op on every thread.
// The enumerator values are used as indices into ggml_profile_data::nsec,
// so GGML_PROF_OP_END must remain the last enumerator (it sizes that array).
enum ggml_profile_event {
    GGML_PROF_OP_START,
    GGML_PROF_OP_SYNC,
    GGML_PROF_OP_END
};
|
||||||
|
|
||||||
|
struct ggml_profile_data {
|
||||||
|
uint64_t nsec[GGML_PROF_OP_END + 1]; // event times in nsec
|
||||||
|
};
|
||||||
|
|
||||||
|
#ifndef GGML_GRAPH_PROFILER
|
||||||
|
|
||||||
|
// Stub out all profiler functions
|
||||||
|
|
||||||
|
// No-op stand-in for ggml_profile_graph_init(), used when the profiler is
// compiled out (GGML_GRAPH_PROFILER not defined). Both arguments are ignored.
static inline void ggml_profile_graph_init(struct ggml_cgraph *cg, int n_threads)
{
    GGML_UNUSED(cg);
    GGML_UNUSED(n_threads);
}
|
||||||
|
|
||||||
|
// No-op stand-in for ggml_profile_graph_start(), used when the profiler is
// compiled out (GGML_GRAPH_PROFILER not defined). Both arguments are ignored.
static inline void ggml_profile_graph_start(struct ggml_cgraph *cg, int n_threads)
{
    GGML_UNUSED(cg);
    GGML_UNUSED(n_threads);
}
|
||||||
|
|
||||||
|
// No-op stand-in for ggml_profile_graph_finish(), used when the profiler is
// compiled out (GGML_GRAPH_PROFILER not defined). Both arguments are ignored.
static inline void ggml_profile_graph_finish(struct ggml_cgraph *cg, int n_threads)
{
    GGML_UNUSED(cg);
    GGML_UNUSED(n_threads);
}
|
||||||
|
|
||||||
|
// No-op stand-in for ggml_profile_graph_free(), used when the profiler is
// compiled out (GGML_GRAPH_PROFILER not defined). The argument is ignored.
static inline void ggml_profile_graph_free(struct ggml_cgraph *cg)
{
    GGML_UNUSED(cg);
}
|
||||||
|
|
||||||
|
// No-op stand-in for ggml_profile_op_event(), used when the profiler is
// compiled out (GGML_GRAPH_PROFILER not defined). All arguments are ignored,
// so call sites need no #ifdef of their own.
static inline void ggml_profile_op_event(const struct ggml_cgraph *cg, enum ggml_profile_event e, int node_n, int ith)
{
    GGML_UNUSED(cg);
    GGML_UNUSED(e);
    GGML_UNUSED(node_n);
    GGML_UNUSED(ith);
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
// Real profiler entry points, declared only when GGML_GRAPH_PROFILER is defined.
// They are implemented in the C++ profiler source with C linkage.

// Sets up profiling for a graph. Returns without doing anything unless the
// GGML_GRAPH_PROFILE environment variable is set.
void ggml_profile_graph_init(struct ggml_cgraph *cg, int n_threads);

// NOTE(review): implementation not visible here; presumably marks the start
// of a profiled graph computation - confirm against the profiler source.
void ggml_profile_graph_start(struct ggml_cgraph *cg, int n_threads);

// NOTE(review): implementation not visible here; presumably finalizes/reports
// the data collected for the graph - confirm against the profiler source.
void ggml_profile_graph_finish(struct ggml_cgraph *cg, int n_threads);

// NOTE(review): implementation not visible here; presumably releases the
// per-graph profile storage - confirm against the profiler source.
void ggml_profile_graph_free(struct ggml_cgraph *cg);

// Stores the current clock time, in nanoseconds, as event `e` for graph node
// `node_n` on thread `ith` (cg->prof[node_n][ith].nsec[e]).
void ggml_profile_op_event(const struct ggml_cgraph *cg, enum ggml_profile_event e, int node_n, int ith);
|
||||||
|
|
||||||
|
#endif // GGML_GRAPH_PROFILER
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
|
@ -7,6 +7,7 @@
|
||||||
#include "ggml-quants.h"
|
#include "ggml-quants.h"
|
||||||
#include "ggml.h"
|
#include "ggml.h"
|
||||||
#include "ggml-aarch64.h"
|
#include "ggml-aarch64.h"
|
||||||
|
#include "ggml-profile.h"
|
||||||
|
|
||||||
#if defined(_MSC_VER) || defined(__MINGW32__)
|
#if defined(_MSC_VER) || defined(__MINGW32__)
|
||||||
#include <malloc.h> // using malloc.h with MSC/MINGW
|
#include <malloc.h> // using malloc.h with MSC/MINGW
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue