diff --git a/CMakeLists.txt b/CMakeLists.txt index 22b13c192..0abc78f4c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -304,6 +304,8 @@ if (LLAMA_OPENMP) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") + else() + message(WARNING "OpenMP not found") endif() endif() diff --git a/ggml.c b/ggml.c index 0bef7ac85..0b27712a3 100644 --- a/ggml.c +++ b/ggml.c @@ -5,6 +5,7 @@ #include "ggml-quants.h" #include "ggml.h" + #if defined(_MSC_VER) || defined(__MINGW32__) #include <malloc.h> // using malloc.h with MSC/MINGW #elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__) @@ -19485,6 +19486,12 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa if (n_threads <= 0) { n_threads = GGML_DEFAULT_N_THREADS; } +#if defined(GGML_USE_OPENMP) + // Limit the number of threads used to avoid deadlock + // ref: https://github.com/ggerganov/llama.cpp/pull/7606 + n_threads = MIN(n_threads, omp_get_max_threads()); + n_threads = MIN(n_threads, omp_get_thread_limit()); +#endif size_t work_size = 0;