diff --git a/CMakeLists.txt b/CMakeLists.txt
index 22b13c192..0abc78f4c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -304,6 +304,8 @@ if (LLAMA_OPENMP)
 
         set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
         set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
+    else()
+        message(WARNING "OpenMP not found")
     endif()
 endif()
 
diff --git a/ggml.c b/ggml.c
index 0bef7ac85..0b27712a3 100644
--- a/ggml.c
+++ b/ggml.c
@@ -5,6 +5,7 @@
 #include "ggml-quants.h"
 #include "ggml.h"
 
+
 #if defined(_MSC_VER) || defined(__MINGW32__)
 #include <malloc.h> // using malloc.h with MSC/MINGW
 #elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__)
@@ -19485,6 +19486,12 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa
     if (n_threads <= 0) {
         n_threads = GGML_DEFAULT_N_THREADS;
     }
+#if defined(GGML_USE_OPENMP)
+    // Clamp n_threads to the limits reported by the OpenMP runtime to avoid a deadlock
+    // ref: https://github.com/ggerganov/llama.cpp/pull/7606
+    n_threads = MIN(n_threads, omp_get_max_threads());
+    n_threads = MIN(n_threads, omp_get_thread_limit());
+#endif
 
     size_t work_size = 0;