Fixed HPX runtime initialization and finalization

This commit is contained in:
ct-clmsn 2023-12-23 22:35:47 -05:00
parent e1eb3d14f1
commit 1cdfdb34e0

View file

@ -8771,13 +8771,6 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
std::vector<int64_t> hist_all(1 << 4, 0);
#if defined(GGML_USE_HPX)
{
std::string thread_arg = "--hpx:threads=" + std::to_string(nthread);
hpx::init_params params;
params.cfg = { thread_arg };
hpx::start(nullptr, 0, nullptr, params);
}
std::vector<hpx::future<void>> futures;
futures.reserve(nthread);
#else
@ -9352,6 +9345,7 @@ void llama_backend_init(bool numa) {
struct ggml_init_params params = { 0, NULL, false };
struct ggml_context * ctx = ggml_init(params);
ggml_free(ctx);
}
if (numa) {
@ -9361,12 +9355,27 @@ void llama_backend_init(bool numa) {
#ifdef GGML_USE_MPI
ggml_mpi_backend_init();
#endif
#ifdef GGML_USE_HPX
{
const auto nthread = std::thread::hardware_concurrency();
std::string thread_arg = "--hpx:threads=" + std::to_string(nthread);
hpx::init_params params;
params.cfg = { thread_arg };
hpx::start(nullptr, 0, nullptr, params);
}
#endif
}
// Tears down the optional distributed/parallel runtime backends that
// llama_backend_init() brought up. Intended to be called exactly once
// at process shutdown, after all llama work has completed.
void llama_backend_free(void) {
#ifdef GGML_USE_MPI
// Mirrors the ggml_mpi_backend_init() call performed in llama_backend_init().
ggml_mpi_backend_free();
#endif
#ifdef GGML_USE_HPX
{
// llama_backend_init() launched the HPX runtime in the background via
// hpx::start(). To shut it down from this (non-HPX) thread, schedule
// hpx::finalize() to run on an HPX worker thread, then call hpx::stop(),
// which waits for the runtime to terminate — the documented shutdown
// sequence for runtimes started with hpx::start().
hpx::post([]() { hpx::finalize(); });
hpx::stop();
}
#endif
}
int64_t llama_time_us(void) {