From 1cdfdb34e032c459e75bc8b49edf86fded33ff78 Mon Sep 17 00:00:00 2001
From: ct-clmsn <ct.clmsn@gmail.com>
Date: Sat, 23 Dec 2023 22:35:47 -0500
Subject: [PATCH] fixed hpx runtime initialization and finalization

---
 llama.cpp | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)
diff --git a/llama.cpp b/llama.cpp
index 8ad116b8f..eb8178b39 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -8771,13 +8771,6 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
     std::vector<int64_t> hist_all(1 << 4, 0);
 
 #if defined(GGML_USE_HPX)
-    {
-      std::string thread_arg = "--hpx:threads=" + std::to_string(nthread);
-      hpx::init_params params;
-      params.cfg = { thread_arg };
-      hpx::start(nullptr, 0, nullptr, params);
-    }
-
     std::vector<hpx::future<void>> futures;
     futures.reserve(nthread);
 #else
@@ -9352,6 +9345,7 @@ void llama_backend_init(bool numa) {
         struct ggml_init_params params = { 0, NULL, false };
         struct ggml_context * ctx = ggml_init(params);
         ggml_free(ctx);
+
     }
 
     if (numa) {
@@ -9361,12 +9355,33 @@ void llama_backend_init(bool numa) {
 #ifdef GGML_USE_MPI
     ggml_mpi_backend_init();
 #endif
+#ifdef GGML_USE_HPX
+    {
+        // Start the HPX runtime as part of backend initialization.
+        // std::thread::hardware_concurrency() may return 0 when the value is not
+        // computable, so fall back to a single worker thread in that case.
+        const unsigned int hw_threads = std::thread::hardware_concurrency();
+        const unsigned int nthread    = hw_threads > 0 ? hw_threads : 1;
+        std::string thread_arg = "--hpx:threads=" + std::to_string(nthread);
+        hpx::init_params params;
+        params.cfg = { thread_arg };
+        hpx::start(nullptr, 0, nullptr, params);
+    }
+#endif
 }
 
 void llama_backend_free(void) {
 #ifdef GGML_USE_MPI
     ggml_mpi_backend_free();
 #endif
+#ifdef GGML_USE_HPX
+    {
+        // Counterpart of the hpx::start() call in llama_backend_init():
+        // hpx::finalize() is posted as an HPX task, then hpx::stop() is called.
+        hpx::post([]() { hpx::finalize(); });
+        hpx::stop();
+    }
+#endif
 }
 
 int64_t llama_time_us(void) {