From 1cdfdb34e032c459e75bc8b49edf86fded33ff78 Mon Sep 17 00:00:00 2001 From: ct-clmsn Date: Sat, 23 Dec 2023 22:35:47 -0500 Subject: [PATCH] fixed hpx runtime initialization and finalization --- llama.cpp | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/llama.cpp b/llama.cpp index 8ad116b8f..eb8178b39 100644 --- a/llama.cpp +++ b/llama.cpp @@ -8771,13 +8771,6 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s std::vector hist_all(1 << 4, 0); #if defined(GGML_USE_HPX) - { - std::string thread_arg = "--hpx:threads=" + std::to_string(nthread); - hpx::init_params params; - params.cfg = { thread_arg }; - hpx::start(nullptr, 0, nullptr, params); - } - std::vector> futures; futures.reserve(nthread); #else @@ -9352,6 +9345,7 @@ void llama_backend_init(bool numa) { struct ggml_init_params params = { 0, NULL, false }; struct ggml_context * ctx = ggml_init(params); ggml_free(ctx); + } if (numa) { @@ -9361,12 +9355,27 @@ void llama_backend_init(bool numa) { #ifdef GGML_USE_MPI ggml_mpi_backend_init(); #endif +#ifdef GGML_USE_HPX + { + const auto nthread = std::thread::hardware_concurrency(); + std::string thread_arg = "--hpx:threads=" + std::to_string(nthread); + hpx::init_params params; + params.cfg = { thread_arg }; + hpx::start(nullptr, 0, nullptr, params); + } +#endif } void llama_backend_free(void) { #ifdef GGML_USE_MPI ggml_mpi_backend_free(); #endif +#ifdef GGML_USE_HPX + { + hpx::post([]() { hpx::finalize(); }); + hpx::stop(); + } +#endif } int64_t llama_time_us(void) {