From 77e9703de74338747c485746e8d20f8cc19d61b3 Mon Sep 17 00:00:00 2001
From: Jianyu Zhang
Date: Fri, 12 Apr 2024 17:16:36 +0800
Subject: [PATCH] refactor the solution, use host buf to fix it, instead of
 disable mmap

---
 ggml-backend.c | 7 -------
 ggml-backend.h | 2 --
 ggml-sycl.cpp  | 6 ++++--
 llama.cpp      | 2 +-
 4 files changed, 5 insertions(+), 12 deletions(-)

diff --git a/ggml-backend.c b/ggml-backend.c
index bec563f0c..402d86ef3 100644
--- a/ggml-backend.c
+++ b/ggml-backend.c
@@ -196,13 +196,6 @@ size_t ggml_backend_get_max_size(ggml_backend_t backend) {
     return ggml_backend_buft_get_max_size(ggml_backend_get_default_buffer_type(backend));
 }
 
-bool ggml_backend_is_support_mmap(void) {
-#ifdef GGML_USE_SYCL
-    return false;
-#endif
-    return true;
-}
-
 void ggml_backend_tensor_set_async(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
     GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
     GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor write out of bounds");
diff --git a/ggml-backend.h b/ggml-backend.h
index 54702de88..744b6a774 100644
--- a/ggml-backend.h
+++ b/ggml-backend.h
@@ -26,7 +26,6 @@ extern "C" {
     GGML_API bool ggml_backend_buft_supports_backend(ggml_backend_buffer_type_t buft, ggml_backend_t backend);
     GGML_API bool ggml_backend_buft_is_host         (ggml_backend_buffer_type_t buft);
 
-
     // buffer
     enum ggml_backend_buffer_usage {
         GGML_BACKEND_BUFFER_USAGE_ANY = 0,
@@ -59,7 +58,6 @@ extern "C" {
     GGML_API ggml_backend_buffer_t ggml_backend_alloc_buffer(ggml_backend_t backend, size_t size);
     GGML_API size_t ggml_backend_get_alignment(ggml_backend_t backend);
     GGML_API size_t ggml_backend_get_max_size(ggml_backend_t backend);
-    GGML_API bool ggml_backend_is_support_mmap(void);
 
     GGML_API void ggml_backend_tensor_set_async(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
     GGML_API void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp
index b0082fa1c..86091cfbf 100644
--- a/ggml-sycl.cpp
+++ b/ggml-sycl.cpp
@@ -16821,11 +16821,13 @@ static void ggml_backend_sycl_buffer_set_tensor(ggml_backend_buffer_t buffer,
     const dpct::queue_ptr stream = g_syclStreams[ctx->device][0];
     SYCL_CHECK(
         CHECK_TRY_ERROR(dpct::dev_mgr::instance().get_device(ctx->device).queues_wait_and_throw()));
-
+    char* host_buf = (char*)malloc(size);
+    memcpy(host_buf, data, size);
     SYCL_CHECK(
         CHECK_TRY_ERROR((*stream)
-                            .memcpy((char *)tensor->data + offset, data, size)
+                            .memcpy((char *)tensor->data + offset, host_buf, size)
                             .wait()));
+    free(host_buf);
 }
 catch (sycl::exception const &exc) {
     std::cerr << exc.what() << "Exception caught at file:" << __FILE__
diff --git a/llama.cpp b/llama.cpp
index 8ce319e45..dad2c4fbf 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -3112,7 +3112,7 @@ struct llama_model_loader {
             }
         }
 
-        if (!llama_mmap::SUPPORTED || !ggml_backend_is_support_mmap()) {
+        if (!llama_mmap::SUPPORTED) {
             LLAMA_LOG_WARN("%s: mmap is not supported on this platform\n", __func__);
             use_mmap = false;
         }
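
Note on the approach: instead of disabling mmap for the whole SYCL backend, the patch stages the (possibly mmap-backed) tensor data through a plain malloc'ed host buffer and points the SYCL stream memcpy at that buffer, then frees it after the copy has completed. The extra host-to-host copy costs one pass over the tensor bytes, but it keeps mmap enabled for model loading, which is the trade-off named in the commit subject. Below is a minimal, self-contained sketch of the same staging pattern; device_copy() is only a hypothetical stand-in for the real (*stream).memcpy(...).wait() call and is not a ggml or SYCL API.

    /* Sketch of the host-staging pattern used in
     * ggml_backend_sycl_buffer_set_tensor() above. */
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Stand-in for the device copy: assume the real one must only ever be
     * handed regular heap memory, never an mmap-backed source pointer. */
    static void device_copy(void * dst, const void * src, size_t size) {
        memcpy(dst, src, size);
    }

    static void set_tensor_staged(void * dst, const void * data, size_t size) {
        char * host_buf = (char *) malloc(size);  /* plain heap staging buffer */
        memcpy(host_buf, data, size);             /* host-to-host copy first */
        device_copy(dst, host_buf, size);         /* device copy sees malloc'ed memory only */
        free(host_buf);                           /* safe: the copy has already completed */
    }

    int main(void) {
        char src[16] = "tensor bytes";
        char dst[16] = {0};
        set_tensor_staged(dst, src, sizeof(src));
        printf("%s\n", dst);
        return 0;
    }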