refactor the solution: use a host buffer to fix it, instead of disabling mmap
parent 7e54166562
commit 77e9703de7

4 changed files with 5 additions and 12 deletions
ggml-backend.c
@@ -196,13 +196,6 @@ size_t ggml_backend_get_max_size(ggml_backend_t backend) {
     return ggml_backend_buft_get_max_size(ggml_backend_get_default_buffer_type(backend));
 }
 
-bool ggml_backend_is_support_mmap(void) {
-#ifdef GGML_USE_SYCL
-    return false;
-#endif
-    return true;
-}
-
 void ggml_backend_tensor_set_async(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
     GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
     GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor write out of bounds");
ggml-backend.h
@@ -26,7 +26,6 @@ extern "C" {
     GGML_API bool ggml_backend_buft_supports_backend(ggml_backend_buffer_type_t buft, ggml_backend_t backend);
     GGML_API bool ggml_backend_buft_is_host (ggml_backend_buffer_type_t buft);
 
-
     // buffer
     enum ggml_backend_buffer_usage {
         GGML_BACKEND_BUFFER_USAGE_ANY = 0,
@@ -59,7 +58,6 @@ extern "C" {
     GGML_API ggml_backend_buffer_t ggml_backend_alloc_buffer(ggml_backend_t backend, size_t size);
     GGML_API size_t ggml_backend_get_alignment(ggml_backend_t backend);
     GGML_API size_t ggml_backend_get_max_size(ggml_backend_t backend);
-    GGML_API bool ggml_backend_is_support_mmap(void);
 
     GGML_API void ggml_backend_tensor_set_async(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
     GGML_API void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
ggml-sycl.cpp
@@ -16821,11 +16821,13 @@ static void ggml_backend_sycl_buffer_set_tensor(ggml_backend_buffer_t buffer,
     const dpct::queue_ptr stream = g_syclStreams[ctx->device][0];
     SYCL_CHECK(
         CHECK_TRY_ERROR(dpct::dev_mgr::instance().get_device(ctx->device).queues_wait_and_throw()));
-
+    char* host_buf = (char*)malloc(size);
+    memcpy(host_buf, data, size);
     SYCL_CHECK(
         CHECK_TRY_ERROR((*stream)
-                            .memcpy((char *)tensor->data + offset, data, size)
+                            .memcpy((char *)tensor->data + offset, host_buf, size)
                             .wait()));
+    free(host_buf);
 }
 catch (sycl::exception const &exc) {
     std::cerr << exc.what() << "Exception caught at file:" << __FILE__
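The substance of the fix: `data` can point into an mmap'd model file, and handing that pointer straight to the device memcpy is what the parent commit worked around by disabling mmap under SYCL. Copying the bytes into a plain malloc'd buffer first keeps mmap enabled and gives the SYCL runtime ordinary host memory to read from. A minimal standalone sketch of the same staging pattern, assuming a plain sycl::queue in place of ggml's dpct::queue_ptr wrapper (set_tensor_staged is an illustrative name, not part of the ggml API):

#include <sycl/sycl.hpp>
#include <cstdlib>
#include <cstring>

// Stage `data` (which may live in an mmap'd file) through plain heap memory
// before the device copy, mirroring the change above. `dst` is assumed to be
// device USM, as tensor->data is in ggml-sycl.
static void set_tensor_staged(sycl::queue & q, void * dst, const void * data, size_t size) {
    char * host_buf = (char *) malloc(size); // ordinary pageable host memory
    memcpy(host_buf, data, size);            // read the mmap'd bytes on the host side
    q.memcpy(dst, host_buf, size).wait();    // the SYCL runtime only ever sees host_buf
    free(host_buf);
}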
llama.cpp
@@ -3112,7 +3112,7 @@ struct llama_model_loader {
             }
         }
 
-        if (!llama_mmap::SUPPORTED || !ggml_backend_is_support_mmap()) {
+        if (!llama_mmap::SUPPORTED) {
             LLAMA_LOG_WARN("%s: mmap is not supported on this platform\n", __func__);
             use_mmap = false;
         }
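With the SYCL copy fixed inside the backend, the loader's gate goes back to the platform-level constant alone. For context, a trimmed sketch of how llama_mmap::SUPPORTED is derived in llama.cpp (llama_mmap_sketch is an illustrative stand-in; the real struct carries the full mmap implementation alongside the flag):

// Illustrative only: simplified from llama.cpp's llama_mmap, whose SUPPORTED
// flag is decided at compile time from the platform, not per-backend.
struct llama_mmap_sketch {
#ifdef _POSIX_MAPPED_FILES
    static constexpr bool SUPPORTED = true;
#elif defined(_WIN32)
    static constexpr bool SUPPORTED = true;
#else
    static constexpr bool SUPPORTED = false;
#endif
};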