diff --git a/examples/sycl/run-llama2.sh b/examples/sycl/run-llama2.sh
index c979a52f6..7b39a18c0 100755
--- a/examples/sycl/run-llama2.sh
+++ b/examples/sycl/run-llama2.sh
@@ -12,6 +12,7 @@ if [ $# -gt 0 ]; then
     GGML_SYCL_SINGLE_GPU=1
 else
     GGML_SYCL_DEVICE=0
+    GGML_SYCL_SINGLE_GPU=0
 fi
 
 #export GGML_SYCL_DEBUG=1
diff --git a/ggml-backend.c b/ggml-backend.c
index 402d86ef3..c46aa16a2 100644
--- a/ggml-backend.c
+++ b/ggml-backend.c
@@ -196,6 +196,13 @@ size_t ggml_backend_get_max_size(ggml_backend_t backend) {
     return ggml_backend_buft_get_max_size(ggml_backend_get_default_buffer_type(backend));
 }
 
+bool ggml_backend_is_support_mmap() {
+#ifdef GGML_USE_SYCL
+    return false;
+#endif
+    return true;
+}
+
 void ggml_backend_tensor_set_async(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
     GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
     GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor write out of bounds");
diff --git a/ggml-backend.h b/ggml-backend.h
index 744b6a774..6e4efd92e 100644
--- a/ggml-backend.h
+++ b/ggml-backend.h
@@ -26,6 +26,7 @@ extern "C" {
     GGML_API bool ggml_backend_buft_supports_backend(ggml_backend_buffer_type_t buft, ggml_backend_t backend);
     GGML_API bool ggml_backend_buft_is_host (ggml_backend_buffer_type_t buft);
 
+
     // buffer
     enum ggml_backend_buffer_usage {
         GGML_BACKEND_BUFFER_USAGE_ANY = 0,
@@ -58,6 +59,7 @@ extern "C" {
     GGML_API ggml_backend_buffer_t ggml_backend_alloc_buffer(ggml_backend_t backend, size_t size);
     GGML_API size_t ggml_backend_get_alignment(ggml_backend_t backend);
     GGML_API size_t ggml_backend_get_max_size(ggml_backend_t backend);
+    GGML_API bool ggml_backend_is_support_mmap();
 
     GGML_API void ggml_backend_tensor_set_async(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
     GGML_API void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
diff --git a/llama.cpp b/llama.cpp
index 5cfb4cef1..e84a99912 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1183,24 +1183,7 @@ struct llama_mmap {
 
     llama_mmap(const llama_mmap &) = delete;
 
-#ifdef GGML_USE_SYCL
-    static constexpr bool SUPPORTED = false;
-
-    llama_mmap(struct llama_file * file, size_t prefetch = -1, bool numa = false) {
-        GGML_UNUSED(file);
-        GGML_UNUSED(prefetch);
-        GGML_UNUSED(numa);
-
-        throw std::runtime_error("mmap not supported");
-    }
-
-    void unmap_fragment(size_t first, size_t last) {
-        GGML_UNUSED(first);
-        GGML_UNUSED(last);
-
-        throw std::runtime_error("mmap not supported");
-    }
-#elif defined(_POSIX_MAPPED_FILES)
+#ifdef _POSIX_MAPPED_FILES
     static constexpr bool SUPPORTED = true;
 
     // list of mapped fragments (first_offset, last_offset)
@@ -3129,7 +3112,7 @@ struct llama_model_loader {
         }
     }
 
-    if (!llama_mmap::SUPPORTED) {
+    if (!llama_mmap::SUPPORTED || !ggml_backend_is_support_mmap()) {
         LLAMA_LOG_WARN("%s: mmap is not supported on this platform\n", __func__);
         use_mmap = false;
     }
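
Not part of the patch: a minimal C++ sketch of how the new query is meant to be consumed, mirroring the llama_model_loader hunk above. The helper can_use_mmap and the small main are hypothetical and for illustration only; what the patch itself defines is that ggml_backend_is_support_mmap() returns false when ggml is built with GGML_USE_SYCL and true otherwise.

    // Sketch: gate mmap usage on both platform support and backend support.
    #include <cstdio>
    #include "ggml-backend.h"

    // In llama.cpp the platform flag is llama_mmap::SUPPORTED; here it is a plain bool.
    static bool can_use_mmap(bool platform_supported) {
        if (!platform_supported || !ggml_backend_is_support_mmap()) {
            fprintf(stderr, "mmap is not supported on this platform/backend\n");
            return false;
        }
        return true;
    }

    int main() {
        // Exit 0 when mmap can be used, 1 otherwise.
        return can_use_mmap(true) ? 0 : 1;
    }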