fix mmap usage when using host buffers

2024-10-07 20:42:52 +02:00 · 2024-10-07 20:42:52 +02:00 · 59ee00a880
commit 59ee00a880
parent 0f3e091f1d
1 changed file with 2 additions and 1 deletion
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -8903,7 +8903,8 @@ static bool llm_load_tensors(
        bufs.reserve(n_max_backend_buffer);

        // check if this backend device supports buffer_from_host_ptr
-        ggml_backend_dev_t dev = ggml_backend_buft_get_device(buft);
+        // when using a host buffer as the CPU backend buffer, use the CPU device to prioritize buffer_from_host_ptr over the host buffer
+        ggml_backend_dev_t dev = ggml_backend_buft_get_device(buft == llama_default_buffer_type_cpu(model, true) ? ggml_backend_cpu_buffer_type() : buft);
        bool buffer_from_host_ptr_supported = false;
        if (dev) {
            ggml_backend_dev_props props;