From 524104581994c3a546733dee3ab7632448003cf2 Mon Sep 17 00:00:00 2001
From: slaren
Date: Wed, 20 Dec 2023 12:57:45 +0100
Subject: [PATCH] use posix_fadvise64(.., POSIX_FADV_SEQUENTIAL) to improve performance with mmap

---
 llama.cpp | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/llama.cpp b/llama.cpp
index 464ee1249..0c196b6ac 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -33,6 +33,7 @@
         #include <unistd.h>
         #if defined(_POSIX_MAPPED_FILES)
             #include <sys/mman.h>
+            #include <fcntl.h>
         #endif
         #if defined(_POSIX_MEMLOCK_RANGE)
             #include <sys/resource.h>
@@ -840,6 +841,10 @@ struct llama_mmap {
         // prefetch/readahead impairs performance on NUMA systems
         if (numa) { prefetch = 0; }
 #ifdef __linux__
+        if (posix_fadvise64(fd, 0, file->size, POSIX_FADV_SEQUENTIAL)) {
+            fprintf(stderr, "warning: fadvise(.., POSIX_FADV_SEQUENTIAL) failed: %s\n",
+                    strerror(errno));
+        }
         if (prefetch) { flags |= MAP_POPULATE; }
 #endif
         addr = mmap(NULL, file->size, PROT_READ, flags, fd, 0);
@@ -2314,7 +2319,9 @@ struct llama_model_loader {
         }
         */
         // prefetch the whole file - all the data is needed anyway
-        mapping.reset(new llama_mmap(&file, -1, ggml_is_numa()));
+        if (use_mmap) {
+            mapping.reset(new llama_mmap(&file, -1, ggml_is_numa()));
+        }
     }
 
     // for backwards compatibility only