llama_mmap : avoid unmapping the same fragments again in the destructor
This commit is contained in:
parent
6a72c7f2e3
commit
cd4167b634
1 changed files with 75 additions and 45 deletions
120
llama.cpp
120
llama.cpp
|
@ -816,24 +816,12 @@ struct llama_mmap {
|
||||||
|
|
||||||
llama_mmap(const llama_mmap &) = delete;
|
llama_mmap(const llama_mmap &) = delete;
|
||||||
|
|
||||||
static void align_offset(size_t * offset, size_t * len, size_t page_size) {
|
|
||||||
// align offset to the next page
|
|
||||||
size_t offset_in_page = *offset & (page_size - 1);
|
|
||||||
size_t offset_to_page = offset_in_page == 0 ? 0 : page_size - offset_in_page;
|
|
||||||
*offset += offset_to_page;
|
|
||||||
|
|
||||||
if (offset_to_page >= *len) {
|
|
||||||
*len = 0;
|
|
||||||
} else {
|
|
||||||
*len -= offset_to_page;
|
|
||||||
// align len to the previous page
|
|
||||||
*len -= *len & (page_size - 1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef _POSIX_MAPPED_FILES
|
#ifdef _POSIX_MAPPED_FILES
|
||||||
static constexpr bool SUPPORTED = true;
|
static constexpr bool SUPPORTED = true;
|
||||||
|
|
||||||
|
// list of mapped fragments (first_offset, last_offset)
|
||||||
|
std::vector<std::pair<size_t, size_t>> mapped_fragments;
|
||||||
|
|
||||||
llama_mmap(struct llama_file * file, size_t prefetch = (size_t) -1 /* -1 = max value */, bool numa = false) {
|
llama_mmap(struct llama_file * file, size_t prefetch = (size_t) -1 /* -1 = max value */, bool numa = false) {
|
||||||
size = file->size;
|
size = file->size;
|
||||||
int fd = fileno(file->fp);
|
int fd = fileno(file->fp);
|
||||||
|
@ -841,8 +829,9 @@ struct llama_mmap {
|
||||||
// prefetch/readahead impairs performance on NUMA systems
|
// prefetch/readahead impairs performance on NUMA systems
|
||||||
if (numa) { prefetch = 0; }
|
if (numa) { prefetch = 0; }
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
|
// advise the kernel to read the file sequentially (increases readahead)
|
||||||
if (posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL)) {
|
if (posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL)) {
|
||||||
fprintf(stderr, "warning: posix_fadvise(.., POSIX_FADV_SEQUENTIAL) failed: %s\n",
|
LLAMA_LOG_WARN("warning: posix_fadvise(.., POSIX_FADV_SEQUENTIAL) failed: %s\n",
|
||||||
strerror(errno));
|
strerror(errno));
|
||||||
}
|
}
|
||||||
if (prefetch) { flags |= MAP_POPULATE; }
|
if (prefetch) { flags |= MAP_POPULATE; }
|
||||||
|
@ -853,9 +842,9 @@ struct llama_mmap {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (prefetch > 0) {
|
if (prefetch > 0) {
|
||||||
// Advise the kernel to preload the mapped memory
|
// advise the kernel to preload the mapped memory
|
||||||
if (posix_madvise(addr, std::min(file->size, prefetch), POSIX_MADV_WILLNEED)) {
|
if (posix_madvise(addr, std::min(file->size, prefetch), POSIX_MADV_WILLNEED)) {
|
||||||
fprintf(stderr, "warning: posix_madvise(.., POSIX_MADV_WILLNEED) failed: %s\n",
|
LLAMA_LOG_WARN("warning: posix_madvise(.., POSIX_MADV_WILLNEED) failed: %s\n",
|
||||||
strerror(errno));
|
strerror(errno));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -863,32 +852,81 @@ struct llama_mmap {
|
||||||
// advise the kernel not to use readahead
|
// advise the kernel not to use readahead
|
||||||
// (because the next page might not belong on the same node)
|
// (because the next page might not belong on the same node)
|
||||||
if (posix_madvise(addr, file->size, POSIX_MADV_RANDOM)) {
|
if (posix_madvise(addr, file->size, POSIX_MADV_RANDOM)) {
|
||||||
fprintf(stderr, "warning: posix_madvise(.., POSIX_MADV_RANDOM) failed: %s\n",
|
LLAMA_LOG_WARN("warning: posix_madvise(.., POSIX_MADV_RANDOM) failed: %s\n",
|
||||||
strerror(errno));
|
strerror(errno));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// initialize list of mapped_fragments
|
||||||
|
mapped_fragments.emplace_back(0, file->size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void unmap(size_t offset, size_t len) {
|
static void align_range(size_t * first, size_t * last, size_t page_size) {
|
||||||
|
// align first to the next page
|
||||||
|
size_t offset_in_page = *first & (page_size - 1);
|
||||||
|
size_t offset_to_page = offset_in_page == 0 ? 0 : page_size - offset_in_page;
|
||||||
|
*first += offset_to_page;
|
||||||
|
|
||||||
|
// align last to the previous page
|
||||||
|
*last = *last & ~(page_size - 1);
|
||||||
|
|
||||||
|
if (*last <= *first) {
|
||||||
|
*last = *first;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// partially unmap the file in the range [first, last)
|
||||||
|
void unmap_fragment(size_t first, size_t last) {
|
||||||
|
// note: this function must not be called multiple times with overlapping ranges
|
||||||
|
// otherwise, there is a risk of invalidating addresses that have been repurposed for other mappings
|
||||||
int page_size = sysconf(_SC_PAGESIZE);
|
int page_size = sysconf(_SC_PAGESIZE);
|
||||||
align_offset(&offset, &len, page_size);
|
align_range(&first, &last, page_size);
|
||||||
if (len < (size_t)page_size) {
|
size_t len = last - first;
|
||||||
|
|
||||||
|
if (len == 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
void * next_page_start = (uint8_t *) addr + offset;
|
GGML_ASSERT(first % page_size == 0);
|
||||||
// unmap and discard the pages
|
GGML_ASSERT(last % page_size == 0);
|
||||||
|
GGML_ASSERT(last > first);
|
||||||
|
|
||||||
|
void * next_page_start = (uint8_t *) addr + first;
|
||||||
|
|
||||||
|
// unmap the range
|
||||||
if (munmap(next_page_start, len)) {
|
if (munmap(next_page_start, len)) {
|
||||||
fprintf(stderr, "warning: munmap failed: %s\n", strerror(errno));
|
LLAMA_LOG_WARN("warning: munmap failed: %s\n", strerror(errno));
|
||||||
}
|
}
|
||||||
if (posix_madvise(next_page_start, len, POSIX_MADV_DONTNEED)) {
|
|
||||||
fprintf(stderr, "warning: posix_madvise(.., POSIX_MADV_DONTNEED) failed: %s\n",
|
// update the list of mapped fragments to avoid unmapping the same range again in the destructor
|
||||||
strerror(errno));
|
std::vector<std::pair<size_t, size_t>> new_mapped_fragments;
|
||||||
|
for (const auto & frag : mapped_fragments) {
|
||||||
|
if (frag.first < first && frag.second > last) {
|
||||||
|
// the range is in the middle of the fragment, split it
|
||||||
|
new_mapped_fragments.emplace_back(frag.first, first);
|
||||||
|
new_mapped_fragments.emplace_back(last, frag.second);
|
||||||
|
} else if (frag.first < first && frag.second > first) {
|
||||||
|
// the range starts in the middle of the fragment
|
||||||
|
new_mapped_fragments.emplace_back(frag.first, first);
|
||||||
|
} else if (frag.first < last && frag.second > last) {
|
||||||
|
// the range ends in the middle of the fragment
|
||||||
|
new_mapped_fragments.emplace_back(last, frag.second);
|
||||||
|
} else if (frag.first >= first && frag.second <= last) {
|
||||||
|
// the range covers the entire fragment
|
||||||
|
} else {
|
||||||
|
// the range is outside the fragment
|
||||||
|
new_mapped_fragments.push_back(frag);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
mapped_fragments = std::move(new_mapped_fragments);
|
||||||
}
|
}
|
||||||
|
|
||||||
~llama_mmap() {
|
~llama_mmap() {
|
||||||
munmap(addr, size);
|
for (const auto & frag : mapped_fragments) {
|
||||||
|
if (munmap((char *) addr + frag.first, frag.second - frag.first)) {
|
||||||
|
LLAMA_LOG_WARN("warning: munmap failed: %s\n", strerror(errno));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
#elif defined(_WIN32)
|
#elif defined(_WIN32)
|
||||||
static constexpr bool SUPPORTED = true;
|
static constexpr bool SUPPORTED = true;
|
||||||
|
@ -936,18 +974,10 @@ struct llama_mmap {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void unmap(size_t offset, size_t len) {
|
void unmap_fragment(size_t first, size_t last) {
|
||||||
SYSTEM_INFO si;
|
// not supported
|
||||||
GetSystemInfo(&si);
|
GGML_UNUSED(first);
|
||||||
DWORD page_size = si.dwAllocationGranularity;
|
GGML_UNUSED(last);
|
||||||
align_offset(&offset, &len, page_size);
|
|
||||||
|
|
||||||
if (len < (size_t)page_size) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
void * next_page_start = (uint8_t *) addr + offset;
|
|
||||||
VirtualAlloc(next_page_start, len, MEM_RESET, PAGE_NOACCESS);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
~llama_mmap() {
|
~llama_mmap() {
|
||||||
|
@ -2429,11 +2459,11 @@ struct llama_model_loader {
|
||||||
size_done += ggml_nbytes(cur);
|
size_done += ggml_nbytes(cur);
|
||||||
}
|
}
|
||||||
|
|
||||||
// unmap GPU tensors
|
// unmap offloaded tensors and metadata
|
||||||
if (use_mmap && mapping) {
|
if (use_mmap && mapping) {
|
||||||
// unmap offloaded tensors and metadata
|
mapping->unmap_fragment(0, mmap_first);
|
||||||
mapping->unmap(0, mmap_first);
|
mapping->unmap_fragment(mmap_last, mmap_last);
|
||||||
mapping->unmap(mmap_last, mapping->size - mmap_last);
|
mapping->unmap_fragment(mmap_last, mapping->size);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (progress_callback) {
|
if (progress_callback) {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue