From 875a1e111eaa9db3fd51be8c3b3288291ec2f1d2 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Mon, 26 Jun 2023 20:27:24 +0300
Subject: [PATCH] llama : avoid ggml include in llama-util.h

---
 llama-util.h | 18 ++++++++++--------
 llama.cpp    |  4 ++--
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/llama-util.h b/llama-util.h
index d709319df..042ebe43c 100644
--- a/llama-util.h
+++ b/llama-util.h
@@ -16,8 +16,6 @@
 #include <vector>
 #include <stdexcept>
 
-#include "ggml.h"
-
 #ifdef __has_include
     #if __has_include(<unistd.h>)
         #include <unistd.h>
@@ -174,12 +172,12 @@ struct llama_mmap {
 #ifdef _POSIX_MAPPED_FILES
     static constexpr bool SUPPORTED = true;
 
-    llama_mmap(struct llama_file * file, size_t prefetch = (size_t) -1 /* -1 = max value */) {
+    llama_mmap(struct llama_file * file, size_t prefetch = (size_t) -1 /* -1 = max value */, bool numa = false) {
         size = file->size;
         int fd = fileno(file->fp);
         int flags = MAP_SHARED;
         // prefetch/readahead impairs performance on NUMA systems
-        if (ggml_is_numa()) { prefetch = 0; }
+        if (numa) { prefetch = 0; }
 #ifdef __linux__
         if (prefetch) { flags |= MAP_POPULATE; }
 #endif
@@ -195,7 +193,7 @@ struct llama_mmap {
                         strerror(errno));
             }
         }
-        if (ggml_is_numa()) {
+        if (numa) {
             // advise the kernel not to use readahead
             // (because the next page might not belong on the same node)
             if (madvise(addr, file->size, MADV_RANDOM)) {
@@ -211,7 +209,9 @@ struct llama_mmap {
 #elif defined(_WIN32)
     static constexpr bool SUPPORTED = true;
 
-    llama_mmap(struct llama_file * file, bool prefetch = true) {
+    llama_mmap(struct llama_file * file, bool prefetch = true, bool numa = false) {
+        (void) numa;
+
         size = file->size;
 
         HANDLE hFile = (HANDLE) _get_osfhandle(_fileno(file->fp));
@@ -256,8 +256,10 @@ struct llama_mmap {
 #else
     static constexpr bool SUPPORTED = false;
 
-    llama_mmap(struct llama_file *, bool prefetch = true) {
-        (void)prefetch;
+    llama_mmap(struct llama_file *, bool prefetch = true, bool numa = false) {
+        (void) prefetch;
+        (void) numa;
+
         throw std::runtime_error(std::string("mmap not supported"));
     }
 #endif
diff --git a/llama.cpp b/llama.cpp
index e932636fc..1a15844bc 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -774,7 +774,7 @@ struct llama_model_loader {
         }
 
         if (use_mmap) {
-            mapping.reset(new llama_mmap(&file_loaders.at(0)->file, prefetch_size));
+            mapping.reset(new llama_mmap(&file_loaders.at(0)->file, prefetch_size, ggml_is_numa()));
             if (lmlock) {
                 lmlock->init(mapping->addr);
             }
@@ -2903,7 +2903,7 @@ int llama_apply_lora_from_file_internal(const struct llama_model & model, const
 
         // maybe this should in llama_model_loader
         if (model_loader->use_mmap) {
-            model_loader->mapping.reset(new llama_mmap(&model_loader->file_loaders.at(0)->file, /* prefetch */ 0));
+            model_loader->mapping.reset(new llama_mmap(&model_loader->file_loaders.at(0)->file, /* prefetch */ 0, ggml_is_numa()));
         }
     }
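
Note: the change is a small dependency inversion. llama-util.h no longer
needs ggml.h because the NUMA check (ggml_is_numa()) now happens at the
call sites in llama.cpp and is handed to llama_mmap as a plain bool with
a default of false, which preserves the old behavior for callers that do
not pass it. A minimal standalone sketch of the same pattern follows;
the names (mmap_wrapper, is_numa_stub) are hypothetical, not the actual
llama.cpp API, and is_numa_stub() merely stands in for ggml_is_numa():

    #include <cstddef>
    #include <cstdio>

    // "util" layer: knows nothing about ggml; the NUMA decision
    // arrives from the caller as a plain bool.
    struct mmap_wrapper {
        mmap_wrapper(std::FILE * fp, std::size_t prefetch = (std::size_t) -1, bool numa = false) {
            // prefetch/readahead impairs performance on NUMA systems,
            // so it is disabled when the caller reports a NUMA machine
            if (numa) { prefetch = 0; }
            (void) fp; (void) prefetch; // platform mmap logic elided
        }
    };

    // "caller" layer: the only translation unit that would include
    // ggml.h, mirroring the llama.cpp side of the patch
    bool is_numa_stub() { return false; } // stands in for ggml_is_numa()

    int main() {
        std::FILE * fp = std::fopen("model.bin", "rb");
        if (fp) {
            mmap_wrapper m(fp, /* prefetch */ 0, is_numa_stub());
            std::fclose(fp);
        }
        return 0;
    }

With this split, the utility header stays self-contained and can be
compiled by consumers that do not link against ggml at all.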