llama : avoid ggml include in llama-util.h

commit 875a1e111e (parent 0fe4b00de2)
Author: Georgi Gerganov
Date: 2023-06-26 20:27:24 +03:00
2 changed files with 12 additions and 10 deletions

llama-util.h

@@ -16,8 +16,6 @@
 #include <vector>
 #include <stdexcept>
 
-#include "ggml.h"
-
 #ifdef __has_include
 #if __has_include(<unistd.h>)
 #include <unistd.h>
@@ -174,12 +172,12 @@ struct llama_mmap {
 #ifdef _POSIX_MAPPED_FILES
     static constexpr bool SUPPORTED = true;
 
-    llama_mmap(struct llama_file * file, size_t prefetch = (size_t) -1 /* -1 = max value */) {
+    llama_mmap(struct llama_file * file, size_t prefetch = (size_t) -1 /* -1 = max value */, bool numa = false) {
         size = file->size;
         int fd = fileno(file->fp);
         int flags = MAP_SHARED;
         // prefetch/readahead impairs performance on NUMA systems
-        if (ggml_is_numa()) { prefetch = 0; }
+        if (numa) { prefetch = 0; }
 #ifdef __linux__
         if (prefetch) { flags |= MAP_POPULATE; }
 #endif
@@ -195,7 +193,7 @@ struct llama_mmap {
                     strerror(errno));
             }
         }
-        if (ggml_is_numa()) {
+        if (numa) {
             // advise the kernel not to use readahead
             // (because the next page might not belong on the same node)
             if (madvise(addr, file->size, MADV_RANDOM)) {
@@ -211,7 +209,9 @@ struct llama_mmap {
 #elif defined(_WIN32)
     static constexpr bool SUPPORTED = true;
 
-    llama_mmap(struct llama_file * file, bool prefetch = true) {
+    llama_mmap(struct llama_file * file, bool prefetch = true, bool numa = false) {
+        (void) numa;
+
         size = file->size;
 
         HANDLE hFile = (HANDLE) _get_osfhandle(_fileno(file->fp));
@@ -256,8 +256,10 @@ struct llama_mmap {
 #else
     static constexpr bool SUPPORTED = false;
 
-    llama_mmap(struct llama_file *, bool prefetch = true) {
+    llama_mmap(struct llama_file *, bool prefetch = true, bool numa = false) {
         (void) prefetch;
+        (void) numa;
+
         throw std::runtime_error(std::string("mmap not supported"));
     }
 #endif
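
Note: the pattern these hunks introduce is plain dependency injection — the mmap wrapper receives the NUMA decision as a bool, so the header no longer needs ggml.h. Below is a minimal standalone sketch of that shape, assuming POSIX/Linux only; the type name mmap_sketch and the error handling are illustrative, not the upstream implementation.

// sketch: `numa` is injected by the caller instead of queried via ggml_is_numa()
#include <cerrno>
#include <cstddef>
#include <cstdio>
#include <cstring>
#include <stdexcept>
#include <string>
#include <sys/mman.h>

struct mmap_sketch {
    void * addr = nullptr;
    size_t size = 0;

    mmap_sketch(int fd, size_t file_size, size_t prefetch, bool numa) : size(file_size) {
        int flags = MAP_SHARED;
        // prefetch/readahead impairs performance on NUMA systems
        if (numa) { prefetch = 0; }
#ifdef __linux__
        if (prefetch) { flags |= MAP_POPULATE; }
#endif
        addr = mmap(NULL, size, PROT_READ, flags, fd, 0);
        if (addr == MAP_FAILED) {
            throw std::runtime_error(std::string("mmap failed: ") + strerror(errno));
        }
        // avoid cross-node readahead: the next page may live on another node
        if (numa && madvise(addr, size, MADV_RANDOM)) {
            fprintf(stderr, "warning: madvise(.., MADV_RANDOM) failed: %s\n", strerror(errno));
        }
    }

    ~mmap_sketch() { munmap(addr, size); }
};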

llama.cpp

@@ -774,7 +774,7 @@ struct llama_model_loader {
         }
 
         if (use_mmap) {
-            mapping.reset(new llama_mmap(&file_loaders.at(0)->file, prefetch_size));
+            mapping.reset(new llama_mmap(&file_loaders.at(0)->file, prefetch_size, ggml_is_numa()));
             if (lmlock) {
                 lmlock->init(mapping->addr);
             }
@@ -2903,7 +2903,7 @@ int llama_apply_lora_from_file_internal(const struct llama_model & model, const
         // maybe this should in llama_model_loader
         if (model_loader->use_mmap) {
-            model_loader->mapping.reset(new llama_mmap(&model_loader->file_loaders.at(0)->file, /* prefetch */ 0));
+            model_loader->mapping.reset(new llama_mmap(&model_loader->file_loaders.at(0)->file, /* prefetch */ 0, ggml_is_numa()));
         }
     }
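
Taken together, the two call sites show where the ggml dependency now lives: llama.cpp queries ggml_is_numa() once and threads the result into llama_mmap. A caller-side sketch of that boundary follows; llama_mmap, llama_file, and ggml_is_numa() are the real identifiers from this diff, while the helper map_model_file() is made up for illustration.

#include <cstddef>
#include <memory>

#include "ggml.h"        // ggml_is_numa() - the ggml dependency stays here
#include "llama-util.h"  // llama_mmap / llama_file, now free of ggml.h

// illustrative helper, not part of the commit
static std::unique_ptr<llama_mmap> map_model_file(llama_file * file) {
    // the NUMA decision is made once, at the boundary, so llama-util.h
    // never needs to know that ggml exists
    return std::unique_ptr<llama_mmap>(
        new llama_mmap(file, /* prefetch */ (size_t) -1, ggml_is_numa()));
}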