llama : avoid ggml include in llama-util.h
This commit is contained in:
parent
0fe4b00de2
commit
875a1e111e
2 changed files with 12 additions and 10 deletions
18
llama-util.h
18
llama-util.h
|
@@ -16,8 +16,6 @@
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
|
|
||||||
#include "ggml.h"
|
|
||||||
|
|
||||||
#ifdef __has_include
|
#ifdef __has_include
|
||||||
#if __has_include(<unistd.h>)
|
#if __has_include(<unistd.h>)
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
@@ -174,12 +172,12 @@ struct llama_mmap {
|
||||||
#ifdef _POSIX_MAPPED_FILES
|
#ifdef _POSIX_MAPPED_FILES
|
||||||
static constexpr bool SUPPORTED = true;
|
static constexpr bool SUPPORTED = true;
|
||||||
|
|
||||||
llama_mmap(struct llama_file * file, size_t prefetch = (size_t) -1 /* -1 = max value */) {
|
llama_mmap(struct llama_file * file, size_t prefetch = (size_t) -1 /* -1 = max value */, bool numa = false) {
|
||||||
size = file->size;
|
size = file->size;
|
||||||
int fd = fileno(file->fp);
|
int fd = fileno(file->fp);
|
||||||
int flags = MAP_SHARED;
|
int flags = MAP_SHARED;
|
||||||
// prefetch/readahead impairs performance on NUMA systems
|
// prefetch/readahead impairs performance on NUMA systems
|
||||||
if (ggml_is_numa()) { prefetch = 0; }
|
if (numa) { prefetch = 0; }
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
if (prefetch) { flags |= MAP_POPULATE; }
|
if (prefetch) { flags |= MAP_POPULATE; }
|
||||||
#endif
|
#endif
|
||||||
|
@@ -195,7 +193,7 @@ struct llama_mmap {
|
||||||
strerror(errno));
|
strerror(errno));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (ggml_is_numa()) {
|
if (numa) {
|
||||||
// advise the kernel not to use readahead
|
// advise the kernel not to use readahead
|
||||||
// (because the next page might not belong on the same node)
|
// (because the next page might not belong on the same node)
|
||||||
if (madvise(addr, file->size, MADV_RANDOM)) {
|
if (madvise(addr, file->size, MADV_RANDOM)) {
|
||||||
|
@@ -211,7 +209,9 @@ struct llama_mmap {
|
||||||
#elif defined(_WIN32)
|
#elif defined(_WIN32)
|
||||||
static constexpr bool SUPPORTED = true;
|
static constexpr bool SUPPORTED = true;
|
||||||
|
|
||||||
llama_mmap(struct llama_file * file, bool prefetch = true) {
|
llama_mmap(struct llama_file * file, bool prefetch = true, bool numa = false) {
|
||||||
|
(void) numa;
|
||||||
|
|
||||||
size = file->size;
|
size = file->size;
|
||||||
|
|
||||||
HANDLE hFile = (HANDLE) _get_osfhandle(_fileno(file->fp));
|
HANDLE hFile = (HANDLE) _get_osfhandle(_fileno(file->fp));
|
||||||
|
@@ -256,8 +256,10 @@ struct llama_mmap {
|
||||||
#else
|
#else
|
||||||
static constexpr bool SUPPORTED = false;
|
static constexpr bool SUPPORTED = false;
|
||||||
|
|
||||||
llama_mmap(struct llama_file *, bool prefetch = true) {
|
llama_mmap(struct llama_file *, bool prefetch = true, bool numa = false) {
|
||||||
(void)prefetch;
|
(void) prefetch;
|
||||||
|
(void) numa;
|
||||||
|
|
||||||
throw std::runtime_error(std::string("mmap not supported"));
|
throw std::runtime_error(std::string("mmap not supported"));
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@@ -774,7 +774,7 @@ struct llama_model_loader {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (use_mmap) {
|
if (use_mmap) {
|
||||||
mapping.reset(new llama_mmap(&file_loaders.at(0)->file, prefetch_size));
|
mapping.reset(new llama_mmap(&file_loaders.at(0)->file, prefetch_size, ggml_is_numa()));
|
||||||
if (lmlock) {
|
if (lmlock) {
|
||||||
lmlock->init(mapping->addr);
|
lmlock->init(mapping->addr);
|
||||||
}
|
}
|
||||||
|
@@ -2903,7 +2903,7 @@ int llama_apply_lora_from_file_internal(const struct llama_model & model, const
|
||||||
|
|
||||||
// maybe this should in llama_model_loader
|
// maybe this should in llama_model_loader
|
||||||
if (model_loader->use_mmap) {
|
if (model_loader->use_mmap) {
|
||||||
model_loader->mapping.reset(new llama_mmap(&model_loader->file_loaders.at(0)->file, /* prefetch */ 0));
|
model_loader->mapping.reset(new llama_mmap(&model_loader->file_loaders.at(0)->file, /* prefetch */ 0, ggml_is_numa()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue