Direct I/O and Transparent HugePages
--direct-io for bypassing page cache (and using THP on Linux) Up to 3-6x faster uncached loading, fewer pageouts, no page cache pollution.
This commit is contained in:
parent
917dc8cfa6
commit
1b17ed7ab6
10 changed files with 297 additions and 30 deletions
2
llama.h
2
llama.h
|
@ -260,6 +260,7 @@ extern "C" {
|
|||
// Keep the booleans together to avoid misalignment during copy-by-value.
|
||||
bool vocab_only; // only load the vocabulary, no weights
|
||||
bool use_mmap; // use mmap if possible
|
||||
bool use_direct_io; // use direct I/O if possible
|
||||
bool use_mlock; // force system to keep model in RAM
|
||||
bool check_tensors; // validate model tensor data
|
||||
};
|
||||
|
@ -409,6 +410,7 @@ extern "C" {
|
|||
LLAMA_API size_t llama_max_devices(void);
|
||||
|
||||
LLAMA_API bool llama_supports_mmap (void);
|
||||
LLAMA_API bool llama_supports_direct_io (void);
|
||||
LLAMA_API bool llama_supports_mlock (void);
|
||||
LLAMA_API bool llama_supports_gpu_offload(void);
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue