Direct I/O and Transparent HugePages

Add --direct-io option for bypassing the page cache (and using Transparent HugePages on Linux)

Up to 3-6x faster uncached loading, fewer pageouts, no page cache pollution.
This commit is contained in:
Pavel Fatin 2024-05-20 21:55:33 +02:00
parent 917dc8cfa6
commit 1b17ed7ab6
10 changed files with 297 additions and 30 deletions

View file

@@ -160,6 +160,7 @@ struct gpt_params {
bool instruct = false; // instruction mode (used for Alpaca models)
bool logits_all = false; // return logits for all tokens in the batch
bool use_mmap = true; // use mmap for faster loads
bool use_direct_io = false; // use direct I/O
bool use_mlock = false; // use mlock to keep model in memory
bool verbose_prompt = false; // print prompt tokens before generation
bool display_prompt = true; // print prompt before generation