make --numa a param
parent 8502d5178e
commit bf83dcb279
4 changed files with 10 additions and 1 deletion
@@ -288,6 +288,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
             params.use_mmap = false;
         } else if (arg == "--mtest") {
             params.mem_test = true;
+        } else if (arg == "--numa") {
+            params.numa = true;
         } else if (arg == "--verbose-prompt") {
             params.verbose_prompt = true;
         } else if (arg == "-r" || arg == "--reverse-prompt") {
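For context, the hunk above extends a plain else-if chain over the command-line arguments in which each boolean option simply flips a field on the params struct. The following is a minimal standalone sketch of that pattern, not code from the repository; cli_params and parse_cli are illustrative names only.

// Sketch only: boolean CLI flags toggling fields on a params struct.
#include <cstdio>
#include <cstring>

struct cli_params {
    bool use_mmap = true;   // enabled by default, like gpt_params.use_mmap
    bool numa     = false;  // disabled unless the user passes --numa
};

static bool parse_cli(int argc, char ** argv, cli_params & params) {
    for (int i = 1; i < argc; i++) {
        const char * arg = argv[i];
        if (strcmp(arg, "--no-mmap") == 0) {
            params.use_mmap = false;
        } else if (strcmp(arg, "--numa") == 0) {
            params.numa = true;          // same idea as params.numa in the diff
        } else {
            fprintf(stderr, "unknown argument: %s\n", arg);
            return false;
        }
    }
    return true;
}

int main(int argc, char ** argv) {
    cli_params params;
    if (!parse_cli(argc, argv, params)) {
        return 1;
    }
    printf("numa=%d use_mmap=%d\n", params.numa, params.use_mmap);
    return 0;
}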
@@ -421,6 +423,9 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     if (llama_mmap_supported()) {
         fprintf(stderr, "  --no-mmap             do not memory-map model (slower load but may reduce pageouts if not using mlock)\n");
     }
+    fprintf(stderr, "  --numa                attempt optimizations that help on some NUMA systems\n");
+    fprintf(stderr, "                        if run without this previously, it is recommended to drop the system page cache before using this\n");
+    fprintf(stderr, "                        see https://github.com/ggerganov/llama.cpp/issues/1437\n");
     fprintf(stderr, "  -ngl N, --n-gpu-layers N\n");
     fprintf(stderr, "                        number of layers to store in VRAM\n");
     fprintf(stderr, "  --mtest               compute maximum memory usage\n");
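Usage-wise the option is opt-in: a typical invocation of the main example would simply append --numa to whatever flags are already being used (for example ./main -m <model> --numa, with -m being the usual model-path flag), and omitting it keeps the previous behaviour. As the help text above notes, if the model was previously run without --numa it is recommended to drop the system page cache first; https://github.com/ggerganov/llama.cpp/issues/1437 has the background.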
@@ -70,6 +70,7 @@ struct gpt_params {
     bool use_mmap = true; // use mmap for faster loads
     bool use_mlock = false; // use mlock to keep model in memory
     bool mem_test = false; // compute maximum memory usage
+    bool numa = false; // attempt optimizations that help on some NUMA systems
     bool verbose_prompt = false; // print prompt tokens before generation
 };
 
@@ -5,6 +5,7 @@
 
 #include "common.h"
 #include "llama.h"
+#include "ggml.h"
 #include "build-info.h"
 
 #include <cassert>
@@ -97,6 +98,9 @@ int main(int argc, char ** argv) {
     }
 
     llama_init_backend();
+    if (params.numa) {
+        ggml_numa_init();
+    }
 
     llama_context * ctx;
     g_ctx = &ctx;
@@ -851,7 +851,6 @@ bool llama_mlock_supported() {
 
 void llama_init_backend() {
     ggml_time_init();
-    ggml_numa_init();
 
     // needed to initialize f16 tables
     {
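Net effect of the change: NUMA setup is no longer performed unconditionally inside llama_init_backend() (the call removed in the last hunk). Instead, gpt_params gains a numa field, gpt_params_parse sets it when --numa is passed, and main() calls ggml_numa_init() only when that flag is set.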