server : add --numa support (#2524)
This commit is contained in:
parent
348acf188c
commit
d75561df20
2 changed files with 6 additions and 0 deletions
|
@@ -666,6 +666,7 @@ static void server_print_usage(const char *argv0, const gpt_params &params,
|
|||
{
|
||||
fprintf(stdout, " --no-mmap do not memory-map model (slower load but may reduce pageouts if not using mlock)\n");
|
||||
}
|
||||
fprintf(stdout, " --numa attempt optimizations that help on some NUMA systems\n");
|
||||
#ifdef LLAMA_SUPPORTS_GPU_OFFLOAD
|
||||
fprintf(stdout, " -ngl N, --n-gpu-layers N\n");
|
||||
fprintf(stdout, " number of layers to store in VRAM\n");
|
||||
|
@@ -940,6 +941,10 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
|
|||
{
|
||||
params.use_mmap = false;
|
||||
}
|
||||
else if (arg == "--numa")
|
||||
{
|
||||
params.numa = true;
|
||||
}
|
||||
else if (arg == "--embedding")
|
||||
{
|
||||
params.embedding = true;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue