diff --git a/common/common.h b/common/common.h
index cf2198c8b..74c136995 100644
--- a/common/common.h
+++ b/common/common.h
@@ -76,7 +76,7 @@ struct gpt_params {
     float yarn_beta_slow = 1.0f; // YaRN high correction dim
     int32_t yarn_orig_ctx = 0; // YaRN original context length
     int32_t rope_scaling_type = LLAMA_ROPE_SCALING_UNSPECIFIED;
-    ggml_numa_strategies numa = GGML_NUMA_STRATEGY_DISABLED;
+    ggml_numa_strategy numa = GGML_NUMA_STRATEGY_DISABLED;
 
     // // sampling parameters
     struct llama_sampling_params sparams;
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 1129de203..912c750cc 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1855,10 +1855,10 @@ static void server_print_usage(const char *argv0, const gpt_params &params,
     {
         printf("  --no-mmap                 do not memory-map model (slower load but may reduce pageouts if not using mlock)\n");
     }
-    printf("  --numa TYPE           attempt optimizations that help on some NUMA systems\n");
-    printf("                        - distribute: spread execution evenly over all nodes\n");
-    printf("                        - isolate: only spawn threads on CPUs on the node that execution started on\n");
-    printf("                        - numactl: use the CPU map provided my numactl\n");
+    printf("  --numa TYPE               attempt optimizations that help on some NUMA systems\n");
+    printf("                              - distribute: spread execution evenly over all nodes\n");
+    printf("                              - isolate: only spawn threads on CPUs on the node that execution started on\n");
+    printf("                              - numactl: use the CPU map provided my numactl\n");
     if (llama_supports_gpu_offload()) {
         printf("  -ngl N, --n-gpu-layers N\n");
         printf("                            number of layers to store in VRAM\n");
diff --git a/ggml.c b/ggml.c
index d6c6b3ff5..957fa7c50 100644
--- a/ggml.c
+++ b/ggml.c
@@ -1954,7 +1954,7 @@ struct ggml_numa_node {
 };
 
 struct ggml_numa_nodes {
-    enum ggml_numa_strategies numa_strategy;
+    enum ggml_numa_strategy numa_strategy;
     struct ggml_numa_node nodes[GGML_NUMA_MAX_NODES];
     uint32_t n_nodes;
     uint32_t total_cpus; // hardware threads on system
@@ -2013,7 +2013,7 @@ static uint32_t ggml_get_numa_affinity(void) {
 }
 #endif
 
-void ggml_numa_init(enum ggml_numa_strategies numa_flag) {
+void ggml_numa_init(enum ggml_numa_strategy numa_flag) {
     if (g_state.numa.n_nodes > 0) {
         fprintf(stderr, "ggml_numa_init: NUMA already initialized\n");
 
diff --git a/ggml.h b/ggml.h
index ad65222fa..270018185 100644
--- a/ggml.h
+++ b/ggml.h
@@ -678,7 +678,7 @@ extern "C" {
 
     GGML_API void ggml_print_backtrace(void);
 
-    GGML_API void ggml_numa_init(enum ggml_numa_strategies numa); // call once for better performance on NUMA systems
+    GGML_API void ggml_numa_init(enum ggml_numa_strategy numa); // call once for better performance on NUMA systems
     GGML_API bool ggml_is_numa(void); // true if init detected that system has >1 NUMA node
 
     GGML_API void ggml_print_object (const struct ggml_object * obj);
diff --git a/llama.cpp b/llama.cpp
index 9c3640767..2bd59ea72 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -11171,7 +11171,7 @@ void llama_backend_init(void) {
 #endif
 }
 
-void llama_numa_init(enum ggml_numa_strategies numa) {
+void llama_numa_init(enum ggml_numa_strategy numa) {
     if (numa != GGML_NUMA_STRATEGY_DISABLED) {
         ggml_numa_init(numa);
     }
diff --git a/llama.h b/llama.h
index a20b1f8f1..e338ff56b 100644
--- a/llama.h
+++ b/llama.h
@@ -309,7 +309,7 @@ extern "C" {
     LLAMA_API void llama_backend_init(void);
 
     //optional:
-    LLAMA_API void llama_numa_init(enum ggml_numa_strategies numa);
+    LLAMA_API void llama_numa_init(enum ggml_numa_strategy numa);
 
     // Call once at the end of the program - currently only used for MPI
     LLAMA_API void llama_backend_free(void);
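
Below is a minimal usage sketch (not part of this diff) showing how a caller picks up the renamed enum through the public llama.h API. GGML_NUMA_STRATEGY_DISTRIBUTE is assumed here to be the enum value backing the "distribute" option listed in the server usage text above.

    // sketch: initialize the backend with a NUMA strategy via the renamed enum
    #include "llama.h"

    int main(void) {
        llama_backend_init();

        // assumed enum value; corresponds to the "distribute" option of --numa TYPE
        enum ggml_numa_strategy numa = GGML_NUMA_STRATEGY_DISTRIBUTE;
        llama_numa_init(numa); // no-op when numa == GGML_NUMA_STRATEGY_DISABLED

        // ... load a model and run inference here ...

        llama_backend_free();
        return 0;
    }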