unified ggml_numa_strategy enum and fixed text alignment in server.cpp example

parent 5de34f568a
commit da652113f1

6 changed files with 10 additions and 10 deletions
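For orientation, here is a minimal sketch of the unified enum this commit settles on. Only the DISABLED default and the distribute/isolate/numactl strategy names are grounded in the hunks below; the member order and explicit values are assumptions.

    // Sketch only: values and any members beyond these four are assumptions,
    // not taken from this diff.
    enum ggml_numa_strategy {
        GGML_NUMA_STRATEGY_DISABLED   = 0, // default in gpt_params
        GGML_NUMA_STRATEGY_DISTRIBUTE = 1, // spread execution evenly over all nodes
        GGML_NUMA_STRATEGY_ISOLATE    = 2, // stay on the node execution started on
        GGML_NUMA_STRATEGY_NUMACTL    = 3, // use the CPU map provided by numactl
    };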
common/common.h: 2 changed lines
@@ -76,7 +76,7 @@ struct gpt_params {
     float   yarn_beta_slow    = 1.0f; // YaRN high correction dim
     int32_t yarn_orig_ctx     = 0;    // YaRN original context length
     int32_t rope_scaling_type = LLAMA_ROPE_SCALING_UNSPECIFIED;
-    ggml_numa_strategies numa = GGML_NUMA_STRATEGY_DISABLED;
+    ggml_numa_strategy numa = GGML_NUMA_STRATEGY_DISABLED;

     // // sampling parameters
     struct llama_sampling_params sparams;
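Bridging the CLI and this new field takes a small string-to-enum step. A hedged sketch follows; the parse_numa_strategy helper is hypothetical, and only the gpt_params.numa field and the strategy names come from this commit.

    #include <string.h>
    #include "ggml.h"

    // Hypothetical helper: map a --numa TYPE argument onto the unified enum.
    // The strategy names mirror the server help text in this commit.
    static enum ggml_numa_strategy parse_numa_strategy(const char * type) {
        if (strcmp(type, "distribute") == 0) return GGML_NUMA_STRATEGY_DISTRIBUTE;
        if (strcmp(type, "isolate")    == 0) return GGML_NUMA_STRATEGY_ISOLATE;
        if (strcmp(type, "numactl")    == 0) return GGML_NUMA_STRATEGY_NUMACTL;
        return GGML_NUMA_STRATEGY_DISABLED; // unrecognized value: leave NUMA off
    }

    // Usage: params.numa = parse_numa_strategy(argv[i]);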
examples/server/server.cpp: 8 changed lines
@@ -1855,10 +1855,10 @@ static void server_print_usage(const char *argv0, const gpt_params &params,
     {
         printf("  --no-mmap             do not memory-map model (slower load but may reduce pageouts if not using mlock)\n");
     }
-    printf("  --numa TYPE           attempt optimizations that help on some NUMA systems\n");
-    printf("      - distribute: spread execution evenly over all nodes\n");
-    printf("      - isolate: only spawn threads on CPUs on the node that execution started on\n");
-    printf("      - numactl: use the CPU map provided by numactl\n");
+    printf("  --numa TYPE           attempt optimizations that help on some NUMA systems\n");
+    printf("      - distribute: spread execution evenly over all nodes\n");
+    printf("      - isolate: only spawn threads on CPUs on the node that execution started on\n");
+    printf("      - numactl: use the CPU map provided by numactl\n");
     if (llama_supports_gpu_offload()) {
         printf("  -ngl N, --n-gpu-layers N\n");
         printf("                        number of layers to store in VRAM\n");
(the removed and added --numa lines differ only in leading whitespace, the alignment fix named in the commit title; the original spacing was lost in rendering)
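Given the help text above, a hypothetical server invocation (binary name and model path assumed; the --numa values come from the text above) could be:

    ./server -m models/7B/model.gguf --numa distribute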
ggml.c: 4 changed lines
@@ -1954,7 +1954,7 @@ struct ggml_numa_node {
 };

 struct ggml_numa_nodes {
-    enum ggml_numa_strategies numa_strategy;
+    enum ggml_numa_strategy numa_strategy;
     struct ggml_numa_node nodes[GGML_NUMA_MAX_NODES];
     uint32_t n_nodes;
     uint32_t total_cpus; // hardware threads on system

@@ -2013,7 +2013,7 @@ static uint32_t ggml_get_numa_affinity(void) {
 }
 #endif

-void ggml_numa_init(enum ggml_numa_strategies numa_flag) {
+void ggml_numa_init(enum ggml_numa_strategy numa_flag) {
     if (g_state.numa.n_nodes > 0) {
         fprintf(stderr, "ggml_numa_init: NUMA already initialized\n");
ggml.h: 2 changed lines
@@ -678,7 +678,7 @@ extern "C" {

     GGML_API void ggml_print_backtrace(void);

-    GGML_API void ggml_numa_init(enum ggml_numa_strategies numa); // call once for better performance on NUMA systems
+    GGML_API void ggml_numa_init(enum ggml_numa_strategy numa); // call once for better performance on NUMA systems
     GGML_API bool ggml_is_numa(void); // true if init detected that system has >1 NUMA node

     GGML_API void ggml_print_object (const struct ggml_object * obj);
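As a minimal sketch of the renamed ggml-level API, assuming only the two declarations in the hunk above (the surrounding program is illustrative):

    #include <stdio.h>
    #include "ggml.h"

    int main(void) {
        // Pick one of the strategies from the help text; call once, early.
        ggml_numa_init(GGML_NUMA_STRATEGY_DISTRIBUTE);

        // ggml_is_numa() is true if init detected more than one NUMA node.
        printf("multi-node NUMA: %s\n", ggml_is_numa() ? "yes" : "no");
        return 0;
    }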
llama.cpp: 2 changed lines
@@ -11171,7 +11171,7 @@ void llama_backend_init(void) {
 #endif
 }

-void llama_numa_init(enum ggml_numa_strategies numa) {
+void llama_numa_init(enum ggml_numa_strategy numa) {
     if (numa != GGML_NUMA_STRATEGY_DISABLED) {
         ggml_numa_init(numa);
     }
llama.h: 2 changed lines
@@ -309,7 +309,7 @@ extern "C" {
     LLAMA_API void llama_backend_init(void);

     //optional:
-    LLAMA_API void llama_numa_init(enum ggml_numa_strategies numa);
+    LLAMA_API void llama_numa_init(enum ggml_numa_strategy numa);

     // Call once at the end of the program - currently only used for MPI
     LLAMA_API void llama_backend_free(void);
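And at the llama level, a hedged sketch of the init/teardown order, using only calls that appear in this diff (model loading is elided):

    #include "llama.h"

    int main(void) {
        // Initialize the backend once per process.
        llama_backend_init();

        // Optionally pick a NUMA strategy with the unified enum; passing
        // GGML_NUMA_STRATEGY_DISABLED makes llama_numa_init a no-op, as the
        // llama.cpp hunk above shows.
        llama_numa_init(GGML_NUMA_STRATEGY_DISTRIBUTE);

        // ... load a model and run inference here ...

        // Tear down once at the end of the program.
        llama_backend_free();
        return 0;
    }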