diff --git a/common/common.cpp b/common/common.cpp index 0f5fc11a7..90cbe94a3 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -662,9 +662,6 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) { /**/ if (value == "interleave" || value == "" ) { params.numa = GGML_NUMA_STRATEGY_INTERLEAVE; } else if (value == "isolate") { params.numa = GGML_NUMA_STRATEGY_ISOLATE; } else if (value == "numactl") { params.numa = GGML_NUMA_STRATEGY_NUMACTL; } -#ifdef GGUF_NUMA_MIRROR - else if (value == "mirror") { params.numa = GGML_NUMA_STRATEGY_MIRROR; } -#endif else { invalid_param = true; break; } } } else if (arg == "--verbose-prompt") { @@ -1011,9 +1008,6 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { printf(" - interleave: (default) spread execution evenly over all nodes\n"); printf(" - isolate: only spawn threads on CPUs on the node that execution started on\n"); printf(" - numactl: use the CPU map provided my numactl\n"); -#ifdef GGML_NUMA_MIRROR - printf(" - mirror: NOT YET IMPLEMENTED - attempt to mirror GGUF data buffer on each node's local memory to increase throughput.\n"); -#endif printf(" if run without this previously, it is recommended to drop the system page cache before using this\n"); printf(" see https://github.com/ggerganov/llama.cpp/issues/1437\n"); if (llama_supports_gpu_offload()) { diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 0c9230d0f..7ed10d564 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -1825,9 +1825,6 @@ static void server_print_usage(const char *argv0, const gpt_params ¶ms, printf(" - interleave: (default) spread execution evenly over all nodes\n"); printf(" - isolate: only spawn threads on CPUs on the node that execution started on\n"); printf(" - numactl: use the CPU map provided my numactl\n"); -#ifdef GGML_NUMA_MIRROR - printf(" - mirror: NOT YET IMPLEMENTED - attempt to mirror GGUF data buffer on each node's local memory to increase throughput.\n"); -#endif if (llama_supports_gpu_offload()) { printf(" -ngl N, --n-gpu-layers N\n"); printf(" number of layers to store in VRAM\n"); diff --git a/ggml.c b/ggml.c index 48e156a5b..3d8f20389 100644 --- a/ggml.c +++ b/ggml.c @@ -24,10 +24,6 @@ #include #include -#ifdef GGML_NUMA_MIRROR -#include -#endif - #ifdef GGML_USE_METAL #include #endif @@ -16635,10 +16631,6 @@ static void set_numa_thread_affinity(int thread_n, int n_threads) { fprintf(stderr, "warning: pthread_setaffinity_np() failed: %s\n",strerror(rv)); } return; -#ifdef GGML_NUMA_MIRROR - case GGML_NUMA_STRATEGY_MIRROR: - printf("Mirror Mode Enabled"); -#endif default: return; }