diff --git a/examples/common.cpp b/examples/common.cpp
index 1308f8410..aad21898d 100644
--- a/examples/common.cpp
+++ b/examples/common.cpp
@@ -288,6 +288,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
             params.use_mmap = false;
         } else if (arg == "--mtest") {
             params.mem_test = true;
+        } else if (arg == "--numa") {
+            params.numa = true;
         } else if (arg == "--verbose-prompt") {
             params.verbose_prompt = true;
         } else if (arg == "-r" || arg == "--reverse-prompt") {
@@ -421,6 +423,9 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     if (llama_mmap_supported()) {
         fprintf(stderr, "  --no-mmap             do not memory-map model (slower load but may reduce pageouts if not using mlock)\n");
     }
+    fprintf(stderr, "  --numa                attempt optimizations that help on some NUMA systems\n");
+    fprintf(stderr, "                        if run without this previously, it is recommended to drop the system page cache before using this\n");
+    fprintf(stderr, "                        see https://github.com/ggerganov/llama.cpp/issues/1437\n");
     fprintf(stderr, "  -ngl N, --n-gpu-layers N\n");
     fprintf(stderr, "                        number of layers to store in VRAM\n");
     fprintf(stderr, "  --mtest               compute maximum memory usage\n");
diff --git a/examples/common.h b/examples/common.h
index 2b66382a6..9d74bd7b8 100644
--- a/examples/common.h
+++ b/examples/common.h
@@ -70,6 +70,7 @@ struct gpt_params {
     bool use_mmap          = true;  // use mmap for faster loads
     bool use_mlock         = false; // use mlock to keep model in memory
     bool mem_test          = false; // compute maximum memory usage
+    bool numa              = false; // attempt optimizations that help on some NUMA systems
     bool verbose_prompt    = false; // print prompt tokens before generation
 };
 
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 47b418d97..100e9d65b 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -5,6 +5,7 @@
 
 #include "common.h"
 #include "llama.h"
+#include "ggml.h"
 #include "build-info.h"
 
 #include <cassert>
@@ -97,6 +98,9 @@ int main(int argc, char ** argv) {
     }
 
     llama_init_backend();
+    if (params.numa) {
+        ggml_numa_init();
+    }
 
     llama_context * ctx;
     g_ctx = &ctx;
diff --git a/llama.cpp b/llama.cpp
index 468f96cc1..4cbc8d6b6 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -851,7 +851,6 @@ bool llama_mlock_supported() {
 
 void llama_init_backend() {
     ggml_time_init();
-    ggml_numa_init();
 
     // needed to initialize f16 tables
     {