diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 5a952f99b..de20374be 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -2824,18 +2824,42 @@ int main(int argc, char **argv)
         }
     });
 
+    LOG_INFO("HTTP server listening", log_data);
+    // run the HTTP server in a thread - see comment below
+    std::thread t([&]()
+            {
+                if (!svr.listen_after_bind())
+                {
+                    server_state = ERROR;
+                    return 1;
+                }
+
+                return 0;
+            });
+
+    // GG: if I put the main loop inside a thread, it crashes on the first request when built in Debug!?
+    //     "Bus error: 10" - this is on macOS, it does not crash on Linux
+    //std::thread t2([&]()
+    {
+        bool running = true;
+        while (running)
+        {
+            running = llama.update_slots();
+        }
+    }
+    //);
+
 
     // load the model
     if (!llama.load_model(params))
     {
         server_state = ERROR;
         return 1;
+    } else {
+        llama.initialize();
+        server_state = READY;
     }
 
-    llama.initialize();
-    server_state = READY;
-
-
     // Middleware for API key validation
     auto validate_api_key = [&sparams](const httplib::Request &req, httplib::Response &res) -> bool {
         // If API key is not set, skip validation
@@ -3252,30 +3276,6 @@ int main(int argc, char **argv)
         log_data["api_key"] = "api_key: ****" + sparams.api_key.substr(sparams.api_key.length() - 4);
     }
 
-    LOG_INFO("HTTP server listening", log_data);
-    // run the HTTP server in a thread - see comment below
-    std::thread t([&]()
-            {
-                if (!svr.listen_after_bind())
-                {
-                    return 1;
-                }
-
-                return 0;
-            });
-
-    // GG: if I put the main loop inside a thread, it crashes on the first request when build in Debug!?
-    //     "Bus error: 10" - this is on macOS, it does not crash on Linux
-    //std::thread t2([&]()
-    {
-        bool running = true;
-        while (running)
-        {
-            running = llama.update_slots();
-        }
-    }
-    //);
-
     t.join();
 
     llama_backend_free();