diff --git a/Llamaserver.py b/Llamaserver.py
index 2b47b86f1..df4f9b8df 100644
--- a/Llamaserver.py
+++ b/Llamaserver.py
@@ -2,6 +2,7 @@ from queue import Queue
 import threading
 import requests
 import json
+from time import sleep
 
 def print_dict(data):
     for k, v in data.items():
@@ -38,6 +39,8 @@ def make_progress_bar(bar, count, num_requests):
 
 
 def send_request(q, question, event, count, num_requests):
+    delay = 0.1
+
     global bar
 
     data = {'prompt': question}
@@ -58,8 +61,12 @@ def send_request(q, question, event, count, num_requests):
         elif response.status_code == 429 and not q.empty():
             event.set()
             print("Server return too many requests; back off!! Reset event.")
+        else:
+            print(f"Server responded with code {response.status_code}\n")
     except Exception as e:
         print(f"Server returned exception error {e}")
+    sleep(delay)
+    delay *= 2
 
 
 if __name__ == "__main__":
@@ -94,7 +101,6 @@ if __name__ == "__main__":
         t = threading.Thread(target=send_request, args=(q, question, event, i, num_requests))
         t.start()
         threads.append(t)
-        # input("Any key",)
 
     for thread in threads:
         thread.join()  # wait for all threads to finish
diff --git a/common/log.h b/common/log.h
index e4e1b9f4f..bd0cc549a 100644
--- a/common/log.h
+++ b/common/log.h
@@ -41,6 +41,18 @@
 //  log_set_target( FILE* )
 //   allowing to point at stderr, stdout, or any valid FILE* file handler.
 //
+// One way to find log files is to give a parameter or a name a strange value and then search for it;
+// that is how I discovered llama.log inside build/ and this log.h inside common/
+// (although I don't know why the log isn't called server.log).
+// Note that the server has logging on by default and it can only be disabled, not enabled.
+//
+// This log.h file, in common/, is very useful and informative.
+//
+// As noted below, llama.log is overwritten on every run if the default is used,
+// so it would be preferable to be able to assign a log file when the server is loaded,
+// or to have a flag that generates timestamped logs when required.
+// It isn't very satisfactory to have to add a new filename to the code.
+//
 // --------
 //
 // End of Basic usage.
@@ -112,7 +124,7 @@ inline std::string log_get_pid()
     static std::string pid;
     if (pid.empty())
     {
-        // std::this_thread::get_id() is the most portable way of obtaining a "process id"
+        // std::this_thread::get_id() is the most portable way of obtaining a "process id"
         //  it's not the same as "pid" but is unique enough to solve multiple instances
         //  trying to write to the same log.
         std::stringstream ss;
diff --git a/examples/server/httplib.h b/examples/server/httplib.h
index 4f08c3df9..72d806516 100644
--- a/examples/server/httplib.h
+++ b/examples/server/httplib.h
@@ -15,11 +15,11 @@
  */
 
 #ifndef CPPHTTPLIB_KEEPALIVE_TIMEOUT_SECOND
-#define CPPHTTPLIB_KEEPALIVE_TIMEOUT_SECOND 5
+#define CPPHTTPLIB_KEEPALIVE_TIMEOUT_SECOND 5 // originally 5
 #endif
 
 #ifndef CPPHTTPLIB_KEEPALIVE_MAX_COUNT
-#define CPPHTTPLIB_KEEPALIVE_MAX_COUNT 5 // originally 5
+#define CPPHTTPLIB_KEEPALIVE_MAX_COUNT 15 // originally 5
 #endif
 
 #ifndef CPPHTTPLIB_CONNECTION_TIMEOUT_SECOND
@@ -31,7 +31,7 @@
 #endif
 
 #ifndef CPPHTTPLIB_READ_TIMEOUT_SECOND
-#define CPPHTTPLIB_READ_TIMEOUT_SECOND 5
+#define CPPHTTPLIB_READ_TIMEOUT_SECOND 15 // originally 5
 #endif
 
 #ifndef CPPHTTPLIB_READ_TIMEOUT_USECOND
@@ -47,7 +47,7 @@
 #endif
 
 #ifndef CPPHTTPLIB_IDLE_INTERVAL_SECOND
-#define CPPHTTPLIB_IDLE_INTERVAL_SECOND 0
+#define CPPHTTPLIB_IDLE_INTERVAL_SECOND 5 // originally 0
 #endif
 
 #ifndef CPPHTTPLIB_IDLE_INTERVAL_USECOND
diff --git a/examples/server/json.hpp b/examples/server/json.hpp
index ea945f346..e4c70510b 100644
--- a/examples/server/json.hpp
+++ b/examples/server/json.hpp
@@ -20312,7 +20312,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec
     basic_json(const basic_json& other)
         : m_type(other.m_type)
     {
-        // check of passed value is valid
+        // check if passed value is valid
        other.assert_invariant();
 
        switch (m_type)
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index aa9a3b991..d09277686 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -3080,7 +3080,7 @@ int main(int argc, char **argv)
     std::atomic<server_state> state{SERVER_STATE_LOADING_MODEL};
 
-    svr.set_default_headers({{"Server", "PJllama.cpp"}});
+    svr.set_default_headers({{"Server", "llama.cpp"}});
 
     // CORS preflight (Cross-Origin Resource Sharing)
     svr.Options(R"(.*)", [](const httplib::Request &req, httplib::Response &res)
@@ -3566,6 +3566,22 @@ int main(int argc, char **argv)
         }
     );
     // get a SIGABORT error on exception here as GG says above when in Debug but not in Release
+    // copied from a later server.cpp
+    llama.queue_tasks.on_new_task(std::bind(
+        &llama_server_context::process_single_task, &llama, std::placeholders::_1));
+    llama.queue_tasks.on_finish_multitask(std::bind(
+        &llama_server_context::on_finish_multitask, &llama, std::placeholders::_1));
+    llama.queue_tasks.on_all_tasks_finished(std::bind(
+        &llama_server_context::run_on_all_tasks_finished, &llama));
+    llama.queue_results.on_multitask_update(std::bind(
+        &llama_server_queue::update_multitask,
+        &llama.queue_tasks,
+        std::placeholders::_1,
+        std::placeholders::_2,
+        std::placeholders::_3
+    ));
+    llama.queue_tasks.start_loop();
+
     t2.join(); // was originally t.join() despite t2 in line 3533 above
 
     llama_backend_free();
diff --git a/llama.cpp b/llama.cpp
index d39ff94c7..2dfa9199d 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -6718,7 +6718,7 @@ static int llama_decode_internal(
     // if we start defragmenting the cache, the benefit from this will be more important
     kv_self.n = std::min((int32_t) cparams.n_ctx, std::max(32, GGML_PAD(llama_kv_cache_cell_max(kv_self), 32)));
     //kv_self.n = llama_kv_cache_cell_max(kv_self);
-
+    // line above and below originally commented out
     //printf("kv_self.n = %5d, kv_self.used = %5d, kv_self.head = %5d\n", kv_self.n, kv_self.used, kv_self.head);
 
     ggml_allocr_reset(lctx.alloc);
@@ -10741,7 +10741,7 @@ void llama_batch_free(struct llama_batch batch) {
 int32_t llama_decode(
         struct llama_context * ctx,
-          struct llama_batch batch) {
+          struct llama_batch batch) {
     const int ret = llama_decode_internal(*ctx, batch);
     if (ret < 0) {
         LLAMA_LOG_ERROR("%s: failed to decode, ret = %d\n", __func__, ret);
diff --git a/test.sh b/test.sh
new file mode 100755
index 000000000..3f94491de
--- /dev/null
+++ b/test.sh
@@ -0,0 +1,8 @@
+make -j tests
+export MTL_DEBUG_LAYER=1
+export MTL_SHADER_VALIDATION=1
+export MTL_SHADER_VALIDATION_REPORT_TO_STDERR=1
+export MTL_SHADER_VALIDATION_FAIL_MODE=allow
+export MTL_DEBUG_LAYER_VALIDATE_STORE_ACTIONS=1
+export MTL_DEBUG_LAYER_VALIDATE_LOAD_ACTIONS=1
+./tests/test-backend-ops -b Metal -o MUL_MAT
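
Note on the Llamaserver.py hunks above: the added backoff sleeps for `delay` seconds after the request and then doubles `delay`, but the 429 branch itself does not retry the request. If the intent is to retry on 429, a per-request retry loop along the following lines may be closer to what is wanted. This is only a sketch, not part of the patch; the URL, payload shape, timeout, and retry limit are illustrative placeholders, not values taken from Llamaserver.py.

# Sketch only: retry with exponential backoff on HTTP 429.
import requests
from time import sleep

def post_with_backoff(url, payload, max_retries=5, delay=0.1):
    for _ in range(max_retries):
        try:
            response = requests.post(url, json=payload, timeout=30)
        except requests.RequestException as e:
            print(f"Request failed: {e}")
        else:
            if response.status_code != 429:
                return response   # success or a non-retryable status
            print("Server returned 429; backing off")
        sleep(delay)
        delay *= 2                # wait twice as long before the next attempt
    return None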