diff --git a/apikeys.txt b/apikeys.txt
index 441abe9f3..41172eff1 100644
--- a/apikeys.txt
+++ b/apikeys.txt
@@ -1,3 +1,7 @@
 john123456
 susan987654
 guestabcdef
+fred123123
+george890890
+sandra234234
+tilly567567
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index f3867c204..1dccb2194 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -309,40 +309,43 @@ struct llama_client_slot
         char buffer[512];
         double t_token = t_prompt_processing / num_prompt_tokens_processed;
         double n_tokens_second = 1e3 / t_prompt_processing * num_prompt_tokens_processed;
+        printf("\033[72;0H]");
         sprintf(buffer, "prompt eval time = %10.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)", t_prompt_processing, num_prompt_tokens_processed, t_token, n_tokens_second);
-        LOG_INFO(buffer, {
+        /*LOG_INFO(buffer, {
             {"slot_id", id},
             {"task_id", task_id},
             {"t_prompt_processing", t_prompt_processing},
             {"num_prompt_tokens_processed", num_prompt_tokens_processed},
             {"t_token", t_token},
             {"n_tokens_second", n_tokens_second},
-        });
+        });*/
         t_token = t_token_generation / n_decoded;
         n_tokens_second = 1e3 / t_token_generation * n_decoded;
+        printf("\033[72;0H]");
         sprintf(buffer, "generation eval time = %10.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)", t_token_generation, n_decoded, t_token, n_tokens_second);
-        LOG_INFO(buffer, {
+        /*LOG_INFO(buffer, {
             {"slot_id", id},
             {"task_id", task_id},
             {"t_token_generation", t_token_generation},
             {"n_decoded", n_decoded},
             {"t_token", t_token},
             {"n_tokens_second", n_tokens_second},
-        });
+        });*/
+        printf("\033[5;0H]");
         sprintf(buffer, " total time = %10.2f ms", t_prompt_processing + t_token_generation);
-        LOG_INFO(buffer, {
+        /*LOG_INFO(buffer, {
             {"slot_id", id},
             {"task_id", task_id},
             {"t_prompt_processing", t_prompt_processing},
             {"t_token_generation", t_token_generation},
             {"t_total", t_prompt_processing + t_token_generation},
-        });
+        });*/
     }
 };
@@ -411,7 +414,7 @@ static void kvgraphics(std::vector& slots) {
     printf("\033[1;0H\033[K**************************\n\033[KKVcache occupancy by slot:\n\033[K**************************\n");
     // we can know and control how many lines of output we are printing so just start below that and fix the graphics location
-    printf("\033[%d;0H", 10);
+    printf("\033[%d;0H", 5);
     for(int i=0; i& slots) {
         if(slots[i].cache_tokens.size() == slot_cache_size) {
             slot_symbol3 = "\u274E"; // red box white cross
         } else {
-            slot_symbol3 = "";
+            slot_symbol3 = "\u22EE";
         }
         printf(" %4zu/%5zu %2d %s %s %s\n", slots[i].cache_tokens.size(), slot_cache_size, slots[i].id, slot_symbol1.c_str(), slot_symbol2.c_str(), slot_symbol3.c_str());
     }
@@ -568,10 +571,10 @@ struct llama_server_context
             slot.n_ctx = n_ctx_slot;
             slot.n_predict = params.n_predict;
-            LOG_INFO("new slot", {
+            /*LOG_INFO("new slot", {
                 {"slot_id", slot.id},
                 {"n_ctx_slot", slot.n_ctx}
-            });
+            });*/
             const int ga_n = params.grp_attn_n;
             const int ga_w = params.grp_attn_w;
@@ -582,11 +585,11 @@ struct llama_server_context
                 //GGML_ASSERT(n_ctx_train % ga_w == 0 && "n_ctx_train must be a multiple of ga_w");    // NOLINT
                 //GGML_ASSERT(n_ctx >= n_ctx_train * ga_n && "n_ctx must be at least n_ctx_train * ga_n"); // NOLINT
-                LOG_INFO("slot self-extend", {
+                /*LOG_INFO("slot self-extend", {
                     {"slot_id", slot.id},
                     {"ga_n", ga_n},
                     {"ga_w", ga_w}
-                });
+                });*/
             }
             slot.ga_i = 0;
@@ -963,10 +966,10 @@ struct llama_server_context
         all_slots_are_idle = false;
-        LOG_INFO("slot is processing task", {
+        /*LOG_INFO("slot is processing task", {
             {"slot_id", slot->id},
             {"task_id", slot->task_id},
-        });
+        });*/
         return true;
     }
@@ -1556,7 +1559,7 @@ struct llama_server_context
                 queue_tasks.defer(task);
                 break;
             } else {
-                printf("\033[5;0H\033[K");
+                printf("\033[5;0\033[K");
                 LOG("Activating slot %d.\n", (*slot).id);
             }
@@ -1631,11 +1634,11 @@ struct llama_server_context
             }
             slots_data.push_back(slot_data);
         }
-        LOG_INFO("slot data", {
+        /*LOG_INFO("slot data", {
             {"task_id", task.id},
             {"n_idle_slots", n_idle_slots},
             {"n_processing_slots", n_processing_slots}
-        });
+        });*/
         LOG_VERBOSE("slot data", {
             {"task_id", task.id},
             {"n_idle_slots", n_idle_slots},
@@ -1693,7 +1696,7 @@ struct llama_server_context
     bool update_slots() {
         if (system_need_update) {
-            LOG_INFO("updating system prompt", {});
+            //LOG_INFO("updating system prompt", {});
             update_system_prompt();
         }
@@ -1703,7 +1706,7 @@ struct llama_server_context
         {
             if (system_prompt.empty() && clean_kv_cache)
            {
-                LOG_INFO("all slots are idle and system prompt is empty, clear the KV cache", {});
+                /*LOG_INFO("all slots are idle and system prompt is empty, clear the KV cache", {});*/
                 kv_cache_clear();
            }
            return true;
        }
@@ -1728,7 +1731,7 @@ struct llama_server_context
                 const int n_left = (int) system_tokens.size() + slot.n_past - n_keep;
                 const int n_discard = n_left / 2;
-                LOG_INFO("slot context shift", {
+                /*LOG_INFO("slot context shift", {
                     {"slot_id", slot.id},
                     {"task_id", slot.task_id},
                     {"n_keep", n_keep},
@@ -1738,7 +1741,7 @@ struct llama_server_context
                     {"n_past", slot.n_past},
                     {"n_system_tokens", system_tokens.size()},
                     {"n_cache_tokens", slot.cache_tokens.size()}
-                });
+                });*/
                 llama_kv_cache_seq_rm (ctx, slot.id, n_keep , n_keep + n_discard);
                 llama_kv_cache_seq_add(ctx, slot.id, n_keep + n_discard, system_tokens.size() + slot.n_past, -n_discard);
@@ -1767,7 +1770,7 @@ struct llama_server_context
                     slot.command = NONE;
                     slot.t_last_used = ggml_time_us();
-                    LOG_INFO("slot released", {
+                    /*LOG_INFO("slot released", {
                         {"slot_id", slot.id},
                         {"task_id", slot.task_id},
                         {"n_ctx", n_ctx},
@@ -1775,7 +1778,7 @@ struct llama_server_context
                         {"n_system_tokens", system_tokens.size()},
                         {"n_cache_tokens", slot.cache_tokens.size()},
                         {"truncated", slot.truncated}
-                    });
+                    });*/
                     queue_tasks.notify_slot_changed();
                     continue;
@@ -1931,12 +1934,12 @@ struct llama_server_context
                         slot.ga_i = ga_i;
                     }
-                    LOG_INFO("slot progression", {
+                    /*LOG_INFO("slot progression", {
                         { "slot_id", slot.id },
                         { "task_id", slot.task_id },
                         { "n_past", slot.n_past },
                         { "num_prompt_tokens_processed", slot.num_prompt_tokens_processed }
-                    });
+                    });*/
                 }
                 slot.cache_tokens = prompt_tokens;
@@ -1956,11 +1959,11 @@ struct llama_server_context
                 }
                 int p0 = (int) system_tokens.size() + slot.n_past;
-                LOG_INFO("kv cache rm [p0, end)", {
+                /*LOG_INFO("kv cache rm [p0, end)", {
                     { "slot_id", slot.id },
                     { "task_id", slot.task_id },
                     { "p0", p0 }
-                });
+                });*/
                 llama_kv_cache_seq_rm(ctx, slot.id, p0, -1);
                 LOG_VERBOSE("prompt ingested", {
@@ -2856,14 +2859,14 @@ static void log_server_request(const httplib::Request &req, const httplib::Respo
         return;
     }
-    LOG_INFO("request", {
+    /*LOG_INFO("request", {
         {"remote_addr", req.remote_addr},
         {"remote_port", req.remote_port},
         {"status", res.status},
         {"method", req.method},
         {"path", req.path},
         {"params", req.params},
-    });
+    });*/
     LOG_VERBOSE("request", {
         {"request", req.body},
@@ -2920,7 +2923,7 @@ int main(int argc, char **argv)
     llama_numa_init(params.numa);
     ggml_time_init();
-    LOG_INFO("build info", {{"build", LLAMA_BUILD_NUMBER},
+    /*LOG_INFO("build info", {{"build", LLAMA_BUILD_NUMBER},
                             {"commit", LLAMA_COMMIT}});
     LOG_INFO("system info", {
@@ -2928,7 +2931,7 @@ int main(int argc, char **argv)
         {"n_threads_batch", params.n_threads_batch},
         {"total_threads", std::thread::hardware_concurrency()},
         {"system_info", llama_print_system_info()},
-    });
+    });*/
     httplib::Server svr;
@@ -3261,7 +3264,7 @@ int main(int argc, char **argv)
             }
            // it appears that here we first get ONE request to parse; then TEN; then ONE-by-ONE
            printf("\033[5;0H\033[K");
-            LOG_TEE("Request body to parse: %s.\n", req.body.c_str());
+            LOG("Request body to parse: %s.\n", req.body.c_str());
            if (llama.skvinteract) {
                getchar();
            }
diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp
index cc2564c84..11df11f2b 100644
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@@ -158,7 +158,7 @@ static inline void server_log(const char *level, const char *function, int line
         std::cout << log.dump(-1, ' ', false, json::error_handler_t::replace) << "\n" << std::flush;
     } else {
         char buf[1024];
-        snprintf(buf, 1024, "%4s [%24s] %s", level, function, message);
+        snprintf(buf, 1024, "\033[72;0H%4s [%24s] %s", level, function, message);
         if (!extra.empty()) {
             log.merge_patch(extra);
@@ -168,12 +168,12 @@ static inline void server_log(const char *level, const char *function, int line
         for (const auto& el : log.items()) {
             const std::string value = el.value().dump(-1, ' ', false, json::error_handler_t::replace);
-            snprintf(buf, 1024, " %s=%s", el.key().c_str(), value.c_str());
+            snprintf(buf, 1024, "\033[72;0H %s=%s", el.key().c_str(), value.c_str());
             ss << buf;
         }
         const std::string str = ss.str();
-        printf("%.*s\n", (int)str.size(), str.data());
+        printf("\033[72;0H%.*s\n", (int)str.size(), str.data());
         fflush(stdout);
     }
 }
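
Note on the escape codes used throughout the patch: they are ordinary ANSI CSI sequences, where \033[<row>;<col>H moves the cursor to an absolute (1-based) screen position and \033[K erases from the cursor to the end of the line. That is what pins the KV-cache occupancy graphics and the timing/log output to fixed rows instead of letting them scroll. The following is a minimal standalone sketch of that idea only; it is not part of the patch, and the rows and text chosen here are arbitrary placeholders.

    // ansi_demo.cpp - illustrative sketch, not part of the patch above.
    //   \033[<row>;<col>H  move the cursor to an absolute position (1-based)
    //   \033[K             erase from the cursor to the end of the line
    //   \033[2J            clear the whole screen
    #include <cstdio>

    int main() {
        printf("\033[2J");           // start from a blank screen
        printf("\033[1;1H\033[K");   // row 1: a fixed banner region
        printf("KVcache occupancy would be drawn here\n");
        printf("\033[5;1H\033[K");   // row 5: a second fixed region for status/log lines
        printf("prompt eval time = ... (placeholder text)\n");
        fflush(stdout);
        return 0;
    }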