diff --git a/Llamaserver.py b/Llamaserver.py index ce48aeae2..3e02d7721 100644 --- a/Llamaserver.py +++ b/Llamaserver.py @@ -52,8 +52,12 @@ def send_request(q, question, event, count, num_requests): delay = 0.1 global bar - - data = {'prompt': question} + + system = "You are a helpful assistant who answers all requests \ +courteously and accurately without undue repetition. \ +You pay close attention to the nuance of a question and respond accordingly." + + data = {'system': system, 'prompt': question} try: response = requests.post(url, headers=headers, json=data) @@ -114,7 +118,7 @@ if __name__ == "__main__": for i in range(num_requests): country = country_list[i % len(country_list)] - question = f"What was the total population of {country} in 2018?" + question = f"Tell me the political history of {country} up to 2018." # NOTE: don't pass the parameter as a function call; pass in args print(f"Processing request {i} / {num_requests}: {question}\n") event = threading.Event() diff --git a/cmakescript.sh b/cmakescript.sh new file mode 100755 index 000000000..6d77f9446 --- /dev/null +++ b/cmakescript.sh @@ -0,0 +1,13 @@ +# simple automation of cmake +# usage is ./cmakescript.sh Debug || Release + +rm -r build +cmake -B build +cd build + +if [ $# -eq 1 ] && [[ "$1" == "Debug" || "$1" == "Release" ]]; then + cmake --build . 
--config "$1" +else + echo "Usage: $0 (Debug|Release)" + exit 1 +fi diff --git a/examples/cmap-example/CMakeLists.txt b/examples/cmap-example/CMakeLists.txt index d62ca26cf..83344b42a 100644 --- a/examples/cmap-example/CMakeLists.txt +++ b/examples/cmap-example/CMakeLists.txt @@ -1,8 +1,8 @@ -set(TARGET TCPshellscript) +set(TARGET TCPportquiz) include_directories(${CMAKE_CURRENT_SOURCE_DIR}) -add_executable(${TARGET} TCPshellscript.cpp) +add_executable(${TARGET} TCPportquiz.cpp) install(TARGETS ${TARGET} RUNTIME) -target_link_libraries(${TARGET} PRIVATE common llava ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) if (WIN32) TARGET_LINK_LIBRARIES(${TARGET} PRIVATE ws2_32) endif() diff --git a/examples/cmap-example/TCPshellscript.cpp b/examples/cmap-example/TCPportquiz.cpp similarity index 100% rename from examples/cmap-example/TCPshellscript.cpp rename to examples/cmap-example/TCPportquiz.cpp diff --git a/examples/cmap-example/cursor.cpp b/examples/cmap-example/cursor.cpp new file mode 100644 index 000000000..92acb6b69 --- /dev/null +++ b/examples/cmap-example/cursor.cpp @@ -0,0 +1,26 @@ +// just trying to get the cursor position + +#include <cstdio> + +struct CursorPos { + int x; + int y; +}; + +static CursorPos getCursorPos() { + + // Get text cursor position + auto cursorPos = getCursorPos(); + + // Assign to struct + CursorPos pos; + pos.x = cursorPos.x; + pos.y = cursorPos.y; + + return pos; +} + +int main() { + CursorPos cursor = getCursorPos(); + printf("The x co-ordinate of the cursor is %zu\n; the y co-ordinate of the cursor is %zu\n", cursor.x, cursor.y); +} diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 9cb16dbb6..e935b8447 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -305,7 +305,7 @@ struct llama_client_slot void print_timings(llama_client_slot &slot, bool flag = false) const { if (flag) { - printf("\033[21;0H"); // needs to be sensitive to the 
number of slots + printf("\033[5;0H"); // needs to be sensitive to the number of slots }; LOG_TEE("Finished processing slot %d.\n", slot.id); LOG_TEE("%s: prompt eval time = %10.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n", @@ -315,7 +315,7 @@ struct llama_client_slot LOG_TEE("%s: total time = %10.2f ms\n", __func__, t_prompt_processing + t_token_generation); if (flag) { - printf("\033[25;0HPress any key ... "); + printf("\033[KPress any key ... \n"); getchar(); } } @@ -323,11 +323,11 @@ struct llama_client_slot // experimental/diagostic graphic to show kvcache status // requires just `slots` and `params.n_ctx` as parameters -static void kvgraphics(std::vector& slots, int cache_size) { +static void kvgraphics(std::vector& slots) { int max_length = 128; int num_blocks = slots.size(); - size_t slot_cache_size = cache_size / num_blocks; + size_t slot_cache_size = slots[0].n_ctx; bool cls_flag = true; // this flag only prevents repeated cls inside one call std::string slot_symbol1 = ""; std::string slot_symbol2 = ""; @@ -344,13 +344,16 @@ static void kvgraphics(std::vector& slots, int cache_size) { // Print visualization // Always start at the top left of the window (H means 'move cursor to this position'; 2J = cls) + // See below for a rethink because controlling log printing is such a pain in C++11 // Only clear the screen the first time round if (cls_flag) { - printf("\033[2J"); + // printf("\033[2J"); cls_flag = false; } printf("\033[1;0H\033[K**************************\n\033[KKVcache occupancy by slot:\n\033[K**************************\n"); + // we can know and control how many lines of output we are printing so just start below that and fix the graphics location + printf("\033[%d;0H", 10); for(int i=0; i& slots, int cache_size) { } printf(" %4zu/%5zu %2d %s %s %s\n", slots[i].cache_tokens.size(), slot_cache_size, slots[i].id, slot_symbol1.c_str(), slot_symbol2.c_str(), slot_symbol3.c_str()); } - printf("\n\033[%dJ", num_blocks+5); // move 
cursor to end of cache display + printf("\033[5;0H"); // just start two lines below the heading + //printf("\n\033[%d;0H\033[%dJ", 10, num_blocks+5); // move cursor to end of cache display and clear thereafter } struct llama_server_context @@ -494,7 +498,7 @@ struct llama_server_context slot.n_ctx = n_ctx_slot; slot.n_predict = params.n_predict; - LOG_TEE(" -> Slot %i - max context: %i\n", slot.id, n_ctx_slot); + LOG_TEE(" -> Slot %2i - max context: %i\n", slot.id, n_ctx_slot); const int ga_n = params.grp_attn_n; const int ga_w = params.grp_attn_w; @@ -504,7 +508,7 @@ struct llama_server_context GGML_ASSERT(ga_w % ga_n == 0 && "ga_w must be a multiple of ga_n"); // NOLINT //GGML_ASSERT(n_ctx_train % ga_w == 0 && "n_ctx_train must be a multiple of ga_w"); // NOLINT //GGML_ASSERT(n_ctx >= n_ctx_train * ga_n && "n_ctx must be at least n_ctx_train * ga_n"); // NOLINT - LOG_TEE(" -> Slot %i - self-extend: ga_n = %d, ga_w = %d\n", slot.id, ga_n, ga_w); + LOG_TEE(" -> Slot %2i - self-extend: ga_n = %d, ga_w = %d\n", slot.id, ga_n, ga_w); } slot.ga_i = 0; @@ -581,6 +585,7 @@ struct llama_server_context for (llama_client_slot & slot : slots) { + printf("\033[5;0H"); if (slot.id == -1 && slot.available()) { LOG_TEE("Unallocated task now using slot %d", slot.id); @@ -1438,6 +1443,7 @@ struct llama_server_context // why should task.data already contain a slot_id key when we haven't allocated it? // because if it doesnt the returned value will be -1; what makes it anything else? int requested_slot = json_value(task.data, "slot_id", -1); + printf("\033[5;0H\033[K"); LOG_TEE("Task %d requesting slot %d\n", task.id, requested_slot); // why are we suddenly using 'slot' as a pointer here - confusing? 
@@ -1449,6 +1455,7 @@ struct llama_server_context queue_tasks.defer(task); break; } else { + printf("\033[5;0H\033[K"); LOG_TEE("Activating slot %d.\n", (*slot).id); } @@ -1554,6 +1561,7 @@ struct llama_server_context const int n_left = system_tokens.size() + slot.n_past - slot.params.n_keep - 1; const int n_discard = n_left / 2; // is this arbitrary? + printf("\033[5;0H\033[K"); LOG_TEE("slot %d: context shift - n_keep = %d, n_left = %d, n_discard = %d\n", slot.id, slot.params.n_keep, n_left, n_discard); llama_kv_cache_seq_rm (ctx, slot.id, slot.params.n_keep + 1 , slot.params.n_keep + n_discard + 1); llama_kv_cache_seq_shift(ctx, slot.id, slot.params.n_keep + 1 + n_discard, system_tokens.size() + slot.n_past, -n_discard); @@ -1588,6 +1596,7 @@ struct llama_server_context slot.command = NONE; slot.t_last_used = ggml_time_us(); + printf("\033[6;0H\033[K"); LOG_TEE("slot %d released (%d tokens in cache)\n", slot.id, (int) slot.cache_tokens.size()); queue_tasks.notify_slot_changed(); // why don't we immediately reallocate the released slot without waiting? Is this what -cb does? 
@@ -1736,6 +1745,7 @@ struct llama_server_context slot.ga_i = ga_i; } + printf("\033[7;0H\033[K"); LOG_TEE("slot %d : in cache: %i tokens | to process: %i tokens\n", slot.id, slot.n_past, slot.num_prompt_tokens_processed); } @@ -1752,6 +1762,7 @@ struct llama_server_context } } + printf("\033[5;0H\033[K"); LOG_TEE("slot %d : kv cache rm - [%d, end)\n", slot.id, (int) system_tokens.size() + slot.n_past); llama_kv_cache_seq_rm(ctx, slot.id, system_tokens.size() + slot.n_past, -1); @@ -1936,7 +1947,7 @@ struct llama_server_context // we are still inside llama_server_context so we can use an unqualified parameter if (skvgraphics) { - kvgraphics(slots, params.n_ctx); + kvgraphics(slots); } return true; @@ -2916,7 +2927,8 @@ int main(int argc, char **argv) return; } // it appears that here we first get ONE request to parse; then TEN; then ONE-by-ONE - printf("Request body to parse: %s", req.body.c_str()); + printf("\033[5;0H\033[K"); + LOG_TEE("Request body to parse: %s", req.body.c_str()); if (llama.skvinteract) { getchar(); } diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp index 28dbfe970..1aa45f28c 100644 --- a/examples/server/utils.hpp +++ b/examples/server/utils.hpp @@ -36,6 +36,7 @@ extern bool server_verbose; #define LOG_WARNING(MSG, ...) server_log("WARNING", __func__, __LINE__, MSG, __VA_ARGS__) #define LOG_INFO( MSG, ...) 
server_log("INFO", __func__, __LINE__, MSG, __VA_ARGS__) + // // parallel // @@ -244,7 +245,9 @@ struct llama_server_queue { void defer(task_server task) { std::unique_lock lock(mutex_tasks); queue_tasks_deferred.push_back(std::move(task)); - LOG_TEE("Deferred task queue now has %3zu members.\n", queue_tasks_deferred.size()); + printf("\033[1;50H*** "); + LOG_TEE("Deferred queue now has %3zu members.\n", queue_tasks_deferred.size()); + printf("\033[5;0H"); } // Get the next id for creating a new task @@ -390,15 +393,17 @@ struct llama_server_response { void add_waiting_task_id(int task_id) { std::unique_lock lock(mutex_results); waiting_task_ids.insert(task_id); - printf("\033[21;0H"); + printf("\033[1;50H*** "); LOG_TEE("Waiting task list size after addition: %zu.\n", waiting_task_ids.size()); + printf("\033[5;0H"); } void remove_waiting_task_id(int task_id) { std::unique_lock lock(mutex_results); waiting_task_ids.erase(task_id); - printf("\033[21;0H"); + printf("\033[2;50H*** "); LOG_TEE("Waiting task list size after removal: %zu.\n", waiting_task_ids.size()); + printf("\033[5;0H"); } // This function blocks the thread until there is a response for this task_id