From 298207185d79155d6458e61b0099c3b08367756f Mon Sep 17 00:00:00 2001
From: pudepiedj <pudepiedj@gmail.com>
Date: Wed, 21 Feb 2024 21:10:54 +0000
Subject: [PATCH] server: raise httplib thread pool minimum to 64 and small cleanups

---
 examples/server/httplib.h  | 2 +-
 examples/server/server.cpp | 7 +++----
 2 files changed, 4 insertions(+), 5 deletions(-)
diff --git a/examples/server/httplib.h b/examples/server/httplib.h
index 2d763fa40..37bbe9063 100644
--- a/examples/server/httplib.h
+++ b/examples/server/httplib.h
@@ -96,7 +96,7 @@
 // the value here (8u, 16u, 32u, etc) is what governs max threads at 5126
 #ifndef CPPHTTPLIB_THREAD_POOL_COUNT
 #define CPPHTTPLIB_THREAD_POOL_COUNT                                           \
-  ((std::max)(32u, std::thread::hardware_concurrency() > 0                     \
+  ((std::max)(64u, std::thread::hardware_concurrency() > 0                     \
                       ? std::thread::hardware_concurrency() - 1                \
                       : 0))
 #endif
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 00df17353..c9aa4e68e 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -30,7 +30,6 @@
 #include <condition_variable>
 #include <atomic>
 #include <signal.h>
-#include <string>
 
 #include <iostream> // do we still need this?
 
@@ -305,7 +304,9 @@ struct llama_client_slot
     }
 
     void print_timings(llama_client_slot &slot, bool flag = false) const {
-        printf("\033[21;0H");
+        if (flag) {
+            printf("\033[21;0H");        // TODO: row 21 is hard-coded; it should depend on the number of slots
+        };
         LOG_TEE("Finished processing slot %d.\n", slot.id);
         LOG_TEE("%s: prompt eval time = %10.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n",
             __func__, t_prompt_processing, num_prompt_tokens_processed, t_prompt_processing / num_prompt_tokens_processed, 1e3 / t_prompt_processing * num_prompt_tokens_processed);
@@ -1449,8 +1450,6 @@ struct llama_server_context
                     break;
                 } else {
                     LOG_TEE("Activating slot %d.\n", (*slot).id);
-                    (*slot).state = PROCESSING;       // makes slot.is_processing true
-                    (*slot).command = LOAD_PROMPT;    // why not a new flag 'RUNNING'? does this do anything when state is PROC
                 }
 
                 if (task.data.contains("system_prompt"))