From f7e29e5248ada2ac451dd8e4ddd45cac9552359c Mon Sep 17 00:00:00 2001
From: pudepiedj <pudepiedj@gmail.com>
Date: Wed, 21 Feb 2024 11:39:48 +0000
Subject: [PATCH] Raise test client to 256 requests and server thread-pool floor to 128

---
 Llamaserver.py            | 4 ++--
 examples/server/httplib.h | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/Llamaserver.py b/Llamaserver.py
index 3bdee15bc..629987ac9 100644
--- a/Llamaserver.py
+++ b/Llamaserver.py
@@ -85,8 +85,8 @@ if __name__ == "__main__":
     
     url = "http://localhost:8080/completion"
 
-    num_requests = 40
-    q = Queue(maxsize = 40)
+    num_requests = 256
+    q = Queue(maxsize = 256)
     threads = []
 
     bar = make_empty_bar(num_requests)
diff --git a/examples/server/httplib.h b/examples/server/httplib.h
index 2fa9ff35b..12cdf97eb 100644
--- a/examples/server/httplib.h
+++ b/examples/server/httplib.h
@@ -93,10 +93,10 @@
 #ifndef CPPHTTPLIB_COMPRESSION_BUFSIZ
 #define CPPHTTPLIB_COMPRESSION_BUFSIZ size_t(16384u)
 #endif
-
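+// The literal passed to (std::max) below (8u, 16u, 32u, etc.) is the lower bound on CPPHTTPLIB_THREAD_POOL_COUNT and so governs max threads (original note: "at 5126" -- presumably a line number, TODO confirm)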
 #ifndef CPPHTTPLIB_THREAD_POOL_COUNT
 #define CPPHTTPLIB_THREAD_POOL_COUNT                                           \
-  ((std::max)(32u, std::thread::hardware_concurrency() > 0                     \
+  ((std::max)(128u, std::thread::hardware_concurrency() > 0                     \
                       ? std::thread::hardware_concurrency() - 1                \
                       : 0))
 #endif
@@ -581,7 +581,7 @@ public:
 
 class ThreadPool : public TaskQueue {
 public:
-  explicit ThreadPool(size_t n) : shutdown_(false) {
+  explicit ThreadPool(size_t n) : shutdown_(false) {  // n is CPPHTTPLIB_THREAD_POOL_COUNT
     while (n) {
       threads_.emplace_back(worker(*this));
       n--;