From f7e29e5248ada2ac451dd8e4ddd45cac9552359c Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Wed, 21 Feb 2024 11:39:48 +0000 Subject: [PATCH] More requests and threads --- Llamaserver.py | 4 ++-- examples/server/httplib.h | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Llamaserver.py b/Llamaserver.py index 3bdee15bc..629987ac9 100644 --- a/Llamaserver.py +++ b/Llamaserver.py @@ -85,8 +85,8 @@ if __name__ == "__main__": url = "http://localhost:8080/completion" - num_requests = 40 - q = Queue(maxsize = 40) + num_requests = 256 + q = Queue(maxsize = 256) threads = [] bar = make_empty_bar(num_requests) diff --git a/examples/server/httplib.h b/examples/server/httplib.h index 2fa9ff35b..12cdf97eb 100644 --- a/examples/server/httplib.h +++ b/examples/server/httplib.h @@ -93,10 +93,10 @@ #ifndef CPPHTTPLIB_COMPRESSION_BUFSIZ #define CPPHTTPLIB_COMPRESSION_BUFSIZ size_t(16384u) #endif - +// the value here (8u, 16u, 32u, etc) is what governs max threads at 5126 #ifndef CPPHTTPLIB_THREAD_POOL_COUNT #define CPPHTTPLIB_THREAD_POOL_COUNT \ - ((std::max)(32u, std::thread::hardware_concurrency() > 0 \ + ((std::max)(128u, std::thread::hardware_concurrency() > 0 \ ? std::thread::hardware_concurrency() - 1 \ : 0)) #endif @@ -581,7 +581,7 @@ public: class ThreadPool : public TaskQueue { public: - explicit ThreadPool(size_t n) : shutdown_(false) { + explicit ThreadPool(size_t n) : shutdown_(false) { // n is CPPHTTPLIB_THREAD_POOL_COUNT while (n) { threads_.emplace_back(worker(*this)); n--;