From 298207185d79155d6458e61b0099c3b08367756f Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Wed, 21 Feb 2024 21:10:54 +0000 Subject: [PATCH] small changes and threads 64 --- examples/server/httplib.h | 2 +- examples/server/server.cpp | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/examples/server/httplib.h b/examples/server/httplib.h index 2d763fa40..37bbe9063 100644 --- a/examples/server/httplib.h +++ b/examples/server/httplib.h @@ -96,7 +96,7 @@ // the value here (8u, 16u, 32u, etc) is what governs max threads at 5126 #ifndef CPPHTTPLIB_THREAD_POOL_COUNT #define CPPHTTPLIB_THREAD_POOL_COUNT \ - ((std::max)(32u, std::thread::hardware_concurrency() > 0 \ + ((std::max)(64u, std::thread::hardware_concurrency() > 0 \ ? std::thread::hardware_concurrency() - 1 \ : 0)) #endif diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 00df17353..c9aa4e68e 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -30,7 +30,6 @@ #include #include #include -#include #include // do we still need this? @@ -305,7 +304,9 @@ struct llama_client_slot } void print_timings(llama_client_slot &slot, bool flag = false) const { - printf("\033[21;0H"); + if (flag) { + printf("\033[21;0H"); // needs to be sensitive to the number of slots + }; LOG_TEE("Finished processing slot %d.\n", slot.id); LOG_TEE("%s: prompt eval time = %10.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n", __func__, t_prompt_processing, num_prompt_tokens_processed, t_prompt_processing / num_prompt_tokens_processed, 1e3 / t_prompt_processing * num_prompt_tokens_processed); @@ -1449,8 +1450,6 @@ struct llama_server_context break; } else { LOG_TEE("Activating slot %d.\n", (*slot).id); - (*slot).state = PROCESSING; // makes slot.is_processing true - (*slot).command = LOAD_PROMPT; // why not a new flag 'RUNNING'? does this do anything when state is PROC } if (task.data.contains("system_prompt"))